/*
 * Copyright © 2023 Collabora Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "nir/nir_builder.h"
#include "nir/nir_format_convert.h"

#include "vk_buffer.h"
#include "vk_command_buffer.h"
#include "vk_command_pool.h"
#include "vk_device.h"
#include "vk_format.h"
#include "vk_meta.h"
#include "vk_meta_private.h"
#include "vk_physical_device.h"
#include "vk_pipeline.h"

#include "util/format/u_format.h"

struct vk_meta_fill_buffer_key {
   enum vk_meta_object_key_type key_type;
};

struct vk_meta_copy_buffer_key {
   enum vk_meta_object_key_type key_type;

   uint32_t chunk_size;
};

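/* Describes how an image is viewed during a copy. Color images carry a
 * single view format. Depth/stencil images carry one view format per
 * aspect, plus a component mask selecting which components of that view
 * hold the aspect's data (the two masks are disjoint when both aspects
 * share a single interleaved view format).
 */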
struct vk_meta_copy_image_view {
   VkImageViewType type;

   union {
      struct {
         VkFormat format;
      } color;
      struct {
         struct {
            VkFormat format;
            nir_component_mask_t component_mask;
         } depth, stencil;
      };
   };
};

struct vk_meta_copy_buffer_image_key {
   enum vk_meta_object_key_type key_type;

   VkPipelineBindPoint bind_point;

   struct {
      struct vk_meta_copy_image_view view;

      VkImageAspectFlagBits aspect;
   } img;

   uint32_t wg_size[3];
};

struct vk_meta_copy_image_key {
   enum vk_meta_object_key_type key_type;

   VkPipelineBindPoint bind_point;

   /* Source and destination views for the aspects being copied. */
   struct {
      struct vk_meta_copy_image_view view;
   } src, dst;

   VkImageAspectFlagBits aspects;
   VkSampleCountFlagBits samples;

   uint32_t wg_size[3];
};

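/* Load one field of a push-constant info struct, with the bit size and
 * offset derived from the C struct definition.
 */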
#define load_info(__b, __type, __field_name)                                   \
   nir_load_push_constant((__b), 1,                                            \
                          sizeof(((__type *)NULL)->__field_name) * 8,          \
                          nir_imm_int((__b), offsetof(__type, __field_name)))

struct vk_meta_fill_buffer_info {
   uint64_t buf_addr;
   uint32_t data;
   uint32_t size;
};

struct vk_meta_copy_buffer_info {
   uint64_t src_addr;
   uint64_t dst_addr;
   uint32_t size;
};

struct vk_meta_copy_buffer_image_info {
   struct {
      uint64_t addr;
      uint32_t row_stride;
      uint32_t image_stride;
   } buf;

   struct {
      struct {
         uint32_t x, y, z;
      } offset;
   } img;

   /* The workgroup size is chosen based on the image tile size, which means
    * threads may be issued outside the image area we want to copy from/to.
    * This field encodes the range of copy IDs that should be processed; IDs
    * outside the range are skipped, and the range start also serves as an
    * adjustment for the buffer/image coordinates. */
   struct {
      struct {
         uint32_t x, y, z;
      } start, end;
   } copy_id_range;
};

struct vk_meta_copy_image_fs_info {
   struct {
      int32_t x, y, z;
   } dst_to_src_offs;
};

struct vk_meta_copy_image_cs_info {
   struct {
      struct {
         uint32_t x, y, z;
      } offset;
   } src_img, dst_img;

   /* The workgroup size is chosen based on the image tile size, which means
    * threads may be issued outside the image area we want to copy from/to.
    * This field encodes the range of copy IDs that should be processed; IDs
    * outside the range are skipped, and the range start also serves as an
    * adjustment for the buffer/image coordinates. */
   struct {
      struct {
         uint32_t x, y, z;
      } start, end;
   } copy_id_range;
};

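/* Fold the base array layer into the coordinate that acts as the layer
 * index for the given view type, so layered copies can be expressed as 3D
 * copies.
 */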
static VkOffset3D
base_layer_as_offset(VkImageViewType view_type, VkOffset3D offset,
                     uint32_t base_layer)
{
   switch (view_type) {
   case VK_IMAGE_VIEW_TYPE_1D:
      return (VkOffset3D){
         .x = offset.x,
      };

   case VK_IMAGE_VIEW_TYPE_1D_ARRAY:
      return (VkOffset3D){
         .x = offset.x,
         .y = base_layer,
      };

   case VK_IMAGE_VIEW_TYPE_2D_ARRAY:
   case VK_IMAGE_VIEW_TYPE_CUBE:
   case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY:
      return (VkOffset3D){
         .x = offset.x,
         .y = offset.y,
         .z = base_layer,
      };

   case VK_IMAGE_VIEW_TYPE_2D:
   case VK_IMAGE_VIEW_TYPE_3D:
      return offset;

   default:
      assert(!"Invalid view type");
      return (VkOffset3D){0};
   }
}

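/* Extent counterpart of base_layer_as_offset(): fold the layer count into
 * the dimension that acts as the layer index for the given view type.
 */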
static VkExtent3D
layer_count_as_extent(VkImageViewType view_type, VkExtent3D extent,
                      uint32_t layer_count)
{
   switch (view_type) {
   case VK_IMAGE_VIEW_TYPE_1D:
      return (VkExtent3D){
         .width = extent.width,
         .height = 1,
         .depth = 1,
      };

   case VK_IMAGE_VIEW_TYPE_1D_ARRAY:
      return (VkExtent3D){
         .width = extent.width,
         .height = layer_count,
         .depth = 1,
      };

   case VK_IMAGE_VIEW_TYPE_2D_ARRAY:
   case VK_IMAGE_VIEW_TYPE_CUBE:
   case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY:
      return (VkExtent3D){
         .width = extent.width,
         .height = extent.height,
         .depth = layer_count,
      };

   case VK_IMAGE_VIEW_TYPE_2D:
   case VK_IMAGE_VIEW_TYPE_3D:
      return extent;

   default:
      assert(!"Invalid view type");
      return (VkExtent3D){0};
   }
}

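/* Shorthand for a single-descriptor VkDescriptorSetLayoutBinding. */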
#define COPY_SHADER_BINDING(__binding, __type, __stage)                        \
   {                                                                           \
      .binding = __binding,                                                    \
      .descriptorCount = 1,                                                    \
      .descriptorType = VK_DESCRIPTOR_TYPE_##__type,                           \
      .stageFlags = VK_SHADER_STAGE_##__stage##_BIT,                           \
   }

static VkResult
get_copy_pipeline_layout(struct vk_device *device, struct vk_meta_device *meta,
                         const char *key, VkShaderStageFlagBits shader_stage,
                         size_t push_const_size,
                         const struct VkDescriptorSetLayoutBinding *bindings,
                         uint32_t binding_count, VkPipelineLayout *layout_out)
{
   const VkDescriptorSetLayoutCreateInfo set_layout = {
      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
      .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
      .bindingCount = binding_count,
      .pBindings = bindings,
   };

   const VkPushConstantRange push_range = {
      .stageFlags = shader_stage,
      .offset = 0,
      .size = push_const_size,
   };

   return vk_meta_get_pipeline_layout(device, meta, &set_layout, &push_range,
                                      key, strlen(key) + 1, layout_out);
}

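/* Shorthand for a VkWriteDescriptorSet pointing to a single image view. */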
#define COPY_PUSH_SET_IMG_DESC(__binding, __type, __iview, __layout)           \
   {                                                                           \
      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,                         \
      .dstBinding = __binding,                                                 \
      .descriptorType = VK_DESCRIPTOR_TYPE_##__type##_IMAGE,                   \
      .descriptorCount = 1,                                                    \
      .pImageInfo = &(VkDescriptorImageInfo){                                  \
         .imageView = __iview,                                                 \
         .imageLayout = __layout,                                              \
      },                                                                       \
   }

static VkFormat
copy_img_view_format_for_aspect(const struct vk_meta_copy_image_view *info,
                                VkImageAspectFlagBits aspect)
{
   switch (aspect) {
   case VK_IMAGE_ASPECT_COLOR_BIT:
      return info->color.format;

   case VK_IMAGE_ASPECT_DEPTH_BIT:
      return info->depth.format;

   case VK_IMAGE_ASPECT_STENCIL_BIT:
      return info->stencil.format;

   default:
      assert(!"Unsupported aspect");
      return VK_FORMAT_UNDEFINED;
   }
}

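/* Depth and stencil are interleaved when both aspects are accessed through
 * the same view format, each owning a non-empty, disjoint subset of its
 * components.
 */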
static bool
depth_stencil_interleaved(const struct vk_meta_copy_image_view *view)
{
   return view->stencil.format != VK_FORMAT_UNDEFINED &&
          view->depth.format != VK_FORMAT_UNDEFINED &&
          view->stencil.format == view->depth.format &&
          view->stencil.component_mask != 0 &&
          view->depth.component_mask != 0 &&
          (view->stencil.component_mask & view->depth.component_mask) == 0;
}

static VkResult
get_gfx_copy_pipeline(
   struct vk_device *device, struct vk_meta_device *meta,
   VkPipelineLayout layout, VkSampleCountFlagBits samples,
   nir_shader *(*build_nir)(const struct vk_meta_device *, const void *),
   VkImageAspectFlagBits aspects, const struct vk_meta_copy_image_view *view,
   const void *key_data, size_t key_size, VkPipeline *pipeline_out)
{
   VkPipeline from_cache = vk_meta_lookup_pipeline(meta, key_data, key_size);
   if (from_cache != VK_NULL_HANDLE) {
      *pipeline_out = from_cache;
      return VK_SUCCESS;
   }

   const VkPipelineShaderStageNirCreateInfoMESA fs_nir_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_NIR_CREATE_INFO_MESA,
      .nir = build_nir(meta, key_data),
   };
   const VkPipelineShaderStageCreateInfo fs_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
      .pNext = &fs_nir_info,
      .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
      .pName = "main",
   };

   VkPipelineDepthStencilStateCreateInfo ds_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
   };
   VkPipelineDynamicStateCreateInfo dyn_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
   };
   struct vk_meta_rendering_info render = {
      .samples = samples,
   };

   const VkGraphicsPipelineCreateInfo info = {
      .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
      .stageCount = 1,
      .pStages = &fs_info,
      .pDepthStencilState = &ds_info,
      .pDynamicState = &dyn_info,
      .layout = layout,
   };

   if (aspects & VK_IMAGE_ASPECT_COLOR_BIT) {
      VkFormat fmt =
         copy_img_view_format_for_aspect(view, VK_IMAGE_ASPECT_COLOR_BIT);

      render.color_attachment_formats[render.color_attachment_count] = fmt;
      render.color_attachment_write_masks[render.color_attachment_count] =
         VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
         VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT;
      render.color_attachment_count++;
   }

   if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
      VkFormat fmt =
         copy_img_view_format_for_aspect(view, VK_IMAGE_ASPECT_DEPTH_BIT);

      render.color_attachment_formats[render.color_attachment_count] = fmt;
      render.color_attachment_write_masks[render.color_attachment_count] =
         (VkColorComponentFlags)view->depth.component_mask;
      render.color_attachment_count++;
   }

   if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
      VkFormat fmt =
         copy_img_view_format_for_aspect(view, VK_IMAGE_ASPECT_STENCIL_BIT);

      if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT &&
          depth_stencil_interleaved(view)) {
         render.color_attachment_write_masks[0] |= view->stencil.component_mask;
      } else {
         render.color_attachment_formats[render.color_attachment_count] = fmt;
         render.color_attachment_write_masks[render.color_attachment_count] =
            (VkColorComponentFlags)view->stencil.component_mask;
         render.color_attachment_count++;
      }
   }

   VkResult result = vk_meta_create_graphics_pipeline(
      device, meta, &info, &render, key_data, key_size, pipeline_out);

   ralloc_free(fs_nir_info.nir);

   return result;
}

static VkResult
get_compute_copy_pipeline(
   struct vk_device *device, struct vk_meta_device *meta,
   VkPipelineLayout layout,
   nir_shader *(*build_nir)(const struct vk_meta_device *, const void *),
   const void *key_data, size_t key_size, VkPipeline *pipeline_out)
{
   VkPipeline from_cache = vk_meta_lookup_pipeline(meta, key_data, key_size);
   if (from_cache != VK_NULL_HANDLE) {
      *pipeline_out = from_cache;
      return VK_SUCCESS;
   }

   const VkPipelineShaderStageNirCreateInfoMESA cs_nir_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_NIR_CREATE_INFO_MESA,
      .nir = build_nir(meta, key_data),
   };

   const VkComputePipelineCreateInfo info = {
      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
      .stage = {
         .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
         .pNext = &cs_nir_info,
         .stage = VK_SHADER_STAGE_COMPUTE_BIT,
         .pName = "main",
      },
      .layout = layout,
   };

   VkResult result = vk_meta_create_compute_pipeline(
      device, meta, &info, key_data, key_size, pipeline_out);

   ralloc_free(cs_nir_info.nir);

   return result;
}

static VkResult
copy_create_src_image_view(struct vk_command_buffer *cmd,
                           struct vk_meta_device *meta, struct vk_image *img,
                           const struct vk_meta_copy_image_view *view_info,
                           VkImageAspectFlags aspect,
                           const VkImageSubresourceLayers *subres,
                           VkImageView *view_out)
{
   const VkImageViewUsageCreateInfo usage = {
      .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO,
      .usage = VK_IMAGE_USAGE_SAMPLED_BIT,
   };

   VkFormat format = copy_img_view_format_for_aspect(view_info, aspect);

   VkImageViewCreateInfo info = {
      .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
      .pNext = &usage,
      .flags = VK_IMAGE_VIEW_CREATE_DRIVER_INTERNAL_BIT_MESA,
      .image = vk_image_to_handle(img),
      .viewType = view_info->type,
      .format = format,
      .subresourceRange = {
         .aspectMask = vk_format_aspects(format),
         .baseMipLevel = subres->mipLevel,
         .levelCount = 1,
         .baseArrayLayer = 0,
         .layerCount = img->array_layers,
      },
   };

   if (aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
      nir_component_mask_t comp_mask = aspect == VK_IMAGE_ASPECT_STENCIL_BIT
                                          ? view_info->stencil.component_mask
                                          : view_info->depth.component_mask;
      assert(comp_mask != 0);

      VkComponentSwizzle *swizzle = &info.components.r;
      unsigned num_comps = util_bitcount(comp_mask);
      unsigned first_comp = ffs(comp_mask) - 1;

      assert(first_comp + num_comps <= 4);

      for (unsigned i = 0; i < num_comps; i++)
         swizzle[i] = first_comp + i + VK_COMPONENT_SWIZZLE_R;
   }

   return vk_meta_create_image_view(cmd, meta, &info, view_out);
}

static VkResult
copy_create_dst_image_view(struct vk_command_buffer *cmd,
                           struct vk_meta_device *meta, struct vk_image *img,
                           const struct vk_meta_copy_image_view *view_info,
                           VkImageAspectFlags aspect, const VkOffset3D *offset,
                           const VkExtent3D *extent,
                           const VkImageSubresourceLayers *subres,
                           VkPipelineBindPoint bind_point,
                           VkImageView *view_out)
{
   uint32_t layer_count, base_layer;
   VkFormat format = copy_img_view_format_for_aspect(view_info, aspect);
   VkImageAspectFlags fmt_aspects = vk_format_aspects(format);
   const VkImageViewUsageCreateInfo usage = {
      .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO,
      .usage = bind_point == VK_PIPELINE_BIND_POINT_COMPUTE
                  ? VK_IMAGE_USAGE_STORAGE_BIT
                  : VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
   };

   if (bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) {
      layer_count =
         MAX2(extent->depth, vk_image_subresource_layer_count(img, subres));
      base_layer = img->image_type == VK_IMAGE_TYPE_3D ? offset->z
                                                       : subres->baseArrayLayer;
   } else {
      /* Always create a view covering the whole image in case of compute. */
      layer_count = img->image_type == VK_IMAGE_TYPE_3D ? 1 : img->array_layers;
      base_layer = 0;
   }

   const VkImageViewCreateInfo info = {
      .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
      .pNext = &usage,
      .flags = VK_IMAGE_VIEW_CREATE_DRIVER_INTERNAL_BIT_MESA,
      .image = vk_image_to_handle(img),
      .viewType = bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS
                     ? vk_image_render_view_type(img, layer_count)
                     : vk_image_storage_view_type(img),
      .format = format,
      .subresourceRange = {
         .aspectMask = fmt_aspects,
         .baseMipLevel = subres->mipLevel,
         .levelCount = 1,
         .baseArrayLayer = base_layer,
         .layerCount = layer_count,
      },
   };

   return vk_meta_create_image_view(cmd, meta, &info, view_out);
}

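/* Trim a vec3 of image coordinates down to the number of coordinates the
 * view type actually uses.
 */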
static nir_def *
trim_img_coords(nir_builder *b, VkImageViewType view_type, nir_def *coords)
{
   switch (view_type) {
   case VK_IMAGE_VIEW_TYPE_1D:
      return nir_channel(b, coords, 0);

   case VK_IMAGE_VIEW_TYPE_1D_ARRAY:
   case VK_IMAGE_VIEW_TYPE_2D:
      return nir_trim_vector(b, coords, 2);

   default:
      return nir_trim_vector(b, coords, 3);
   }
}

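/* Return the 64-bit global address of the buffer texel matching the given
 * image element coordinates, based on the buffer address and row/image
 * strides passed through the push constants.
 */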
static nir_def *
copy_img_buf_addr(nir_builder *b, enum pipe_format pfmt, nir_def *coords)
{
   nir_def *buf_row_stride =
      load_info(b, struct vk_meta_copy_buffer_image_info, buf.row_stride);
   nir_def *buf_img_stride =
      load_info(b, struct vk_meta_copy_buffer_image_info, buf.image_stride);
   nir_def *buf_addr =
      load_info(b, struct vk_meta_copy_buffer_image_info, buf.addr);
   nir_def *offset = nir_imul(b, nir_channel(b, coords, 2), buf_img_stride);
   unsigned blk_sz = util_format_get_blocksize(pfmt);

   offset = nir_iadd(b, offset,
                     nir_imul(b, nir_channel(b, coords, 1), buf_row_stride));
   offset = nir_iadd(b, offset,
                     nir_imul_imm(b, nir_channel(b, coords, 0), blk_sz));

   return nir_iadd(b, buf_addr, nir_u2u64(b, offset));
}

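/* Pick the uncompressed integer format used for the buffer side of the
 * copy: a single-component format sized for the depth/stencil payload, or
 * a uint format matching the color block size.
 */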
static VkFormat
copy_img_buf_format_for_aspect(const struct vk_meta_copy_image_view *info,
                               VkImageAspectFlagBits aspect)
{
   if (aspect == VK_IMAGE_ASPECT_DEPTH_BIT) {
      enum pipe_format pfmt = vk_format_to_pipe_format(info->depth.format);
      unsigned num_comps = util_format_get_nr_components(pfmt);
      unsigned depth_comp_bits = 0;

      for (unsigned i = 0; i < num_comps; i++) {
         if (info->depth.component_mask & BITFIELD_BIT(i))
            depth_comp_bits += util_format_get_component_bits(
               pfmt, UTIL_FORMAT_COLORSPACE_RGB, i);
      }

      switch (depth_comp_bits) {
      case 16:
         return VK_FORMAT_R16_UINT;
      case 24:
      case 32:
         return VK_FORMAT_R32_UINT;
      default:
         assert(!"Unsupported format");
         return VK_FORMAT_UNDEFINED;
      }
   } else if (aspect == VK_IMAGE_ASPECT_STENCIL_BIT) {
      return VK_FORMAT_R8_UINT;
   }

   enum pipe_format pfmt = vk_format_to_pipe_format(info->color.format);

   switch (util_format_get_blocksize(pfmt)) {
   case 1:
      return VK_FORMAT_R8_UINT;
   case 2:
      return VK_FORMAT_R16_UINT;
   case 3:
      return VK_FORMAT_R8G8B8_UINT;
   case 4:
      return VK_FORMAT_R32_UINT;
   case 6:
      return VK_FORMAT_R16G16B16_UINT;
   case 8:
      return VK_FORMAT_R32G32_UINT;
   case 12:
      return VK_FORMAT_R32G32B32_UINT;
   case 16:
      return VK_FORMAT_R32G32B32A32_UINT;
   default:
      assert(!"Unsupported format");
      return VK_FORMAT_UNDEFINED;
   }
}

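/* Re-interpret a texel from src_fmt to dst_fmt by packing it with the
 * source format and unpacking it with the destination format.
 */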
static nir_def *
convert_texel(nir_builder *b, VkFormat src_fmt, VkFormat dst_fmt,
              nir_def *texel)
{
   enum pipe_format src_pfmt = vk_format_to_pipe_format(src_fmt);
   enum pipe_format dst_pfmt = vk_format_to_pipe_format(dst_fmt);

   if (src_pfmt == dst_pfmt)
      return texel;

   unsigned src_blksz = util_format_get_blocksize(src_pfmt);
   unsigned dst_blksz = util_format_get_blocksize(dst_pfmt);

   nir_def *packed = nir_format_pack_rgba(b, src_pfmt, texel);

   /* Needed for depth/stencil copies where the source/dest formats might
    * have a different size. */
   if (src_blksz < dst_blksz)
      packed = nir_pad_vector_imm_int(b, packed, 0, 4);

   nir_def *unpacked = nir_format_unpack_rgba(b, packed, dst_pfmt);

   return unpacked;
}

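/* Scatter the components of a depth/stencil texel to the view components
 * selected by comp_mask, zero-filling the components that are not covered.
 */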
static nir_def *
place_ds_texel(nir_builder *b, VkFormat fmt, nir_component_mask_t comp_mask,
               nir_def *texel)
{
   assert(comp_mask != 0);

   enum pipe_format pfmt = vk_format_to_pipe_format(fmt);
   unsigned num_comps = util_format_get_nr_components(pfmt);

   if (comp_mask == nir_component_mask(num_comps))
      return texel;

   assert(num_comps <= 4);

   nir_def *comps[4];
   unsigned c = 0;

   for (unsigned i = 0; i < num_comps; i++) {
      if (comp_mask & BITFIELD_BIT(i))
         comps[i] = nir_channel(b, texel, c++);
      else
         comps[i] = nir_imm_intN_t(b, 0, texel->bit_size);
   }

   return nir_vec(b, comps, num_comps);
}

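/* Declare a sampled-texture uniform matching the view type, sample count
 * and copied aspect, and return a deref to it.
 */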
static nir_deref_instr *
tex_deref(nir_builder *b, const struct vk_meta_copy_image_view *view,
          VkImageAspectFlags aspect, VkSampleCountFlagBits samples,
          unsigned binding)
{
   VkFormat fmt = copy_img_view_format_for_aspect(view, aspect);
   bool is_array = vk_image_view_type_is_array(view->type);
   enum glsl_sampler_dim sampler_dim =
      samples != VK_SAMPLE_COUNT_1_BIT
         ? GLSL_SAMPLER_DIM_MS
         : vk_image_view_type_to_sampler_dim(view->type);
   enum pipe_format pfmt = vk_format_to_pipe_format(fmt);
   enum glsl_base_type base_type =
      util_format_is_pure_sint(pfmt)   ? GLSL_TYPE_INT
      : util_format_is_pure_uint(pfmt) ? GLSL_TYPE_UINT
                                       : GLSL_TYPE_FLOAT;
   const char *tex_name;
   switch (aspect) {
   case VK_IMAGE_ASPECT_COLOR_BIT:
      tex_name = "color_tex";
      break;
   case VK_IMAGE_ASPECT_DEPTH_BIT:
      tex_name = "depth_tex";
      break;
   case VK_IMAGE_ASPECT_STENCIL_BIT:
      tex_name = "stencil_tex";
      break;
   default:
      assert(!"Unsupported aspect");
      return NULL;
   }

   const struct glsl_type *texture_type =
      glsl_sampler_type(sampler_dim, false, is_array, base_type);
   nir_variable *texture =
      nir_variable_create(b->shader, nir_var_uniform, texture_type, tex_name);
   texture->data.descriptor_set = 0;
   texture->data.binding = binding;

   return nir_build_deref_var(b, texture);
}

static nir_deref_instr *
img_deref(nir_builder *b, const struct vk_meta_copy_image_view *view,
          VkImageAspectFlags aspect, VkSampleCountFlagBits samples,
          unsigned binding)
{
   VkFormat fmt = copy_img_view_format_for_aspect(view, aspect);
   bool is_array = vk_image_view_type_is_array(view->type);
   enum glsl_sampler_dim sampler_dim =
      samples != VK_SAMPLE_COUNT_1_BIT
         ? GLSL_SAMPLER_DIM_MS
         : vk_image_view_type_to_sampler_dim(view->type);
   enum pipe_format pfmt = vk_format_to_pipe_format(fmt);
   enum glsl_base_type base_type =
      util_format_is_pure_sint(pfmt)   ? GLSL_TYPE_INT
      : util_format_is_pure_uint(pfmt) ? GLSL_TYPE_UINT
                                       : GLSL_TYPE_FLOAT;
   const char *img_name;
   switch (aspect) {
   case VK_IMAGE_ASPECT_COLOR_BIT:
      img_name = "color_img";
      break;
   case VK_IMAGE_ASPECT_DEPTH_BIT:
      img_name = "depth_img";
      break;
   case VK_IMAGE_ASPECT_STENCIL_BIT:
      img_name = "stencil_img";
      break;
   default:
      assert(!"Unsupported aspect");
      return NULL;
   }
   const struct glsl_type *image_type =
      glsl_image_type(sampler_dim, is_array, base_type);
   nir_variable *image_var =
      nir_variable_create(b->shader, nir_var_uniform, image_type, img_name);
   image_var->data.descriptor_set = 0;
   image_var->data.binding = binding;

   return nir_build_deref_var(b, image_var);
}

static nir_def *
read_texel(nir_builder *b, nir_deref_instr *tex_deref, nir_def *coords,
           nir_def *sample_id)
{
   return sample_id ? nir_txf_ms_deref(b, tex_deref, coords, sample_id)
                    : nir_txf_deref(b, tex_deref, coords, NULL);
}

static nir_variable *
frag_var(nir_builder *b, const struct vk_meta_copy_image_view *view,
         VkImageAspectFlags aspect, uint32_t rt)
{
   VkFormat fmt = copy_img_view_format_for_aspect(view, aspect);
   enum pipe_format pfmt = vk_format_to_pipe_format(fmt);
   enum glsl_base_type base_type =
      util_format_is_pure_sint(pfmt)   ? GLSL_TYPE_INT
      : util_format_is_pure_uint(pfmt) ? GLSL_TYPE_UINT
                                       : GLSL_TYPE_FLOAT;
   const struct glsl_type *var_type = glsl_vector_type(base_type, 4);
   static const char *var_names[] = {
      "gl_FragData[0]",
      "gl_FragData[1]",
   };

   assert(rt < ARRAY_SIZE(var_names));

   nir_variable *var = nir_variable_create(b->shader, nir_var_shader_out,
                                           var_type, var_names[rt]);
   var->data.location = FRAG_RESULT_DATA0 + rt;

   return var;
}

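/* Store a texel to the fragment output, limited to the component mask of
 * the copied depth/stencil aspect and converted to the output's 32-bit
 * base type if needed.
 */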
static void
write_frag(nir_builder *b, const struct vk_meta_copy_image_view *view,
           VkImageAspectFlags aspect, nir_variable *frag_var, nir_def *frag_val)
{
   nir_component_mask_t comp_mask;

   if (aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
      VkFormat fmt = copy_img_view_format_for_aspect(view, aspect);

      comp_mask = aspect == VK_IMAGE_ASPECT_DEPTH_BIT
                     ? view->depth.component_mask
                     : view->stencil.component_mask;
      frag_val = place_ds_texel(b, fmt, comp_mask, frag_val);
   } else {
      comp_mask = nir_component_mask(4);
   }

   if (frag_val->bit_size != 32) {
      switch (glsl_get_base_type(frag_var->type)) {
      case GLSL_TYPE_INT:
         frag_val = nir_i2i32(b, frag_val);
         break;
      case GLSL_TYPE_UINT:
         frag_val = nir_u2u32(b, frag_val);
         break;
      case GLSL_TYPE_FLOAT:
         frag_val = nir_f2f32(b, frag_val);
         break;
      default:
         assert(!"Invalid type");
         frag_val = NULL;
         break;
      }
   }

   frag_val = nir_pad_vector_imm_int(b, frag_val, 0, 4);

   nir_store_var(b, frag_var, frag_val, comp_mask);
}

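/* Store a texel to a storage image. Partial depth/stencil writes, where
 * the component mask doesn't cover the whole format, are turned into a
 * read-modify-write of the untouched components.
 */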
static void
write_img(nir_builder *b, const struct vk_meta_copy_image_view *view,
          VkImageAspectFlags aspect, VkSampleCountFlagBits samples,
          nir_deref_instr *img_deref, nir_def *coords, nir_def *sample_id,
          nir_def *val)
{
   VkFormat fmt = copy_img_view_format_for_aspect(view, aspect);
   enum pipe_format pfmt = vk_format_to_pipe_format(fmt);
   enum glsl_base_type base_type =
      util_format_is_pure_sint(pfmt)   ? GLSL_TYPE_INT
      : util_format_is_pure_uint(pfmt) ? GLSL_TYPE_UINT
                                       : GLSL_TYPE_FLOAT;
   enum glsl_sampler_dim sampler_dim =
      samples != VK_SAMPLE_COUNT_1_BIT
         ? GLSL_SAMPLER_DIM_MS
         : vk_image_view_type_to_sampler_dim(view->type);
   bool is_array = vk_image_view_type_is_array(view->type);

   if (!sample_id) {
      assert(samples == VK_SAMPLE_COUNT_1_BIT);
      sample_id = nir_imm_int(b, 0);
   }

   unsigned access_flags = ACCESS_NON_READABLE;
   nir_def *zero_lod = nir_imm_int(b, 0);

   if (aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
      nir_component_mask_t comp_mask = aspect == VK_IMAGE_ASPECT_DEPTH_BIT
                                          ? view->depth.component_mask
                                          : view->stencil.component_mask;
      unsigned num_comps = util_format_get_nr_components(pfmt);

      val = place_ds_texel(b, fmt, comp_mask, val);

      if (comp_mask != nir_component_mask(num_comps)) {
         nir_def *comps[4];
         access_flags = 0;

         nir_def *old_val = nir_image_deref_load(b,
            val->num_components, val->bit_size, &img_deref->def, coords,
            sample_id, zero_lod, .image_dim = sampler_dim,
            .image_array = is_array, .format = pfmt, .access = access_flags,
            .dest_type = nir_get_nir_type_for_glsl_base_type(base_type));

         for (unsigned i = 0; i < val->num_components; i++) {
            if (comp_mask & BITFIELD_BIT(i))
               comps[i] = nir_channel(b, val, i);
            else
               comps[i] = nir_channel(b, old_val, i);
         }

         val = nir_vec(b, comps, val->num_components);
      }
   }

   nir_image_deref_store(b, &img_deref->def, coords, sample_id, val, zero_lod,
                         .image_dim = sampler_dim, .image_array = is_array,
                         .format = pfmt, .access = access_flags,
                         .src_type = nir_get_nir_type_for_glsl_base_type(base_type));
}

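/* Image-to-buffer copy shader: one invocation per image element, with a
 * bounds check on copy_id_range since the dispatch is rounded up to the
 * workgroup (tile) size.
 */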
static nir_shader *
build_image_to_buffer_shader(const struct vk_meta_device *meta,
                             const void *key_data)
{
   const struct vk_meta_copy_buffer_image_key *key = key_data;

   assert(key->bind_point == VK_PIPELINE_BIND_POINT_COMPUTE);

   nir_builder builder = nir_builder_init_simple_shader(
      MESA_SHADER_COMPUTE, NULL, "vk-meta-copy-image-to-buffer");
   nir_builder *b = &builder;

   b->shader->info.workgroup_size[0] = key->wg_size[0];
   b->shader->info.workgroup_size[1] = key->wg_size[1];
   b->shader->info.workgroup_size[2] = key->wg_size[2];

   VkFormat buf_fmt =
      copy_img_buf_format_for_aspect(&key->img.view, key->img.aspect);
   enum pipe_format buf_pfmt = vk_format_to_pipe_format(buf_fmt);

   nir_def *copy_id = nir_load_global_invocation_id(b, 32);
   nir_def *copy_id_start =
      nir_vec3(b,
               load_info(b, struct vk_meta_copy_buffer_image_info,
                         copy_id_range.start.x),
               load_info(b, struct vk_meta_copy_buffer_image_info,
                         copy_id_range.start.y),
               load_info(b, struct vk_meta_copy_buffer_image_info,
                         copy_id_range.start.z));
   nir_def *copy_id_end = nir_vec3(b,
      load_info(b, struct vk_meta_copy_buffer_image_info, copy_id_range.end.x),
      load_info(b, struct vk_meta_copy_buffer_image_info, copy_id_range.end.y),
      load_info(b, struct vk_meta_copy_buffer_image_info,
                copy_id_range.end.z));

   nir_def *in_bounds =
      nir_iand(b, nir_ball(b, nir_uge(b, copy_id, copy_id_start)),
               nir_ball(b, nir_ult(b, copy_id, copy_id_end)));

   nir_push_if(b, in_bounds);

   copy_id = nir_isub(b, copy_id, copy_id_start);

   nir_def *img_offs = nir_vec3(b,
      load_info(b, struct vk_meta_copy_buffer_image_info, img.offset.x),
      load_info(b, struct vk_meta_copy_buffer_image_info, img.offset.y),
      load_info(b, struct vk_meta_copy_buffer_image_info, img.offset.z));

   nir_def *img_coords =
      trim_img_coords(b, key->img.view.type, nir_iadd(b, copy_id, img_offs));

   VkFormat iview_fmt =
      copy_img_view_format_for_aspect(&key->img.view, key->img.aspect);
   nir_deref_instr *tex =
      tex_deref(b, &key->img.view, key->img.aspect, VK_SAMPLE_COUNT_1_BIT, 0);
   nir_def *texel = read_texel(b, tex, img_coords, NULL);

   texel = convert_texel(b, iview_fmt, buf_fmt, texel);

   unsigned blk_sz = util_format_get_blocksize(buf_pfmt);
   unsigned comp_count = util_format_get_nr_components(buf_pfmt);
   assert(blk_sz % comp_count == 0);
   unsigned comp_sz = (blk_sz / comp_count) * 8;

   /* nir_format_unpack() (which is called in convert_texel()) always
    * returns a 32-bit result, which we might have to downsize to match
    * the component size we want, hence the u2uN().
    */
   texel = nir_u2uN(b, texel, comp_sz);

   /* nir_format_unpack_rgba() (which is called from convert_texel()) returns
    * a vec4, which means we might have more components than we need, but
    * that's fine because we pass a write_mask to store_global.
    */
   assert(texel->num_components >= comp_count);
   nir_store_global(b, copy_img_buf_addr(b, buf_pfmt, copy_id),
                    comp_sz / 8, texel, nir_component_mask(comp_count));

   nir_pop_if(b, NULL);

   return b->shader;
}

static VkResult
get_copy_image_to_buffer_pipeline(
   struct vk_device *device, struct vk_meta_device *meta,
   const struct vk_meta_copy_buffer_image_key *key,
   VkPipelineLayout *layout_out, VkPipeline *pipeline_out)
{
   const VkDescriptorSetLayoutBinding bindings[] = {
      COPY_SHADER_BINDING(0, SAMPLED_IMAGE, COMPUTE),
   };

   VkResult result = get_copy_pipeline_layout(
      device, meta, "vk-meta-copy-image-to-buffer-pipeline-layout",
      VK_SHADER_STAGE_COMPUTE_BIT,
      sizeof(struct vk_meta_copy_buffer_image_info), bindings,
      ARRAY_SIZE(bindings), layout_out);

   if (unlikely(result != VK_SUCCESS))
      return result;

   return get_compute_copy_pipeline(device, meta, *layout_out,
                                    build_image_to_buffer_shader, key,
                                    sizeof(*key), pipeline_out);
}

static nir_shader *
build_buffer_to_image_fs(const struct vk_meta_device *meta,
                         const void *key_data)
{
   const struct vk_meta_copy_buffer_image_key *key = key_data;

   assert(key->bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS);

   nir_builder builder = nir_builder_init_simple_shader(
      MESA_SHADER_FRAGMENT, NULL, "vk-meta-copy-buffer-to-image-frag");
   nir_builder *b = &builder;

   VkFormat buf_fmt =
      copy_img_buf_format_for_aspect(&key->img.view, key->img.aspect);

   enum pipe_format buf_pfmt = vk_format_to_pipe_format(buf_fmt);
   nir_def *out_coord_xy = nir_f2u32(b, nir_load_frag_coord(b));
   nir_def *out_layer = nir_load_layer_id(b);

   nir_def *img_offs = nir_vec3(b,
      load_info(b, struct vk_meta_copy_buffer_image_info, img.offset.x),
      load_info(b, struct vk_meta_copy_buffer_image_info, img.offset.y),
      load_info(b, struct vk_meta_copy_buffer_image_info, img.offset.z));

   /* Move the layer ID to the second coordinate if we're dealing with a 1D
    * array, as this is where the texture instruction expects it. */
   nir_def *coords = key->img.view.type == VK_IMAGE_VIEW_TYPE_1D_ARRAY
                        ? nir_vec3(b, nir_channel(b, out_coord_xy, 0),
                                   out_layer, nir_imm_int(b, 0))
                        : nir_vec3(b, nir_channel(b, out_coord_xy, 0),
                                   nir_channel(b, out_coord_xy, 1), out_layer);

   unsigned blk_sz = util_format_get_blocksize(buf_pfmt);
   unsigned comp_count = util_format_get_nr_components(buf_pfmt);
   assert(blk_sz % comp_count == 0);
   unsigned comp_sz = (blk_sz / comp_count) * 8;

   coords = nir_isub(b, coords, img_offs);

   nir_def *texel = nir_build_load_global(b,
      comp_count, comp_sz, copy_img_buf_addr(b, buf_pfmt, coords),
      .align_mul = 1 << (ffs(blk_sz) - 1));

   /* We don't do compressed formats. The driver should select a non-compressed
    * format with the same block size. */
   assert(!util_format_is_compressed(buf_pfmt));

   VkFormat iview_fmt =
      copy_img_view_format_for_aspect(&key->img.view, key->img.aspect);
   nir_variable *out_var = frag_var(b, &key->img.view, key->img.aspect, 0);

   texel = convert_texel(b, buf_fmt, iview_fmt, texel);
   write_frag(b, &key->img.view, key->img.aspect, out_var, texel);
   return b->shader;
}

static VkResult
get_copy_buffer_to_image_gfx_pipeline(
   struct vk_device *device, struct vk_meta_device *meta,
   const struct vk_meta_copy_buffer_image_key *key,
   VkPipelineLayout *layout_out, VkPipeline *pipeline_out)
{
   VkResult result = get_copy_pipeline_layout(
      device, meta, "vk-meta-copy-buffer-to-image-gfx-pipeline-layout",
      VK_SHADER_STAGE_FRAGMENT_BIT,
      sizeof(struct vk_meta_copy_buffer_image_info), NULL, 0, layout_out);

   if (unlikely(result != VK_SUCCESS))
      return result;

   return get_gfx_copy_pipeline(device, meta, *layout_out,
                                VK_SAMPLE_COUNT_1_BIT, build_buffer_to_image_fs,
                                key->img.aspect, &key->img.view, key,
                                sizeof(*key), pipeline_out);
}

static nir_shader *
build_buffer_to_image_cs(const struct vk_meta_device *meta,
                         const void *key_data)
{
   const struct vk_meta_copy_buffer_image_key *key = key_data;

   assert(key->bind_point == VK_PIPELINE_BIND_POINT_COMPUTE);

   nir_builder builder = nir_builder_init_simple_shader(
      MESA_SHADER_COMPUTE, NULL, "vk-meta-copy-buffer-to-image-compute");
   nir_builder *b = &builder;

   b->shader->info.workgroup_size[0] = key->wg_size[0];
   b->shader->info.workgroup_size[1] = key->wg_size[1];
   b->shader->info.workgroup_size[2] = key->wg_size[2];

   VkFormat buf_fmt =
      copy_img_buf_format_for_aspect(&key->img.view, key->img.aspect);
   VkFormat img_fmt =
      copy_img_view_format_for_aspect(&key->img.view, key->img.aspect);
   enum pipe_format buf_pfmt = vk_format_to_pipe_format(buf_fmt);
   nir_deref_instr *image_deref =
      img_deref(b, &key->img.view, key->img.aspect, VK_SAMPLE_COUNT_1_BIT, 0);

   nir_def *copy_id = nir_load_global_invocation_id(b, 32);
   nir_def *copy_id_start =
      nir_vec3(b,
               load_info(b, struct vk_meta_copy_buffer_image_info,
                         copy_id_range.start.x),
               load_info(b, struct vk_meta_copy_buffer_image_info,
                         copy_id_range.start.y),
               load_info(b, struct vk_meta_copy_buffer_image_info,
                         copy_id_range.start.z));
   nir_def *copy_id_end = nir_vec3(b,
      load_info(b, struct vk_meta_copy_buffer_image_info, copy_id_range.end.x),
      load_info(b, struct vk_meta_copy_buffer_image_info, copy_id_range.end.y),
      load_info(b, struct vk_meta_copy_buffer_image_info,
                copy_id_range.end.z));

   nir_def *in_bounds =
      nir_iand(b, nir_ball(b, nir_uge(b, copy_id, copy_id_start)),
               nir_ball(b, nir_ult(b, copy_id, copy_id_end)));

   nir_push_if(b, in_bounds);

   /* Adjust the copy ID such that we can directly deduce the image coords and
    * buffer offset from it. */
   copy_id = nir_isub(b, copy_id, copy_id_start);

   nir_def *img_offs = nir_vec3(b,
      load_info(b, struct vk_meta_copy_buffer_image_info, img.offset.x),
      load_info(b, struct vk_meta_copy_buffer_image_info, img.offset.y),
      load_info(b, struct vk_meta_copy_buffer_image_info, img.offset.z));

   nir_def *img_coords =
      trim_img_coords(b, key->img.view.type, nir_iadd(b, copy_id, img_offs));

   img_coords = nir_pad_vector_imm_int(b, img_coords, 0, 4);

   unsigned blk_sz = util_format_get_blocksize(buf_pfmt);
   unsigned bit_sz = blk_sz & 1 ? 8 : blk_sz & 2 ? 16 : 32;
   unsigned comp_count = blk_sz * 8 / bit_sz;

   nir_def *texel = nir_build_load_global(b,
         comp_count, bit_sz, copy_img_buf_addr(b, buf_pfmt, copy_id),
         .align_mul = 1 << (ffs(blk_sz) - 1));

   texel = convert_texel(b, buf_fmt, img_fmt, texel);

   /* If the image view format matches buf_fmt, convert_texel() does nothing,
    * but we still need to promote the texel to a 32-bit unsigned integer,
    * because write_img() wants a 32-bit value.
    */
   if (texel->bit_size < 32)
      texel = nir_u2u32(b, texel);

   write_img(b, &key->img.view, key->img.aspect, VK_SAMPLE_COUNT_1_BIT,
             image_deref, img_coords, NULL, texel);

   nir_pop_if(b, NULL);

   return b->shader;
}

static VkResult
get_copy_buffer_to_image_compute_pipeline(
   struct vk_device *device, struct vk_meta_device *meta,
   const struct vk_meta_copy_buffer_image_key *key,
   VkPipelineLayout *layout_out, VkPipeline *pipeline_out)
{
   const VkDescriptorSetLayoutBinding bindings[] = {
      COPY_SHADER_BINDING(0, STORAGE_IMAGE, COMPUTE),
   };

   VkResult result = get_copy_pipeline_layout(
      device, meta, "vk-meta-copy-buffer-to-image-compute-pipeline-layout",
      VK_SHADER_STAGE_COMPUTE_BIT,
      sizeof(struct vk_meta_copy_buffer_image_info), bindings,
      ARRAY_SIZE(bindings), layout_out);

   if (unlikely(result != VK_SUCCESS))
      return result;

   return get_compute_copy_pipeline(device, meta, *layout_out,
                                    build_buffer_to_image_cs, key, sizeof(*key),
                                    pipeline_out);
}

static VkResult
copy_buffer_image_prepare_gfx_push_const(
   struct vk_command_buffer *cmd, struct vk_meta_device *meta,
   const struct vk_meta_copy_buffer_image_key *key,
   VkPipelineLayout pipeline_layout, VkBuffer buffer,
   const struct vk_image_buffer_layout *buf_layout, struct vk_image *img,
   const VkBufferImageCopy2 *region)
{
   struct vk_device *dev = cmd->base.device;
   const struct vk_device_dispatch_table *disp = &dev->dispatch_table;

   /* vk_meta_copy_buffer_image_info::image_stride is 32-bit for now.
    * We might want to make it a 64-bit integer (and patch the shader code
    * accordingly) if that becomes a limiting factor for vk_meta_copy users.
    */
   assert(buf_layout->image_stride_B <= UINT32_MAX);

   struct vk_meta_copy_buffer_image_info info = {
      .buf = {
         .row_stride = buf_layout->row_stride_B,
         .image_stride = buf_layout->image_stride_B,
         .addr = vk_meta_buffer_address(dev, buffer, region->bufferOffset,
                                        VK_WHOLE_SIZE),
      },
      .img.offset = {
         .x = region->imageOffset.x,
         .y = region->imageOffset.y,
         .z = region->imageOffset.z,
      },
   };

   disp->CmdPushConstants(vk_command_buffer_to_handle(cmd), pipeline_layout,
                          VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(info), &info);
   return VK_SUCCESS;
}

static VkResult
copy_buffer_image_prepare_compute_push_const(
   struct vk_command_buffer *cmd, struct vk_meta_device *meta,
   const struct vk_meta_copy_buffer_image_key *key,
   VkPipelineLayout pipeline_layout, VkBuffer buffer,
   const struct vk_image_buffer_layout *buf_layout, struct vk_image *img,
   const VkBufferImageCopy2 *region, uint32_t *wg_count)
{
   struct vk_device *dev = cmd->base.device;
   const struct vk_device_dispatch_table *disp = &dev->dispatch_table;
   VkImageViewType img_view_type = key->img.view.type;
   VkOffset3D img_offs =
      base_layer_as_offset(img_view_type, region->imageOffset,
                           region->imageSubresource.baseArrayLayer);
   uint32_t layer_count =
      vk_image_subresource_layer_count(img, &region->imageSubresource);
   VkExtent3D img_extent =
      layer_count_as_extent(img_view_type, region->imageExtent, layer_count);

   struct vk_meta_copy_buffer_image_info info = {
      .buf = {
         .row_stride = buf_layout->row_stride_B,
         .image_stride = buf_layout->image_stride_B,
         .addr = vk_meta_buffer_address(dev, buffer, region->bufferOffset,
                                        VK_WHOLE_SIZE),
      },
      .img.offset = {
         .x = img_offs.x,
         .y = img_offs.y,
         .z = img_offs.z,
      },
   };

   info.copy_id_range.start.x = img_offs.x % key->wg_size[0];
   info.copy_id_range.start.y = img_offs.y % key->wg_size[1];
   info.copy_id_range.start.z = img_offs.z % key->wg_size[2];
   info.copy_id_range.end.x = info.copy_id_range.start.x + img_extent.width;
   info.copy_id_range.end.y = info.copy_id_range.start.y + img_extent.height;
   info.copy_id_range.end.z = info.copy_id_range.start.z + img_extent.depth;
   wg_count[0] = DIV_ROUND_UP(info.copy_id_range.end.x, key->wg_size[0]);
   wg_count[1] = DIV_ROUND_UP(info.copy_id_range.end.y, key->wg_size[1]);
   wg_count[2] = DIV_ROUND_UP(info.copy_id_range.end.z, key->wg_size[2]);

   disp->CmdPushConstants(vk_command_buffer_to_handle(cmd), pipeline_layout,
                          VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(info), &info);
   return VK_SUCCESS;
}

static bool
format_is_supported(VkFormat fmt)
{
   enum pipe_format pfmt = vk_format_to_pipe_format(fmt);
   const struct util_format_description *fdesc = util_format_description(pfmt);

   /* We only support RGB formats in the copy path to keep things simple. */
   return fdesc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
          fdesc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB;
}

static struct vk_meta_copy_image_view
img_copy_view_info(VkImageViewType view_type, VkImageAspectFlags aspects,
                   const struct vk_image *img,
                   const struct vk_meta_copy_image_properties *img_props)
{
   struct vk_meta_copy_image_view view = {
      .type = view_type,
   };

   /* We only support color/depth/stencil aspects. */
   assert(aspects & (VK_IMAGE_ASPECT_COLOR_BIT | VK_IMAGE_ASPECT_DEPTH_BIT |
                     VK_IMAGE_ASPECT_STENCIL_BIT));

   if (aspects & VK_IMAGE_ASPECT_COLOR_BIT) {
      /* Color aspect can't be combined with other aspects. */
      assert(!(aspects & ~VK_IMAGE_ASPECT_COLOR_BIT));
      view.color.format = img_props->color.view_format;
      assert(format_is_supported(view.color.format));
      return view;
   }

   view.depth.format = img_props->depth.view_format;
   view.depth.component_mask = img_props->depth.component_mask;
   view.stencil.format = img_props->stencil.view_format;
   view.stencil.component_mask = img_props->stencil.component_mask;

   assert(view.depth.format == VK_FORMAT_UNDEFINED ||
          format_is_supported(view.depth.format));
   assert(view.stencil.format == VK_FORMAT_UNDEFINED ||
          format_is_supported(view.stencil.format));
   return view;
}

static void
copy_image_to_buffer_region(
   struct vk_command_buffer *cmd, struct vk_meta_device *meta,
   struct vk_image *img, VkImageLayout img_layout,
   const struct vk_meta_copy_image_properties *img_props, VkBuffer buffer,
   const struct vk_image_buffer_layout *buf_layout,
   const VkBufferImageCopy2 *region)
{
   struct vk_device *dev = cmd->base.device;
   const struct vk_device_dispatch_table *disp = &dev->dispatch_table;
   struct vk_meta_copy_buffer_image_key key = {
      .key_type = VK_META_OBJECT_KEY_COPY_IMAGE_TO_BUFFER_PIPELINE,
      .bind_point = VK_PIPELINE_BIND_POINT_COMPUTE,
      .img = {
         .view = img_copy_view_info(vk_image_sampled_view_type(img),
                                    region->imageSubresource.aspectMask, img,
                                    img_props),
         .aspect = region->imageSubresource.aspectMask,
      },
      .wg_size = {
         img_props->tile_size.width,
         img_props->tile_size.height,
         img_props->tile_size.depth,
      },
   };

   VkPipelineLayout pipeline_layout;
   VkPipeline pipeline;
   VkResult result = get_copy_image_to_buffer_pipeline(
      dev, meta, &key, &pipeline_layout, &pipeline);
   if (unlikely(result != VK_SUCCESS)) {
      vk_command_buffer_set_error(cmd, result);
      return;
   }

   disp->CmdBindPipeline(vk_command_buffer_to_handle(cmd),
                         VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);

   VkImageView iview;
   result = copy_create_src_image_view(cmd, meta, img, &key.img.view,
                                       region->imageSubresource.aspectMask,
                                       &region->imageSubresource, &iview);

   if (unlikely(result != VK_SUCCESS)) {
      vk_command_buffer_set_error(cmd, result);
      return;
   }

   const VkWriteDescriptorSet descs[] = {
      COPY_PUSH_SET_IMG_DESC(0, SAMPLED, iview, img_layout),
   };

   disp->CmdPushDescriptorSetKHR(vk_command_buffer_to_handle(cmd),
                                 VK_PIPELINE_BIND_POINT_COMPUTE,
                                 pipeline_layout, 0, ARRAY_SIZE(descs), descs);

   uint32_t wg_count[3] = {0};

   result = copy_buffer_image_prepare_compute_push_const(
      cmd, meta, &key, pipeline_layout, buffer, buf_layout, img, region,
      wg_count);
   if (unlikely(result != VK_SUCCESS)) {
      vk_command_buffer_set_error(cmd, result);
      return;
   }

   disp->CmdDispatch(vk_command_buffer_to_handle(cmd), wg_count[0], wg_count[1],
                     wg_count[2]);
}

1385 void
vk_meta_copy_image_to_buffer(struct vk_command_buffer * cmd,struct vk_meta_device * meta,const VkCopyImageToBufferInfo2 * info,const struct vk_meta_copy_image_properties * img_props)1386 vk_meta_copy_image_to_buffer(
1387    struct vk_command_buffer *cmd, struct vk_meta_device *meta,
1388    const VkCopyImageToBufferInfo2 *info,
1389    const struct vk_meta_copy_image_properties *img_props)
1390 {
1391    VK_FROM_HANDLE(vk_image, img, info->srcImage);
1392 
1393    for (uint32_t i = 0; i < info->regionCount; i++) {
1394       VkBufferImageCopy2 region = info->pRegions[i];
1395       struct vk_image_buffer_layout buf_layout =
1396          vk_image_buffer_copy_layout(img, &region);
1397 
1398       region.imageExtent = vk_image_extent_to_elements(img, region.imageExtent);
1399       region.imageOffset = vk_image_offset_to_elements(img, region.imageOffset);
1400 
1401       copy_image_to_buffer_region(cmd, meta, img, info->srcImageLayout,
1402                                   img_props, info->dstBuffer, &buf_layout,
1403                                   &region);
1404    }
1405 }
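
/* A minimal sketch of driver glue for this entrypoint. The drv_* names and
 * the way the meta state is reached are hypothetical; only the vk_meta call
 * itself is defined by this file:
 *
 *    VKAPI_ATTR void VKAPI_CALL
 *    drv_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer,
 *                              const VkCopyImageToBufferInfo2 *pInfo)
 *    {
 *       VK_FROM_HANDLE(drv_cmd_buffer, cmdbuf, commandBuffer);
 *       VK_FROM_HANDLE(vk_image, img, pInfo->srcImage);
 *       struct vk_meta_copy_image_properties props =
 *          drv_get_copy_image_properties(img);
 *
 *       vk_meta_copy_image_to_buffer(&cmdbuf->vk, &cmdbuf->device->meta,
 *                                    pInfo, &props);
 *    }
 */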

static void
copy_draw(struct vk_command_buffer *cmd, struct vk_meta_device *meta,
          struct vk_image *dst_img, VkImageLayout dst_img_layout,
          const VkImageSubresourceLayers *dst_img_subres,
          const VkOffset3D *dst_img_offset, const VkExtent3D *copy_extent,
          const struct vk_meta_copy_image_view *view_info)
{
   struct vk_device *dev = cmd->base.device;
   const struct vk_device_dispatch_table *disp = &dev->dispatch_table;
   uint32_t depth_or_layer_count =
      MAX2(copy_extent->depth,
           vk_image_subresource_layer_count(dst_img, dst_img_subres));
   struct vk_meta_rect rect = {
      .x0 = dst_img_offset->x,
      .x1 = dst_img_offset->x + copy_extent->width,
      .y0 = dst_img_offset->y,
      .y1 = dst_img_offset->y + copy_extent->height,
   };
   VkRenderingAttachmentInfo vk_atts[2];
   VkRenderingInfo vk_render = {
      .sType = VK_STRUCTURE_TYPE_RENDERING_INFO,
      .renderArea = {
         .offset = {
            dst_img_offset->x,
            dst_img_offset->y,
         },
         .extent = {
            copy_extent->width,
            copy_extent->height,
         },
      },
      .layerCount = depth_or_layer_count,
      .pColorAttachments = vk_atts,
   };
   VkImageView iview = VK_NULL_HANDLE;

   u_foreach_bit(a, dst_img_subres->aspectMask) {
      VkImageAspectFlagBits aspect = 1 << a;

      /* With an interleaved depth/stencil view, the depth iteration already
       * created an attachment covering both aspects, so skip stencil. */
      if (aspect == VK_IMAGE_ASPECT_STENCIL_BIT && iview != VK_NULL_HANDLE &&
          depth_stencil_interleaved(view_info))
         continue;

      VkResult result = copy_create_dst_image_view(
         cmd, meta, dst_img, view_info, aspect, dst_img_offset, copy_extent,
         dst_img_subres, VK_PIPELINE_BIND_POINT_GRAPHICS, &iview);
      if (unlikely(result != VK_SUCCESS)) {
         vk_command_buffer_set_error(cmd, result);
         return;
      }

      vk_atts[vk_render.colorAttachmentCount] = (VkRenderingAttachmentInfo){
         .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
         .imageView = iview,
         .imageLayout = dst_img_layout,
         .loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
         .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
      };

      /* If we have interleaved depth/stencil and only one aspect is copied,
       * we need to load the attachment to preserve the other aspect's data.
       */
      if (vk_format_has_depth(dst_img->format) &&
          vk_format_has_stencil(dst_img->format) &&
          depth_stencil_interleaved(view_info) &&
          (dst_img_subres->aspectMask !=
           (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT))) {
         vk_atts[vk_render.colorAttachmentCount].loadOp =
            VK_ATTACHMENT_LOAD_OP_LOAD;
      }

      vk_render.colorAttachmentCount++;
   }

   disp->CmdBeginRendering(vk_command_buffer_to_handle(cmd), &vk_render);
   meta->cmd_draw_volume(cmd, meta, &rect, vk_render.layerCount);
   disp->CmdEndRendering(vk_command_buffer_to_handle(cmd));
}

static void
copy_buffer_to_image_region_gfx(
   struct vk_command_buffer *cmd, struct vk_meta_device *meta,
   struct vk_image *img, VkImageLayout img_layout,
   const struct vk_meta_copy_image_properties *img_props, VkBuffer buffer,
   const struct vk_image_buffer_layout *buf_layout,
   const VkBufferImageCopy2 *region)
{
   struct vk_device *dev = cmd->base.device;
   const struct vk_device_dispatch_table *disp = &dev->dispatch_table;

   /* We only special-case 1D_ARRAY to move the layer ID to the second
    * component instead of the third. For all other view types, let's pick an
    * invalid VkImageViewType value so we don't end up creating the same
    * pipeline multiple times. */
   VkImageViewType view_type =
      img->image_type == VK_IMAGE_TYPE_1D && img->array_layers > 1
         ? VK_IMAGE_VIEW_TYPE_1D_ARRAY
         : (VkImageViewType)-1;

   struct vk_meta_copy_buffer_image_key key = {
      .key_type = VK_META_OBJECT_KEY_COPY_BUFFER_TO_IMAGE_PIPELINE,
      .bind_point = VK_PIPELINE_BIND_POINT_GRAPHICS,
      .img = {
         .view = img_copy_view_info(view_type,
                                    region->imageSubresource.aspectMask, img,
                                    img_props),
         .aspect = region->imageSubresource.aspectMask,
      },
   };

   VkPipelineLayout pipeline_layout;
   VkPipeline pipeline;
   VkResult result = get_copy_buffer_to_image_gfx_pipeline(
      dev, meta, &key, &pipeline_layout, &pipeline);
   if (unlikely(result != VK_SUCCESS)) {
      vk_command_buffer_set_error(cmd, result);
      return;
   }

   disp->CmdBindPipeline(vk_command_buffer_to_handle(cmd),
                         VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);

   result = copy_buffer_image_prepare_gfx_push_const(
      cmd, meta, &key, pipeline_layout, buffer, buf_layout, img, region);
   if (unlikely(result != VK_SUCCESS)) {
      vk_command_buffer_set_error(cmd, result);
      return;
   }

   copy_draw(cmd, meta, img, img_layout, &region->imageSubresource,
             &region->imageOffset, &region->imageExtent, &key.img.view);
}

static void
copy_buffer_to_image_region_compute(
   struct vk_command_buffer *cmd, struct vk_meta_device *meta,
   struct vk_image *img, VkImageLayout img_layout,
   const struct vk_meta_copy_image_properties *img_props, VkBuffer buffer,
   const struct vk_image_buffer_layout *buf_layout,
   const VkBufferImageCopy2 *region)
{
   struct vk_device *dev = cmd->base.device;
   const struct vk_device_dispatch_table *disp = &dev->dispatch_table;
   VkImageViewType view_type = vk_image_storage_view_type(img);
   struct vk_meta_copy_buffer_image_key key = {
      .key_type = VK_META_OBJECT_KEY_COPY_BUFFER_TO_IMAGE_PIPELINE,
      .bind_point = VK_PIPELINE_BIND_POINT_COMPUTE,
      .img = {
         .view = img_copy_view_info(view_type,
                                    region->imageSubresource.aspectMask, img,
                                    img_props),
         .aspect = region->imageSubresource.aspectMask,
      },
      .wg_size = {
         img_props->tile_size.width,
         img_props->tile_size.height,
         img_props->tile_size.depth,
      },
   };

   VkPipelineLayout pipeline_layout;
   VkPipeline pipeline;
   VkResult result = get_copy_buffer_to_image_compute_pipeline(
      dev, meta, &key, &pipeline_layout, &pipeline);
   if (unlikely(result != VK_SUCCESS)) {
      vk_command_buffer_set_error(cmd, result);
      return;
   }

   disp->CmdBindPipeline(vk_command_buffer_to_handle(cmd),
                         VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);

   VkImageView iview;
   result = copy_create_dst_image_view(
      cmd, meta, img, &key.img.view, region->imageSubresource.aspectMask,
      &region->imageOffset, &region->imageExtent, &region->imageSubresource,
      VK_PIPELINE_BIND_POINT_COMPUTE, &iview);
   if (unlikely(result != VK_SUCCESS)) {
      vk_command_buffer_set_error(cmd, result);
      return;
   }

   const VkWriteDescriptorSet descs[] = {
      COPY_PUSH_SET_IMG_DESC(0, STORAGE, iview, img_layout),
   };

   disp->CmdPushDescriptorSetKHR(vk_command_buffer_to_handle(cmd),
                                 VK_PIPELINE_BIND_POINT_COMPUTE,
                                 pipeline_layout, 0, ARRAY_SIZE(descs), descs);

   uint32_t wg_count[3] = {0};

   result = copy_buffer_image_prepare_compute_push_const(
      cmd, meta, &key, pipeline_layout, buffer, buf_layout, img, region,
      wg_count);
   if (unlikely(result != VK_SUCCESS)) {
      vk_command_buffer_set_error(cmd, result);
      return;
   }

   disp->CmdDispatch(vk_command_buffer_to_handle(cmd),
                     wg_count[0], wg_count[1], wg_count[2]);
}

void
vk_meta_copy_buffer_to_image(
   struct vk_command_buffer *cmd, struct vk_meta_device *meta,
   const VkCopyBufferToImageInfo2 *info,
   const struct vk_meta_copy_image_properties *img_props,
   VkPipelineBindPoint bind_point)
{
   VK_FROM_HANDLE(vk_image, img, info->dstImage);

   for (uint32_t i = 0; i < info->regionCount; i++) {
      VkBufferImageCopy2 region = info->pRegions[i];
      struct vk_image_buffer_layout buf_layout =
         vk_image_buffer_copy_layout(img, &region);

      region.imageExtent = vk_image_extent_to_elements(img, region.imageExtent);
      region.imageOffset = vk_image_offset_to_elements(img, region.imageOffset);

      if (bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) {
         copy_buffer_to_image_region_gfx(cmd, meta, img, info->dstImageLayout,
                                         img_props, info->srcBuffer,
                                         &buf_layout, &region);
      } else {
         copy_buffer_to_image_region_compute(cmd, meta, img,
                                             info->dstImageLayout, img_props,
                                             info->srcBuffer, &buf_layout,
                                             &region);
      }
   }
}
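
/* Note on bind_point: both paths produce the same result, so the choice is
 * driver policy. A plausible split (an assumption, not something this helper
 * mandates) is VK_PIPELINE_BIND_POINT_GRAPHICS on universal queues and
 * VK_PIPELINE_BIND_POINT_COMPUTE on compute-only queues, since the graphics
 * path renders to the destination while the compute path writes it as a
 * storage image. */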

static nir_shader *
build_copy_image_fs(const struct vk_meta_device *meta, const void *key_data)
{
   const struct vk_meta_copy_image_key *key = key_data;

   assert(key->bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS);

   nir_builder builder = nir_builder_init_simple_shader(
      MESA_SHADER_FRAGMENT, NULL, "vk-meta-copy-image-frag");
   nir_builder *b = &builder;

   b->shader->info.fs.uses_sample_shading =
      key->samples != VK_SAMPLE_COUNT_1_BIT;

   nir_def *out_coord_xy = nir_f2u32(b, nir_load_frag_coord(b));
   nir_def *out_layer = nir_load_layer_id(b);

   nir_def *src_offset = nir_vec3(b,
      load_info(b, struct vk_meta_copy_image_fs_info, dst_to_src_offs.x),
      load_info(b, struct vk_meta_copy_image_fs_info, dst_to_src_offs.y),
      load_info(b, struct vk_meta_copy_image_fs_info, dst_to_src_offs.z));

   /* Move the layer ID to the second coordinate if we're dealing with a 1D
    * array, as this is where the texture instruction expects it. */
   nir_def *src_coords =
      key->dst.view.type == VK_IMAGE_VIEW_TYPE_1D_ARRAY
         ? nir_vec3(b, nir_channel(b, out_coord_xy, 0), out_layer,
                    nir_imm_int(b, 0))
         : nir_vec3(b, nir_channel(b, out_coord_xy, 0),
                    nir_channel(b, out_coord_xy, 1), out_layer);

   src_coords = trim_img_coords(b, key->src.view.type,
                                nir_iadd(b, src_coords, src_offset));

   nir_def *sample_id =
      key->samples != VK_SAMPLE_COUNT_1_BIT ? nir_load_sample_id(b) : NULL;
   nir_variable *color_var = NULL;
   uint32_t tex_binding = 0;

   u_foreach_bit(a, key->aspects) {
      VkImageAspectFlagBits aspect = 1 << a;
      VkFormat src_fmt =
         copy_img_view_format_for_aspect(&key->src.view, aspect);
      VkFormat dst_fmt =
         copy_img_view_format_for_aspect(&key->dst.view, aspect);
      nir_deref_instr *tex =
         tex_deref(b, &key->src.view, aspect, key->samples, tex_binding++);
      nir_def *texel = read_texel(b, tex, src_coords, sample_id);

      if (!color_var || !depth_stencil_interleaved(&key->dst.view)) {
         color_var =
            frag_var(b, &key->dst.view, aspect, color_var != NULL ? 1 : 0);
      }

      texel = convert_texel(b, src_fmt, dst_fmt, texel);
      write_frag(b, &key->dst.view, aspect, color_var, texel);
   }

   return b->shader;
}

static VkResult
get_copy_image_gfx_pipeline(struct vk_device *device,
                            struct vk_meta_device *meta,
                            const struct vk_meta_copy_image_key *key,
                            VkPipelineLayout *layout_out,
                            VkPipeline *pipeline_out)
{
   const VkDescriptorSetLayoutBinding bindings[] = {
      COPY_SHADER_BINDING(0, SAMPLED_IMAGE, FRAGMENT),
      COPY_SHADER_BINDING(1, SAMPLED_IMAGE, FRAGMENT),
   };

   VkResult result = get_copy_pipeline_layout(
      device, meta, "vk-meta-copy-image-gfx-pipeline-layout",
      VK_SHADER_STAGE_FRAGMENT_BIT, sizeof(struct vk_meta_copy_image_fs_info),
      bindings, ARRAY_SIZE(bindings), layout_out);
   if (unlikely(result != VK_SUCCESS))
      return result;

   return get_gfx_copy_pipeline(
      device, meta, *layout_out, key->samples, build_copy_image_fs,
      key->aspects, &key->dst.view, key, sizeof(*key), pipeline_out);
}

static nir_shader *
build_copy_image_cs(const struct vk_meta_device *meta, const void *key_data)
{
   const struct vk_meta_copy_image_key *key = key_data;

   assert(key->bind_point == VK_PIPELINE_BIND_POINT_COMPUTE);

   nir_builder builder = nir_builder_init_simple_shader(
      MESA_SHADER_COMPUTE, NULL, "vk-meta-copy-image-compute");
   nir_builder *b = &builder;

   b->shader->info.workgroup_size[0] = key->wg_size[0];
   b->shader->info.workgroup_size[1] = key->wg_size[1];
   b->shader->info.workgroup_size[2] = key->wg_size[2];

   nir_def *copy_id = nir_load_global_invocation_id(b, 32);
   nir_def *copy_id_start = nir_vec3(b,
      load_info(b, struct vk_meta_copy_image_cs_info, copy_id_range.start.x),
      load_info(b, struct vk_meta_copy_image_cs_info, copy_id_range.start.y),
      load_info(b, struct vk_meta_copy_image_cs_info, copy_id_range.start.z));
   nir_def *copy_id_end = nir_vec3(b,
      load_info(b, struct vk_meta_copy_image_cs_info, copy_id_range.end.x),
      load_info(b, struct vk_meta_copy_image_cs_info, copy_id_range.end.y),
      load_info(b, struct vk_meta_copy_image_cs_info, copy_id_range.end.z));

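   /* The dispatch is tile-aligned: copy_id_range.start is the offset within
    * the first tile and copy_id_range.end is start plus the copy extent, so
    * invocations outside [start, end) belong to partially covered edge tiles
    * and must be discarded. */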
   nir_def *in_bounds =
      nir_iand(b, nir_ball(b, nir_uge(b, copy_id, copy_id_start)),
               nir_ball(b, nir_ult(b, copy_id, copy_id_end)));

   nir_push_if(b, in_bounds);

   nir_def *src_offset = nir_vec3(b,
      load_info(b, struct vk_meta_copy_image_cs_info, src_img.offset.x),
      load_info(b, struct vk_meta_copy_image_cs_info, src_img.offset.y),
      load_info(b, struct vk_meta_copy_image_cs_info, src_img.offset.z));
   nir_def *dst_offset = nir_vec3(b,
      load_info(b, struct vk_meta_copy_image_cs_info, dst_img.offset.x),
      load_info(b, struct vk_meta_copy_image_cs_info, dst_img.offset.y),
      load_info(b, struct vk_meta_copy_image_cs_info, dst_img.offset.z));

   nir_def *src_coords = trim_img_coords(b, key->src.view.type,
                                         nir_iadd(b, copy_id, src_offset));
   nir_def *dst_coords = trim_img_coords(b, key->dst.view.type,
                                         nir_iadd(b, copy_id, dst_offset));

   dst_coords = nir_pad_vector_imm_int(b, dst_coords, 0, 4);

   uint32_t binding = 0;
   u_foreach_bit(a, key->aspects) {
      VkImageAspectFlagBits aspect = 1 << a;
      VkFormat src_fmt =
         copy_img_view_format_for_aspect(&key->src.view, aspect);
      VkFormat dst_fmt =
         copy_img_view_format_for_aspect(&key->dst.view, aspect);
      nir_deref_instr *tex =
         tex_deref(b, &key->src.view, aspect, key->samples, binding);
      nir_deref_instr *img =
         img_deref(b, &key->dst.view, aspect, key->samples, binding + 1);

      for (uint32_t s = 0; s < key->samples; s++) {
         nir_def *sample_id =
            key->samples == VK_SAMPLE_COUNT_1_BIT ? NULL : nir_imm_int(b, s);
         nir_def *texel = read_texel(b, tex, src_coords, sample_id);

         texel = convert_texel(b, src_fmt, dst_fmt, texel);
         write_img(b, &key->dst.view, aspect, key->samples, img, dst_coords,
                   sample_id, texel);
      }

      binding += 2;
   }

   nir_pop_if(b, NULL);

   return b->shader;
}

static VkResult
get_copy_image_compute_pipeline(struct vk_device *device,
                                struct vk_meta_device *meta,
                                const struct vk_meta_copy_image_key *key,
                                VkPipelineLayout *layout_out,
                                VkPipeline *pipeline_out)
{
   const VkDescriptorSetLayoutBinding bindings[] = {
      COPY_SHADER_BINDING(0, SAMPLED_IMAGE, COMPUTE),
      COPY_SHADER_BINDING(1, STORAGE_IMAGE, COMPUTE),
      COPY_SHADER_BINDING(2, SAMPLED_IMAGE, COMPUTE),
      COPY_SHADER_BINDING(3, STORAGE_IMAGE, COMPUTE),
   };

   VkResult result = get_copy_pipeline_layout(
      device, meta, "vk-meta-copy-image-compute-pipeline-layout",
      VK_SHADER_STAGE_COMPUTE_BIT, sizeof(struct vk_meta_copy_image_cs_info),
      bindings, ARRAY_SIZE(bindings), layout_out);
   if (unlikely(result != VK_SUCCESS))
      return result;

   return get_compute_copy_pipeline(device, meta, *layout_out,
                                    build_copy_image_cs, key, sizeof(*key),
                                    pipeline_out);
}

static VkResult
copy_image_prepare_gfx_desc_set(
   struct vk_command_buffer *cmd, struct vk_meta_device *meta,
   const struct vk_meta_copy_image_key *key, VkPipelineLayout pipeline_layout,
   struct vk_image *src_img, VkImageLayout src_img_layout,
   struct vk_image *dst_img, VkImageLayout dst_img_layout,
   const VkImageCopy2 *region)
{
   struct vk_device *dev = cmd->base.device;
   const struct vk_device_dispatch_table *disp = &dev->dispatch_table;
   VkImageAspectFlags aspects = key->aspects;
   VkImageView iviews[] = {
      VK_NULL_HANDLE,
      VK_NULL_HANDLE,
   };
   uint32_t desc_count = 0;

   u_foreach_bit(a, aspects) {
      assert(desc_count < ARRAY_SIZE(iviews));

      VkResult result = copy_create_src_image_view(
         cmd, meta, src_img, &key->src.view, 1 << a, &region->srcSubresource,
         &iviews[desc_count++]);
      if (unlikely(result != VK_SUCCESS))
         return result;
   }

   VkWriteDescriptorSet descs[2] = {
      COPY_PUSH_SET_IMG_DESC(0, SAMPLED, iviews[0], src_img_layout),
      COPY_PUSH_SET_IMG_DESC(1, SAMPLED, iviews[1], src_img_layout),
   };

   disp->CmdPushDescriptorSetKHR(vk_command_buffer_to_handle(cmd),
                                 VK_PIPELINE_BIND_POINT_GRAPHICS,
                                 pipeline_layout, 0, desc_count, descs);
   return VK_SUCCESS;
}

static VkResult
copy_image_prepare_compute_desc_set(
   struct vk_command_buffer *cmd, struct vk_meta_device *meta,
   const struct vk_meta_copy_image_key *key, VkPipelineLayout pipeline_layout,
   struct vk_image *src_img, VkImageLayout src_img_layout,
   struct vk_image *dst_img, VkImageLayout dst_img_layout,
   const VkImageCopy2 *region)
{
   struct vk_device *dev = cmd->base.device;
   const struct vk_device_dispatch_table *disp = &dev->dispatch_table;
   VkImageAspectFlags aspects = key->aspects;
   VkImageView iviews[] = {
      VK_NULL_HANDLE,
      VK_NULL_HANDLE,
      VK_NULL_HANDLE,
      VK_NULL_HANDLE,
   };
   unsigned desc_count = 0;

   u_foreach_bit(a, aspects) {
      VkImageAspectFlagBits aspect = 1 << a;

      assert(desc_count + 2 <= ARRAY_SIZE(iviews));

      VkResult result = copy_create_src_image_view(
         cmd, meta, src_img, &key->src.view, aspect, &region->srcSubresource,
         &iviews[desc_count++]);
      if (unlikely(result != VK_SUCCESS))
         return result;

      result = copy_create_dst_image_view(
         cmd, meta, dst_img, &key->dst.view, aspect, &region->dstOffset,
         &region->extent, &region->dstSubresource,
         VK_PIPELINE_BIND_POINT_COMPUTE, &iviews[desc_count++]);
      if (unlikely(result != VK_SUCCESS))
         return result;
   }

   VkWriteDescriptorSet descs[] = {
      COPY_PUSH_SET_IMG_DESC(0, SAMPLED, iviews[0], src_img_layout),
      COPY_PUSH_SET_IMG_DESC(1, STORAGE, iviews[1], dst_img_layout),
      COPY_PUSH_SET_IMG_DESC(2, SAMPLED, iviews[2], src_img_layout),
      COPY_PUSH_SET_IMG_DESC(3, STORAGE, iviews[3], dst_img_layout),
   };

   disp->CmdPushDescriptorSetKHR(vk_command_buffer_to_handle(cmd),
                                 VK_PIPELINE_BIND_POINT_COMPUTE,
                                 pipeline_layout, 0, desc_count, descs);
   return VK_SUCCESS;
}

enum vk_meta_copy_image_align_policy {
   VK_META_COPY_IMAGE_ALIGN_ON_SRC_TILE,
   VK_META_COPY_IMAGE_ALIGN_ON_DST_TILE,
};

static VkResult
copy_image_prepare_compute_push_const(
   struct vk_command_buffer *cmd, struct vk_meta_device *meta,
   const struct vk_meta_copy_image_key *key, VkPipelineLayout pipeline_layout,
   const struct vk_image *src, const struct vk_image *dst,
   enum vk_meta_copy_image_align_policy align_policy,
   const VkImageCopy2 *region, uint32_t *wg_count)
{
   struct vk_device *dev = cmd->base.device;
   const struct vk_device_dispatch_table *disp = &dev->dispatch_table;
   VkOffset3D src_offs =
      base_layer_as_offset(key->src.view.type, region->srcOffset,
                           region->srcSubresource.baseArrayLayer);
   uint32_t layer_count =
      vk_image_subresource_layer_count(src, &region->srcSubresource);
   VkExtent3D src_extent =
      layer_count_as_extent(key->src.view.type, region->extent, layer_count);
   VkOffset3D dst_offs =
      base_layer_as_offset(key->dst.view.type, region->dstOffset,
                           region->dstSubresource.baseArrayLayer);

   struct vk_meta_copy_image_cs_info info = {0};

   /* We can't align the copy-ID space with both the source and destination
    * tile layouts at once, so the caller picks the bigger tile and tells us
    * which side to align on. */
   if (align_policy == VK_META_COPY_IMAGE_ALIGN_ON_SRC_TILE) {
      info.copy_id_range.start.x = src_offs.x % key->wg_size[0];
      info.copy_id_range.start.y = src_offs.y % key->wg_size[1];
      info.copy_id_range.start.z = src_offs.z % key->wg_size[2];
   } else {
      info.copy_id_range.start.x = dst_offs.x % key->wg_size[0];
      info.copy_id_range.start.y = dst_offs.y % key->wg_size[1];
      info.copy_id_range.start.z = dst_offs.z % key->wg_size[2];
   }

   info.copy_id_range.end.x = info.copy_id_range.start.x + src_extent.width;
   info.copy_id_range.end.y = info.copy_id_range.start.y + src_extent.height;
   info.copy_id_range.end.z = info.copy_id_range.start.z + src_extent.depth;

   info.src_img.offset.x = src_offs.x - info.copy_id_range.start.x;
   info.src_img.offset.y = src_offs.y - info.copy_id_range.start.y;
   info.src_img.offset.z = src_offs.z - info.copy_id_range.start.z;
   info.dst_img.offset.x = dst_offs.x - info.copy_id_range.start.x;
   info.dst_img.offset.y = dst_offs.y - info.copy_id_range.start.y;
   info.dst_img.offset.z = dst_offs.z - info.copy_id_range.start.z;
   wg_count[0] = DIV_ROUND_UP(info.copy_id_range.end.x, key->wg_size[0]);
   wg_count[1] = DIV_ROUND_UP(info.copy_id_range.end.y, key->wg_size[1]);
   wg_count[2] = DIV_ROUND_UP(info.copy_id_range.end.z, key->wg_size[2]);
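   /* A worked example (illustrative values): with a 16x16x1 workgroup
    * aligned on the source tile, src_offs = (24, 7, 0) and a 100x50x1
    * extent, start = (8, 7, 0), end = (108, 57, 1) and wg_count = (7, 4, 1),
    * while src_img.offset = (16, 0, 0) so that invocation (8, 7, 0) reads
    * texel (24, 7, 0). */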

   disp->CmdPushConstants(vk_command_buffer_to_handle(cmd), pipeline_layout,
                          VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(info), &info);

   return VK_SUCCESS;
}

static VkResult
copy_image_prepare_gfx_push_const(struct vk_command_buffer *cmd,
                                  struct vk_meta_device *meta,
                                  const struct vk_meta_copy_image_key *key,
                                  VkPipelineLayout pipeline_layout,
                                  struct vk_image *src_img,
                                  struct vk_image *dst_img,
                                  const VkImageCopy2 *region)
{
   struct vk_device *dev = cmd->base.device;
   const struct vk_device_dispatch_table *disp = &dev->dispatch_table;
   VkOffset3D src_img_offs =
      base_layer_as_offset(key->src.view.type, region->srcOffset,
                           region->srcSubresource.baseArrayLayer);

   struct vk_meta_copy_image_fs_info info = {
      .dst_to_src_offs = {
         /* The subtraction may lead to negative values, but that's fine
          * because the shader does the mirror operation, guaranteeing that
          * src_coords >= 0. */
         .x = src_img_offs.x - region->dstOffset.x,
         .y = src_img_offs.y - region->dstOffset.y,
         /* The render image view only contains the layers needed for
          * rendering, so we consider the coordinate containing the layer to
          * always be zero. */
         .z = src_img_offs.z,
      },
   };

   disp->CmdPushConstants(vk_command_buffer_to_handle(cmd), pipeline_layout,
                          VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(info), &info);

   return VK_SUCCESS;
}

static void
copy_image_region_gfx(struct vk_command_buffer *cmd,
                      struct vk_meta_device *meta, struct vk_image *src_img,
                      VkImageLayout src_image_layout,
                      const struct vk_meta_copy_image_properties *src_props,
                      struct vk_image *dst_img, VkImageLayout dst_image_layout,
                      const struct vk_meta_copy_image_properties *dst_props,
                      const VkImageCopy2 *region)
{
   struct vk_device *dev = cmd->base.device;
   const struct vk_device_dispatch_table *disp = &dev->dispatch_table;

   /* We only special-case 1D_ARRAY to move the layer ID to the second
    * component instead of the third. For all other view types, let's pick an
    * invalid VkImageViewType value so we don't end up creating the same
    * pipeline multiple times. */
   VkImageViewType dst_view_type =
      dst_img->image_type == VK_IMAGE_TYPE_1D && dst_img->array_layers > 1
         ? VK_IMAGE_VIEW_TYPE_1D_ARRAY
         : (VkImageViewType)-1;

   assert(region->srcSubresource.aspectMask ==
          region->dstSubresource.aspectMask);

   struct vk_meta_copy_image_key key = {
      .key_type = VK_META_OBJECT_KEY_COPY_IMAGE_PIPELINE,
      .bind_point = VK_PIPELINE_BIND_POINT_GRAPHICS,
      .samples = src_img->samples,
      .aspects = region->srcSubresource.aspectMask,
      .src.view = img_copy_view_info(vk_image_sampled_view_type(src_img),
                                     region->srcSubresource.aspectMask, src_img,
                                     src_props),
      .dst.view = img_copy_view_info(dst_view_type,
                                     region->dstSubresource.aspectMask, dst_img,
                                     dst_props),
   };

   VkPipelineLayout pipeline_layout;
   VkPipeline pipeline;
   VkResult result =
      get_copy_image_gfx_pipeline(dev, meta, &key, &pipeline_layout, &pipeline);
   if (unlikely(result != VK_SUCCESS)) {
      vk_command_buffer_set_error(cmd, result);
      return;
   }

   disp->CmdBindPipeline(vk_command_buffer_to_handle(cmd),
                         VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);

   result = copy_image_prepare_gfx_desc_set(cmd, meta, &key, pipeline_layout,
                                            src_img, src_image_layout, dst_img,
                                            dst_image_layout, region);
   if (unlikely(result != VK_SUCCESS)) {
      vk_command_buffer_set_error(cmd, result);
      return;
   }

   result = copy_image_prepare_gfx_push_const(cmd, meta, &key, pipeline_layout,
                                              src_img, dst_img, region);
   if (unlikely(result != VK_SUCCESS)) {
      vk_command_buffer_set_error(cmd, result);
      return;
   }

   copy_draw(cmd, meta, dst_img, dst_image_layout, &region->dstSubresource,
             &region->dstOffset, &region->extent, &key.dst.view);
}

static void
copy_image_region_compute(struct vk_command_buffer *cmd,
                          struct vk_meta_device *meta, struct vk_image *src_img,
                          VkImageLayout src_image_layout,
                          const struct vk_meta_copy_image_properties *src_props,
                          struct vk_image *dst_img,
                          VkImageLayout dst_image_layout,
                          const struct vk_meta_copy_image_properties *dst_props,
                          const VkImageCopy2 *region)
{
   struct vk_device *dev = cmd->base.device;
   const struct vk_device_dispatch_table *disp = &dev->dispatch_table;
   VkImageViewType dst_view_type = vk_image_storage_view_type(dst_img);

   assert(region->srcSubresource.aspectMask ==
          region->dstSubresource.aspectMask);

   struct vk_meta_copy_image_key key = {
      .key_type = VK_META_OBJECT_KEY_COPY_IMAGE_PIPELINE,
      .bind_point = VK_PIPELINE_BIND_POINT_COMPUTE,
      .samples = src_img->samples,
      .aspects = region->srcSubresource.aspectMask,
      .src.view = img_copy_view_info(vk_image_sampled_view_type(src_img),
                                     region->srcSubresource.aspectMask, src_img,
                                     src_props),
      .dst.view = img_copy_view_info(
         dst_view_type, region->dstSubresource.aspectMask, dst_img, dst_props),
   };

   uint32_t src_pix_per_tile = src_props->tile_size.width *
                               src_props->tile_size.height *
                               src_props->tile_size.depth;
   uint32_t dst_pix_per_tile = dst_props->tile_size.width *
                               dst_props->tile_size.height *
                               dst_props->tile_size.depth;
   enum vk_meta_copy_image_align_policy align_policy;

   if (src_pix_per_tile >= dst_pix_per_tile) {
      key.wg_size[0] = src_props->tile_size.width;
      key.wg_size[1] = src_props->tile_size.height;
      key.wg_size[2] = src_props->tile_size.depth;
      align_policy = VK_META_COPY_IMAGE_ALIGN_ON_SRC_TILE;
   } else {
      key.wg_size[0] = dst_props->tile_size.width;
      key.wg_size[1] = dst_props->tile_size.height;
      key.wg_size[2] = dst_props->tile_size.depth;
      align_policy = VK_META_COPY_IMAGE_ALIGN_ON_DST_TILE;
   }

   VkPipelineLayout pipeline_layout;
   VkPipeline pipeline;
   VkResult result = get_copy_image_compute_pipeline(
      dev, meta, &key, &pipeline_layout, &pipeline);
   if (unlikely(result != VK_SUCCESS)) {
      vk_command_buffer_set_error(cmd, result);
      return;
   }

   disp->CmdBindPipeline(vk_command_buffer_to_handle(cmd),
                         VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);

   result = copy_image_prepare_compute_desc_set(
      cmd, meta, &key, pipeline_layout, src_img, src_image_layout, dst_img,
      dst_image_layout, region);
   if (unlikely(result != VK_SUCCESS)) {
      vk_command_buffer_set_error(cmd, result);
      return;
   }

   assert(key.wg_size[0] && key.wg_size[1] && key.wg_size[2]);

   uint32_t wg_count[3] = {0};

   result = copy_image_prepare_compute_push_const(
      cmd, meta, &key, pipeline_layout, src_img, dst_img, align_policy, region,
      wg_count);
   if (unlikely(result != VK_SUCCESS)) {
      vk_command_buffer_set_error(cmd, result);
      return;
   }

   disp->CmdDispatch(vk_command_buffer_to_handle(cmd), wg_count[0], wg_count[1],
                     wg_count[2]);
}

void
vk_meta_copy_image(struct vk_command_buffer *cmd, struct vk_meta_device *meta,
                   const VkCopyImageInfo2 *info,
                   const struct vk_meta_copy_image_properties *src_props,
                   const struct vk_meta_copy_image_properties *dst_props,
                   VkPipelineBindPoint bind_point)
{
   VK_FROM_HANDLE(vk_image, src_img, info->srcImage);
   VK_FROM_HANDLE(vk_image, dst_img, info->dstImage);

   for (uint32_t i = 0; i < info->regionCount; i++) {
      VkImageCopy2 region = info->pRegions[i];

      region.extent = vk_image_extent_to_elements(src_img, region.extent);
      region.srcOffset = vk_image_offset_to_elements(src_img, region.srcOffset);
      region.dstOffset = vk_image_offset_to_elements(dst_img, region.dstOffset);

      if (bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) {
         copy_image_region_gfx(cmd, meta, src_img, info->srcImageLayout,
                               src_props, dst_img, info->dstImageLayout,
                               dst_props, &region);
      } else {
         copy_image_region_compute(cmd, meta, src_img, info->srcImageLayout,
                                   src_props, dst_img, info->dstImageLayout,
                                   dst_props, &region);
      }
   }
}
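
/* Multisampled images are handled by both paths: the fragment-shader path
 * enables per-sample shading, while the compute path loops over every sample
 * of each texel. */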

static nir_shader *
build_copy_buffer_shader(const struct vk_meta_device *meta,
                         const void *key_data)
{
   const struct vk_meta_copy_buffer_key *key = key_data;
   nir_builder builder = nir_builder_init_simple_shader(
      MESA_SHADER_COMPUTE, NULL, "vk-meta-copy-buffer");
   nir_builder *b = &builder;

   b->shader->info.workgroup_size[0] =
      vk_meta_buffer_access_wg_size(meta, key->chunk_size);
   b->shader->info.workgroup_size[1] = 1;
   b->shader->info.workgroup_size[2] = 1;

   uint32_t chunk_bit_size, chunk_comp_count;

   assert(util_is_power_of_two_nonzero(key->chunk_size));
   if (key->chunk_size <= 4) {
      chunk_bit_size = key->chunk_size * 8;
      chunk_comp_count = 1;
   } else {
      chunk_bit_size = 32;
      chunk_comp_count = key->chunk_size / 4;
   }
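   /* Each invocation moves one chunk: e.g. chunk_size == 2 loads/stores a
    * single 16-bit word, while chunk_size == 16 moves a uvec4
    * (4 x 32-bit components). */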

   assert(chunk_comp_count < NIR_MAX_VEC_COMPONENTS);

   nir_def *global_id = nir_load_global_invocation_id(b, 32);
   nir_def *copy_id = nir_channel(b, global_id, 0);
   nir_def *offset = nir_imul_imm(b, copy_id, key->chunk_size);
   nir_def *size = load_info(b, struct vk_meta_copy_buffer_info, size);

   nir_push_if(b, nir_ult(b, offset, size));

   offset = nir_u2u64(b, offset);

   nir_def *src_addr = load_info(b, struct vk_meta_copy_buffer_info, src_addr);
   nir_def *dst_addr = load_info(b, struct vk_meta_copy_buffer_info, dst_addr);
   nir_def *data = nir_build_load_global(b, chunk_comp_count, chunk_bit_size,
                                         nir_iadd(b, src_addr, offset),
                                         .align_mul = chunk_bit_size / 8);

   nir_build_store_global(b, data, nir_iadd(b, dst_addr, offset),
                          .align_mul = key->chunk_size);

   nir_pop_if(b, NULL);

   return b->shader;
}

static VkResult
get_copy_buffer_pipeline(struct vk_device *device, struct vk_meta_device *meta,
                         const struct vk_meta_copy_buffer_key *key,
                         VkPipelineLayout *layout_out, VkPipeline *pipeline_out)
{
   VkResult result = get_copy_pipeline_layout(
      device, meta, "vk-meta-copy-buffer-pipeline-layout",
      VK_SHADER_STAGE_COMPUTE_BIT, sizeof(struct vk_meta_copy_buffer_info),
      NULL, 0, layout_out);
   if (unlikely(result != VK_SUCCESS))
      return result;

   return get_compute_copy_pipeline(device, meta, *layout_out,
                                    build_copy_buffer_shader, key, sizeof(*key),
                                    pipeline_out);
}

static void
copy_buffer_region(struct vk_command_buffer *cmd, struct vk_meta_device *meta,
                   VkBuffer src, VkBuffer dst, const VkBufferCopy2 *region)
{
   struct vk_device *dev = cmd->base.device;
   const struct vk_physical_device *pdev = dev->physical;
   const struct vk_device_dispatch_table *disp = &dev->dispatch_table;
   VkResult result;

   struct vk_meta_copy_buffer_key key = {
      .key_type = VK_META_OBJECT_KEY_COPY_BUFFER_PIPELINE,
   };

   VkDeviceSize size = region->size;
   VkDeviceAddress src_addr =
      vk_meta_buffer_address(dev, src, region->srcOffset, size);
   VkDeviceAddress dst_addr =
      vk_meta_buffer_address(dev, dst, region->dstOffset, size);

   /* OR the size and src/dst addresses together to extract their common
    * alignment. */
   uint64_t align = src_addr | dst_addr | size;

   assert(align != 0);

   /* The lowest set bit of the combined value is the largest power of two
    * dividing both addresses and the size; use it as the chunk size, capped
    * at 16 bytes (a uvec4) for now.
    */
   key.chunk_size = MIN2(16, 1 << (ffs(align) - 1));
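   /* e.g. src_addr = 0x1000, dst_addr = 0x2004, size = 0x100:
    * align = 0x3104, ffs(align) - 1 = 2, so chunk_size = 4 bytes. */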

   VkPipelineLayout pipeline_layout;
   VkPipeline pipeline;
   result =
      get_copy_buffer_pipeline(dev, meta, &key, &pipeline_layout, &pipeline);
   if (unlikely(result != VK_SUCCESS)) {
      vk_command_buffer_set_error(cmd, result);
      return;
   }

   disp->CmdBindPipeline(vk_command_buffer_to_handle(cmd),
                         VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);

   const uint32_t optimal_wg_size =
      vk_meta_buffer_access_wg_size(meta, key.chunk_size);
   const uint32_t per_wg_copy_size = optimal_wg_size * key.chunk_size;
   uint32_t max_per_dispatch_size =
      pdev->properties.maxComputeWorkGroupCount[0] * per_wg_copy_size;

   assert(optimal_wg_size <= pdev->properties.maxComputeWorkGroupSize[0]);

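   /* A single dispatch can cover at most maxComputeWorkGroupCount[0]
    * workgroups, so large copies are split into multiple dispatches,
    * advancing the addresses by the amount copied each iteration. */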
   while (size) {
      struct vk_meta_copy_buffer_info args = {
         .size = MIN2(size, max_per_dispatch_size),
         .src_addr = src_addr,
         .dst_addr = dst_addr,
      };
      uint32_t wg_count = DIV_ROUND_UP(args.size, per_wg_copy_size);

      disp->CmdPushConstants(vk_command_buffer_to_handle(cmd), pipeline_layout,
                             VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(args),
                             &args);

      disp->CmdDispatch(vk_command_buffer_to_handle(cmd), wg_count, 1, 1);

      src_addr += args.size;
      dst_addr += args.size;
      size -= args.size;
   }
}

void
vk_meta_copy_buffer(struct vk_command_buffer *cmd, struct vk_meta_device *meta,
                    const VkCopyBufferInfo2 *info)
{
   for (unsigned i = 0; i < info->regionCount; i++) {
      const VkBufferCopy2 *region = &info->pRegions[i];

      copy_buffer_region(cmd, meta, info->srcBuffer, info->dstBuffer, region);
   }
}
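
/* A minimal sketch of driver glue for this entrypoint (the drv_* names are
 * hypothetical; only the vk_meta call is defined by this file):
 *
 *    VKAPI_ATTR void VKAPI_CALL
 *    drv_CmdCopyBuffer2(VkCommandBuffer commandBuffer,
 *                       const VkCopyBufferInfo2 *pInfo)
 *    {
 *       VK_FROM_HANDLE(drv_cmd_buffer, cmdbuf, commandBuffer);
 *
 *       vk_meta_copy_buffer(&cmdbuf->vk, &cmdbuf->device->meta, pInfo);
 *    }
 */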

void
vk_meta_update_buffer(struct vk_command_buffer *cmd,
                      struct vk_meta_device *meta, VkBuffer buffer,
                      VkDeviceSize offset, VkDeviceSize size, const void *data)
{
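   /* The update is staged through a temporary host-mapped source buffer:
    * memcpy the data into it, then reuse the copy-buffer path to move it to
    * the destination on the GPU. */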
   VkResult result;

   const VkBufferCreateInfo tmp_buffer_info = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
      .size = size,
      .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
      .queueFamilyIndexCount = 1,
      .pQueueFamilyIndices = &cmd->pool->queue_family_index,
   };

   VkBuffer tmp_buffer;
   result = vk_meta_create_buffer(cmd, meta, &tmp_buffer_info, &tmp_buffer);
   if (unlikely(result != VK_SUCCESS)) {
      vk_command_buffer_set_error(cmd, result);
      return;
   }

   void *tmp_buffer_map;
   result = meta->cmd_bind_map_buffer(cmd, meta, tmp_buffer, &tmp_buffer_map);
   if (unlikely(result != VK_SUCCESS)) {
      vk_command_buffer_set_error(cmd, result);
      return;
   }

   memcpy(tmp_buffer_map, data, size);

   const VkBufferCopy2 copy_region = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_COPY_2,
      .srcOffset = 0,
      .dstOffset = offset,
      .size = size,
   };
   const VkCopyBufferInfo2 copy_info = {
      .sType = VK_STRUCTURE_TYPE_COPY_BUFFER_INFO_2,
      .srcBuffer = tmp_buffer,
      .dstBuffer = buffer,
      .regionCount = 1,
      .pRegions = &copy_region,
   };

   vk_meta_copy_buffer(cmd, meta, &copy_info);
}

static nir_shader *
build_fill_buffer_shader(const struct vk_meta_device *meta,
                         UNUSED const void *key_data)
{
   nir_builder builder = nir_builder_init_simple_shader(
      MESA_SHADER_COMPUTE, NULL, "vk-meta-fill-buffer");
   nir_builder *b = &builder;

   b->shader->info.workgroup_size[0] = vk_meta_buffer_access_wg_size(meta, 4);
   b->shader->info.workgroup_size[1] = 1;
   b->shader->info.workgroup_size[2] = 1;

   nir_def *global_id = nir_load_global_invocation_id(b, 32);
   nir_def *copy_id = nir_channel(b, global_id, 0);
   nir_def *offset = nir_imul_imm(b, copy_id, 4);
   nir_def *size = load_info(b, struct vk_meta_fill_buffer_info, size);
   nir_def *data = load_info(b, struct vk_meta_fill_buffer_info, data);

   nir_push_if(b, nir_ult(b, offset, size));

   offset = nir_u2u64(b, offset);

   nir_def *buf_addr = load_info(b, struct vk_meta_fill_buffer_info, buf_addr);

   nir_build_store_global(b, data, nir_iadd(b, buf_addr, offset),
                          .align_mul = 4);

   nir_pop_if(b, NULL);

   return b->shader;
}

static VkResult
get_fill_buffer_pipeline(struct vk_device *device, struct vk_meta_device *meta,
                         const struct vk_meta_fill_buffer_key *key,
                         VkPipelineLayout *layout_out, VkPipeline *pipeline_out)
{
   VkResult result = get_copy_pipeline_layout(
      device, meta, "vk-meta-fill-buffer-pipeline-layout",
      VK_SHADER_STAGE_COMPUTE_BIT, sizeof(struct vk_meta_fill_buffer_info),
      NULL, 0, layout_out);
   if (unlikely(result != VK_SUCCESS))
      return result;

   return get_compute_copy_pipeline(device, meta, *layout_out,
                                    build_fill_buffer_shader, key, sizeof(*key),
                                    pipeline_out);
}

void
vk_meta_fill_buffer(struct vk_command_buffer *cmd, struct vk_meta_device *meta,
                    VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size,
                    uint32_t data)
{
   VK_FROM_HANDLE(vk_buffer, buf, buffer);
   struct vk_device *dev = cmd->base.device;
   const struct vk_physical_device *pdev = dev->physical;
   const struct vk_device_dispatch_table *disp = &dev->dispatch_table;
   VkResult result;

   struct vk_meta_fill_buffer_key key = {
      .key_type = VK_META_OBJECT_KEY_FILL_BUFFER_PIPELINE,
   };

   VkPipelineLayout pipeline_layout;
   VkPipeline pipeline;
   result =
      get_fill_buffer_pipeline(dev, meta, &key, &pipeline_layout, &pipeline);
   if (unlikely(result != VK_SUCCESS)) {
      vk_command_buffer_set_error(cmd, result);
      return;
   }

   disp->CmdBindPipeline(vk_command_buffer_to_handle(cmd),
                         VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);

   /* From the Vulkan 1.3.290 spec:
    *
    *   "If VK_WHOLE_SIZE is used and the remaining size of the buffer is not a
    *    multiple of 4, then the nearest smaller multiple is used."
    *
    * hence the mask to align the size on 4 bytes here.
    */
   size = vk_buffer_range(buf, offset, size) & ~3u;
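   /* e.g. a 103-byte remaining range is truncated to 100 bytes. */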

   const uint32_t optimal_wg_size = vk_meta_buffer_access_wg_size(meta, 4);
   const uint32_t per_wg_copy_size = optimal_wg_size * 4;
   uint32_t max_per_dispatch_size =
      pdev->properties.maxComputeWorkGroupCount[0] * per_wg_copy_size;

   while (size > 0) {
      struct vk_meta_fill_buffer_info args = {
         .size = MIN2(size, max_per_dispatch_size),
         .buf_addr = vk_meta_buffer_address(dev, buffer, offset, size),
         .data = data,
      };
      uint32_t wg_count = DIV_ROUND_UP(args.size, per_wg_copy_size);

      disp->CmdPushConstants(vk_command_buffer_to_handle(cmd), pipeline_layout,
                             VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(args),
                             &args);

      disp->CmdDispatch(vk_command_buffer_to_handle(cmd), wg_count, 1, 1);

      offset += args.size;
      size -= args.size;
   }
}