/*
 * Copyright 2024 Valve Corporation
 * Copyright 2024 Alyssa Rosenzweig
 * Copyright 2022-2023 Collabora Ltd. and Red Hat Inc.
 * SPDX-License-Identifier: MIT
 */
#include "util/format/u_format.h"
#include "util/format/u_formats.h"
#include "util/u_math.h"
#include "vulkan/vulkan_core.h"
#include "agx_pack.h"
#include "hk_buffer.h"
#include "hk_cmd_buffer.h"
#include "hk_device.h"
#include "hk_entrypoints.h"
#include "hk_image.h"
#include "hk_physical_device.h"

#include "layout.h"
#include "nir_builder.h"
#include "nir_builder_opcodes.h"
#include "nir_format_convert.h"
#include "shader_enums.h"
#include "vk_enum_to_str.h"
#include "vk_format.h"
#include "vk_meta.h"
#include "vk_pipeline.h"

/* For block-based blit kernels, we hardcode the maximum tile size, which we
 * can always achieve. This simplifies our life.
 */
#define TILE_WIDTH  32
#define TILE_HEIGHT 32

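/* vk_meta callback to back a meta buffer with transient pool memory, returning
 * a CPU mapping so vk_meta can write the contents directly.
 */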
static VkResult
hk_cmd_bind_map_buffer(struct vk_command_buffer *vk_cmd,
                       struct vk_meta_device *meta, VkBuffer _buffer,
                       void **map_out)
{
   struct hk_cmd_buffer *cmd = container_of(vk_cmd, struct hk_cmd_buffer, vk);
   VK_FROM_HANDLE(hk_buffer, buffer, _buffer);

   assert(buffer->vk.size < UINT_MAX);
   struct agx_ptr T = hk_pool_alloc(cmd, buffer->vk.size, 16);
   if (unlikely(T.cpu == NULL))
      return VK_ERROR_OUT_OF_POOL_MEMORY;

   buffer->addr = T.gpu;
   *map_out = T.cpu;
   return VK_SUCCESS;
}

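/* Set up vk_meta for this device, selecting the driver-specific paths vk_meta
 * should take (stencil export, rect-list pipelines) and routing its internal
 * buffers through the pool-backed map callback above.
 */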
VkResult
hk_device_init_meta(struct hk_device *dev)
{
   VkResult result = vk_meta_device_init(&dev->vk, &dev->meta);
   if (result != VK_SUCCESS)
      return result;

   dev->meta.use_gs_for_layer = false;
   dev->meta.use_stencil_export = true;
   dev->meta.use_rect_list_pipeline = true;
   dev->meta.cmd_bind_map_buffer = hk_cmd_bind_map_buffer;
   dev->meta.max_bind_map_buffer_size_B = 64 * 1024;

   for (unsigned i = 0; i < VK_META_BUFFER_CHUNK_SIZE_COUNT; ++i) {
      dev->meta.buffer_access.optimal_wg_size[i] = 64;
   }

   return VK_SUCCESS;
}

void
hk_device_finish_meta(struct hk_device *dev)
{
   vk_meta_device_finish(&dev->vk, &dev->meta);
}

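/* State clobbered by meta operations. hk_meta_begin() snapshots it (and pauses
 * queries); hk_meta_end() restores it and marks the affected state dirty so it
 * gets re-emitted on the next draw or dispatch.
 */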
struct hk_meta_save {
   struct vk_vertex_input_state _dynamic_vi;
   struct vk_sample_locations_state _dynamic_sl;
   struct vk_dynamic_graphics_state dynamic;
   struct hk_api_shader *shaders[MESA_SHADER_MESH + 1];
   struct hk_addr_range vb0;
   struct hk_descriptor_set *desc0;
   bool has_push_desc0;
   enum agx_visibility_mode occlusion;
   struct hk_push_descriptor_set push_desc0;
   VkQueryPipelineStatisticFlags pipeline_stats_flags;
   uint8_t push[HK_MAX_PUSH_SIZE];
};

static void
hk_meta_begin(struct hk_cmd_buffer *cmd, struct hk_meta_save *save,
              VkPipelineBindPoint bind_point)
{
   struct hk_descriptor_state *desc = hk_get_descriptors_state(cmd, bind_point);

   if (bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) {
      save->dynamic = cmd->vk.dynamic_graphics_state;
      save->_dynamic_vi = cmd->state.gfx._dynamic_vi;
      save->_dynamic_sl = cmd->state.gfx._dynamic_sl;

      static_assert(sizeof(cmd->state.gfx.shaders) == sizeof(save->shaders));
      memcpy(save->shaders, cmd->state.gfx.shaders, sizeof(save->shaders));

      /* Pause queries */
      save->occlusion = cmd->state.gfx.occlusion.mode;
      cmd->state.gfx.occlusion.mode = AGX_VISIBILITY_MODE_NONE;
      cmd->state.gfx.dirty |= HK_DIRTY_OCCLUSION;

      save->pipeline_stats_flags = desc->root.draw.pipeline_stats_flags;
      desc->root.draw.pipeline_stats_flags = 0;
      desc->root_dirty = true;
   } else {
      save->shaders[MESA_SHADER_COMPUTE] = cmd->state.cs.shader;
   }

   save->vb0 = cmd->state.gfx.vb[0];

   save->desc0 = desc->sets[0];
   save->has_push_desc0 = desc->push[0];
   if (save->has_push_desc0)
      save->push_desc0 = *desc->push[0];

   static_assert(sizeof(save->push) == sizeof(desc->root.push));
   memcpy(save->push, desc->root.push, sizeof(save->push));

   cmd->in_meta = true;
}

static void
hk_meta_init_render(struct hk_cmd_buffer *cmd,
                    struct vk_meta_rendering_info *info)
{
   const struct hk_rendering_state *render = &cmd->state.gfx.render;

   *info = (struct vk_meta_rendering_info){
      .samples = MAX2(render->tilebuffer.nr_samples, 1),
      .view_mask = render->view_mask,
      .color_attachment_count = render->color_att_count,
      .depth_attachment_format = render->depth_att.vk_format,
      .stencil_attachment_format = render->stencil_att.vk_format,
   };
   for (uint32_t a = 0; a < render->color_att_count; a++) {
      info->color_attachment_formats[a] = render->color_att[a].vk_format;
      info->color_attachment_write_masks[a] =
         VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
         VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT;
   }
}

static void
hk_meta_end(struct hk_cmd_buffer *cmd, struct hk_meta_save *save,
            VkPipelineBindPoint bind_point)
{
   struct hk_descriptor_state *desc = hk_get_descriptors_state(cmd, bind_point);
   desc->root_dirty = true;

   if (save->desc0) {
      desc->sets[0] = save->desc0;
      desc->root.sets[0] = hk_descriptor_set_addr(save->desc0);
      desc->sets_dirty |= BITFIELD_BIT(0);
      desc->push_dirty &= ~BITFIELD_BIT(0);
   } else if (save->has_push_desc0) {
      *desc->push[0] = save->push_desc0;
      desc->push_dirty |= BITFIELD_BIT(0);
   }

   if (bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) {
      /* Restore the dynamic state */
      assert(save->dynamic.vi == &cmd->state.gfx._dynamic_vi);
      assert(save->dynamic.ms.sample_locations == &cmd->state.gfx._dynamic_sl);
      cmd->vk.dynamic_graphics_state = save->dynamic;
      cmd->state.gfx._dynamic_vi = save->_dynamic_vi;
      cmd->state.gfx._dynamic_sl = save->_dynamic_sl;
      memcpy(cmd->vk.dynamic_graphics_state.dirty,
             cmd->vk.dynamic_graphics_state.set,
             sizeof(cmd->vk.dynamic_graphics_state.set));

      for (uint32_t stage = 0; stage < ARRAY_SIZE(save->shaders); stage++) {
         hk_cmd_bind_graphics_shader(cmd, stage, save->shaders[stage]);
      }

      hk_cmd_bind_vertex_buffer(cmd, 0, save->vb0);

      /* Restore queries */
      cmd->state.gfx.occlusion.mode = save->occlusion;
      cmd->state.gfx.dirty |= HK_DIRTY_OCCLUSION;

      desc->root.draw.pipeline_stats_flags = save->pipeline_stats_flags;
      desc->root_dirty = true;
   } else {
      hk_cmd_bind_compute_shader(cmd, save->shaders[MESA_SHADER_COMPUTE]);
   }

   memcpy(desc->root.push, save->push, sizeof(save->push));
   cmd->in_meta = false;
}

#define BINDING_OUTPUT 0
#define BINDING_INPUT  1

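/* Map an image format plus an aspect mask to the format of that single aspect,
 * splitting combined depth/stencil formats and selecting planes for YCbCr.
 */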
static VkFormat
aspect_format(VkFormat fmt, VkImageAspectFlags aspect)
{
   bool depth = (aspect & VK_IMAGE_ASPECT_DEPTH_BIT);
   bool stencil = (aspect & VK_IMAGE_ASPECT_STENCIL_BIT);

   enum pipe_format p_format = hk_format_to_pipe_format(fmt);

   if (util_format_is_depth_or_stencil(p_format)) {
      assert(depth ^ stencil);
      if (depth) {
         switch (fmt) {
         case VK_FORMAT_D32_SFLOAT:
         case VK_FORMAT_D32_SFLOAT_S8_UINT:
            return VK_FORMAT_D32_SFLOAT;
         case VK_FORMAT_D16_UNORM:
         case VK_FORMAT_D16_UNORM_S8_UINT:
            return VK_FORMAT_D16_UNORM;
         default:
            unreachable("invalid depth");
         }
      } else {
         switch (fmt) {
         case VK_FORMAT_S8_UINT:
         case VK_FORMAT_D32_SFLOAT_S8_UINT:
         case VK_FORMAT_D16_UNORM_S8_UINT:
            return VK_FORMAT_S8_UINT;
         default:
            unreachable("invalid stencil");
         }
      }
   }

   assert(!depth && !stencil);

   const struct vk_format_ycbcr_info *ycbcr_info =
      vk_format_get_ycbcr_info(fmt);

   if (ycbcr_info) {
      switch (aspect) {
      case VK_IMAGE_ASPECT_PLANE_0_BIT:
         return ycbcr_info->planes[0].format;
      case VK_IMAGE_ASPECT_PLANE_1_BIT:
         return ycbcr_info->planes[1].format;
      case VK_IMAGE_ASPECT_PLANE_2_BIT:
         return ycbcr_info->planes[2].format;
      default:
         unreachable("invalid ycbcr aspect");
      }
   }

   return fmt;
}

/*
 * Canonicalize formats to simplify the copies. The returned format must be in
 * the same compression class and should round-trip losslessly (minifloat
 * formats are the unfortunate exception).
 */
static enum pipe_format
canonical_format_pipe(enum pipe_format fmt, bool canonicalize_zs)
{
   if (!canonicalize_zs && util_format_is_depth_or_stencil(fmt))
      return fmt;

   assert(ail_is_valid_pixel_format(fmt));

   if (util_format_is_compressed(fmt)) {
      unsigned size_B = util_format_get_blocksize(fmt);
      assert(size_B == 8 || size_B == 16);

      return size_B == 16 ? PIPE_FORMAT_R32G32B32A32_UINT
                          : PIPE_FORMAT_R32G32_UINT;
   }

#define CASE(x, y) [AGX_CHANNELS_##x] = PIPE_FORMAT_##y
   /* clang-format off */
   static enum pipe_format map[] = {
      CASE(R8,           R8_UINT),
      CASE(R16,          R16_UNORM /* XXX: Hack for Z16 copies */),
      CASE(R8G8,         R8G8_UINT),
      CASE(R5G6B5,       R5G6B5_UNORM),
      CASE(R4G4B4A4,     R4G4B4A4_UNORM),
      CASE(A1R5G5B5,     A1R5G5B5_UNORM),
      CASE(R5G5B5A1,     B5G5R5A1_UNORM),
      CASE(R32,          R32_UINT),
      CASE(R16G16,       R16G16_UINT),
      CASE(R11G11B10,    R11G11B10_FLOAT),
      CASE(R10G10B10A2,  R10G10B10A2_UNORM),
      CASE(R9G9B9E5,     R9G9B9E5_FLOAT),
      CASE(R8G8B8A8,     R8G8B8A8_UINT),
      CASE(R32G32,       R32G32_UINT),
      CASE(R16G16B16A16, R16G16B16A16_UINT),
      CASE(R32G32B32A32, R32G32B32A32_UINT),
   };
   /* clang-format on */
#undef CASE

   enum agx_channels channels = ail_pixel_format[fmt].channels;
   assert(channels < ARRAY_SIZE(map) && "all valid channels handled");
   assert(map[channels] != PIPE_FORMAT_NONE && "all valid channels handled");
   return map[channels];
}

static VkFormat
canonical_format(VkFormat fmt)
{
   return vk_format_from_pipe_format(
      canonical_format_pipe(hk_format_to_pipe_format(fmt), false));
}

enum copy_type {
   BUF2IMG,
   IMG2BUF,
   IMG2IMG,
};

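/* Push constants shared by all copy shader variants. Offsets and grid sizes
 * are in (block) elements, hence the _el suffix; row_extent and
 * slice_or_layer_extent are in bytes.
 */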
struct vk_meta_push_data {
   uint64_t buffer;

   uint32_t row_extent;
   uint32_t slice_or_layer_extent;

   int32_t src_offset_el[4];
   int32_t dst_offset_el[4];
   uint32_t grid_el[3];
} PACKED;

#define get_push(b, name)                                                      \
   nir_load_push_constant(                                                     \
      b, 1, sizeof(((struct vk_meta_push_data *)0)->name) * 8,                 \
      nir_imm_int(b, offsetof(struct vk_meta_push_data, name)))

struct vk_meta_image_copy_key {
   enum vk_meta_object_key_type key_type;
   enum copy_type type;
   enum pipe_format src_format, dst_format;
   unsigned block_size;
   unsigned nr_samples;
   bool block_based;
};

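/* Map an (x, y, z/layer) element coordinate to a linear index into the staging
 * buffer: bytes = x * block_size + y * row_extent + z * slice_or_layer_extent,
 * divided back by the block size so the result indexes whole elements.
 */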
static nir_def *
linearize_coords(nir_builder *b, nir_def *coord,
                 const struct vk_meta_image_copy_key *key)
{
   assert(key->nr_samples == 1 && "buffer<-->image copies not multisampled");

   nir_def *row_extent = get_push(b, row_extent);
   nir_def *slice_or_layer_extent = get_push(b, slice_or_layer_extent);
   nir_def *x = nir_channel(b, coord, 0);
   nir_def *y = nir_channel(b, coord, 1);
   nir_def *z_or_layer = nir_channel(b, coord, 2);

   nir_def *v = nir_imul_imm(b, x, key->block_size);

   v = nir_iadd(b, v, nir_imul(b, y, row_extent));
   v = nir_iadd(b, v, nir_imul(b, z_or_layer, slice_or_layer_extent));

   return nir_udiv_imm(b, v, key->block_size);
}

static bool
is_format_native(enum pipe_format format)
{
   switch (format) {
   case PIPE_FORMAT_R8_UINT:
   case PIPE_FORMAT_R8G8_UINT:
   case PIPE_FORMAT_R32_UINT:
   case PIPE_FORMAT_R32G32_UINT:
   case PIPE_FORMAT_R16G16_UINT:
   case PIPE_FORMAT_R16_UNORM:
      /* TODO: debug me .. why do these fail */
      return false;
   case PIPE_FORMAT_R11G11B10_FLOAT:
   case PIPE_FORMAT_R9G9B9E5_FLOAT:
   case PIPE_FORMAT_R16G16B16A16_UINT:
   case PIPE_FORMAT_R32G32B32A32_UINT:
   case PIPE_FORMAT_R8G8B8A8_UINT:
   case PIPE_FORMAT_R10G10B10A2_UNORM:
      return true;
   case PIPE_FORMAT_R5G6B5_UNORM:
   case PIPE_FORMAT_R4G4B4A4_UNORM:
   case PIPE_FORMAT_A1R5G5B5_UNORM:
   case PIPE_FORMAT_B5G5R5A1_UNORM:
      return false;
   default:
      unreachable("expected canonical");
   }
}

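/* Load from (value == NULL) or store to a buffer at a formatted element index.
 * Formats the hardware can access directly use formatted load/store
 * instructions; the rest take a raw global access with software (un)packing.
 */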
static nir_def *
load_store_formatted(nir_builder *b, nir_def *base, nir_def *index,
                     nir_def *value, enum pipe_format format)
{
   if (util_format_is_depth_or_stencil(format))
      format = canonical_format_pipe(format, true);

   if (is_format_native(format)) {
      enum pipe_format isa = ail_pixel_format[format].renderable;
      unsigned isa_size = util_format_get_blocksize(isa);
      unsigned isa_components = util_format_get_blocksize(format) / isa_size;
      unsigned shift = util_logbase2(isa_components);

      if (value) {
         nir_store_agx(b, value, base, index, .format = isa, .base = shift);
      } else {
         return nir_load_agx(b, 4, 32, base, index, .format = isa,
                             .base = shift);
      }
   } else {
      unsigned blocksize_B = util_format_get_blocksize(format);
      nir_def *addr =
         nir_iadd(b, base, nir_imul_imm(b, nir_u2u64(b, index), blocksize_B));

      if (value) {
         nir_def *raw = nir_format_pack_rgba(b, format, value);

         if (blocksize_B <= 4) {
            assert(raw->num_components == 1);
            raw = nir_u2uN(b, raw, blocksize_B * 8);
         } else {
            assert(raw->bit_size == 32);
            raw = nir_trim_vector(b, raw, blocksize_B / 4);
         }

         nir_store_global(b, addr, blocksize_B, raw,
                          nir_component_mask(raw->num_components));
      } else {
         nir_def *raw =
            nir_load_global(b, addr, blocksize_B, DIV_ROUND_UP(blocksize_B, 4),
                            MIN2(32, blocksize_B * 8));

         return nir_format_unpack_rgba(b, raw, format);
      }
   }

   return NULL;
}

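/* Build the compute copy shader for one key. Each invocation copies a single
 * element (per sample). Block-based variants instead stage a full
 * TILE_WIDTH x TILE_HEIGHT block in local memory, preloading the out-of-bounds
 * portion from the destination, and write the block out from one thread per
 * workgroup.
 */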
static nir_shader *
build_image_copy_shader(const struct vk_meta_image_copy_key *key)
{
   nir_builder build =
      nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "hk-meta-copy");

   nir_builder *b = &build;
   b->shader->info.workgroup_size[0] = TILE_WIDTH;
   b->shader->info.workgroup_size[1] = TILE_HEIGHT;

   bool src_is_buf = key->type == BUF2IMG;
   bool dst_is_buf = key->type == IMG2BUF;

   bool msaa = key->nr_samples > 1;
   enum glsl_sampler_dim dim_2d =
      msaa ? GLSL_SAMPLER_DIM_MS : GLSL_SAMPLER_DIM_2D;
   enum glsl_sampler_dim dim_src = src_is_buf ? GLSL_SAMPLER_DIM_BUF : dim_2d;
   enum glsl_sampler_dim dim_dst = dst_is_buf ? GLSL_SAMPLER_DIM_BUF : dim_2d;

   const struct glsl_type *texture_type =
      glsl_sampler_type(dim_src, false, !src_is_buf, GLSL_TYPE_UINT);

   const struct glsl_type *image_type =
      glsl_image_type(dim_dst, !dst_is_buf, GLSL_TYPE_UINT);

   nir_variable *texture =
      nir_variable_create(b->shader, nir_var_uniform, texture_type, "source");
   nir_variable *image =
      nir_variable_create(b->shader, nir_var_image, image_type, "dest");

   image->data.descriptor_set = 0;
   image->data.binding = BINDING_OUTPUT;
   image->data.access = ACCESS_NON_READABLE;

   texture->data.descriptor_set = 0;
   texture->data.binding = BINDING_INPUT;

   /* Grab the offset vectors */
   nir_def *src_offset_el = nir_load_push_constant(
      b, 3, 32,
      nir_imm_int(b, offsetof(struct vk_meta_push_data, src_offset_el)));

   nir_def *dst_offset_el = nir_load_push_constant(
      b, 3, 32,
      nir_imm_int(b, offsetof(struct vk_meta_push_data, dst_offset_el)));

   nir_def *grid_2d_el = nir_load_push_constant(
      b, 2, 32, nir_imm_int(b, offsetof(struct vk_meta_push_data, grid_el)));

   /* We're done setting up variables, do the copy */
   nir_def *coord = nir_load_global_invocation_id(b, 32);

   /* The destination format is already canonical, convert to an ISA format */
   enum pipe_format isa_format = PIPE_FORMAT_NONE;
   if (key->block_based) {
      enum pipe_format pipe = canonical_format_pipe(key->dst_format, true);
      isa_format = ail_pixel_format[pipe].renderable;
      assert(isa_format != PIPE_FORMAT_NONE);
   }

   nir_def *local_offset = nir_imm_intN_t(b, 0, 16);
   nir_def *lid = nir_trim_vector(b, nir_load_local_invocation_id(b), 2);
   lid = nir_u2u16(b, lid);

   nir_def *src_coord = src_is_buf ? coord : nir_iadd(b, coord, src_offset_el);
   nir_def *dst_coord = dst_is_buf ? coord : nir_iadd(b, coord, dst_offset_el);

   nir_def *image_deref = &nir_build_deref_var(b, image)->def;

   nir_def *coord_2d_el = nir_trim_vector(b, coord, 2);
   nir_def *in_bounds;
   if (key->block_based) {
      nir_def *offset_in_block_el =
         nir_umod_imm(b, nir_trim_vector(b, dst_offset_el, 2), TILE_WIDTH);

      dst_coord =
         nir_vector_insert_imm(b, nir_isub(b, dst_coord, offset_in_block_el),
                               nir_channel(b, dst_coord, 2), 2);

      src_coord =
         nir_vector_insert_imm(b, nir_isub(b, src_coord, offset_in_block_el),
                               nir_channel(b, src_coord, 2), 2);

      in_bounds = nir_uge(b, coord_2d_el, offset_in_block_el);
      in_bounds = nir_iand(
         b, in_bounds,
         nir_ult(b, coord_2d_el, nir_iadd(b, offset_in_block_el, grid_2d_el)));
   } else {
      in_bounds = nir_ult(b, coord_2d_el, grid_2d_el);
   }

   /* Special case handle buffer indexing */
   if (dst_is_buf) {
      assert(!key->block_based);
      dst_coord = linearize_coords(b, dst_coord, key);
   } else if (src_is_buf) {
      src_coord = linearize_coords(b, src_coord, key);
   }

   for (unsigned s = 0; s < key->nr_samples; ++s) {
      nir_def *ms_index = nir_imm_int(b, s);
      nir_def *value1 = NULL, *value2 = NULL;

      nir_push_if(b, nir_ball(b, in_bounds));
      {
         /* Copy formatted texel from texture to storage image */
         nir_deref_instr *deref = nir_build_deref_var(b, texture);

         if (src_is_buf) {
            value1 = load_store_formatted(b, get_push(b, buffer), src_coord,
                                          NULL, key->dst_format);
         } else {
            if (msaa) {
               value1 = nir_txf_ms_deref(b, deref, src_coord, ms_index);
            } else {
               value1 = nir_txf_deref(b, deref, src_coord, NULL);
            }

            /* Munge according to the implicit conversions so we get a bit copy */
            if (key->src_format != key->dst_format) {
               nir_def *packed =
                  nir_format_pack_rgba(b, key->src_format, value1);

               value1 = nir_format_unpack_rgba(b, packed, key->dst_format);
            }
         }

         if (dst_is_buf) {
            load_store_formatted(b, get_push(b, buffer), dst_coord, value1,
                                 key->dst_format);
         } else if (!key->block_based) {
            nir_image_deref_store(b, image_deref, nir_pad_vec4(b, dst_coord),
                                  ms_index, value1, nir_imm_int(b, 0),
                                  .image_dim = dim_dst,
                                  .image_array = !dst_is_buf);
         }
      }
      nir_push_else(b, NULL);
      if (key->block_based) {
         /* Copy back the existing destination content */
         value2 = nir_image_deref_load(b, 4, 32, image_deref,
                                       nir_pad_vec4(b, dst_coord), ms_index,
                                       nir_imm_int(b, 0), .image_dim = dim_dst,
                                       .image_array = !dst_is_buf);
      }
      nir_pop_if(b, NULL);

      if (key->block_based) {
         /* Must define the phi first so we validate. */
         nir_def *phi = nir_if_phi(b, value1, value2);
         nir_def *mask = nir_imm_intN_t(b, 1 << s, 16);

         nir_store_local_pixel_agx(b, phi, mask, lid, .base = 0,
                                   .write_mask = 0xf, .format = isa_format,
                                   .explicit_coord = true);
      }
   }

   if (key->block_based) {
      assert(!dst_is_buf);

      nir_barrier(b, .execution_scope = SCOPE_WORKGROUP);

      nir_push_if(b, nir_ball(b, nir_ieq_imm(b, lid, 0)));
      {
         nir_image_deref_store_block_agx(
            b, image_deref, local_offset, dst_coord, .format = isa_format,
            .image_dim = dim_2d, .image_array = true, .explicit_coord = true);
      }
      nir_pop_if(b, NULL);
      b->shader->info.cs.image_block_size_per_thread_agx =
         util_format_get_blocksize(key->dst_format);
   }

   return b->shader;
}

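/* All copy variants share one descriptor set layout: a storage image output
 * and an input texture, cached in vk_meta under a per-direction key.
 */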
static VkResult
get_image_copy_descriptor_set_layout(struct vk_device *device,
                                     struct vk_meta_device *meta,
                                     VkDescriptorSetLayout *layout_out,
                                     enum copy_type type)
{
   const char *keys[] = {
      [IMG2BUF] = "vk-meta-copy-image-to-buffer-descriptor-set-layout",
      [BUF2IMG] = "vk-meta-copy-buffer-to-image-descriptor-set-layout",
      [IMG2IMG] = "vk-meta-copy-image-to-image-descriptor-set-layout",
   };

   VkDescriptorSetLayout from_cache = vk_meta_lookup_descriptor_set_layout(
      meta, keys[type], strlen(keys[type]));
   if (from_cache != VK_NULL_HANDLE) {
      *layout_out = from_cache;
      return VK_SUCCESS;
   }

   const VkDescriptorSetLayoutBinding bindings[] = {
      {
         .binding = BINDING_OUTPUT,
         .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
         .descriptorCount = 1,
         .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
      },
      {
         .binding = BINDING_INPUT,
         .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
         .descriptorCount = 1,
         .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
      },
   };

   const VkDescriptorSetLayoutCreateInfo info = {
      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
      .bindingCount = ARRAY_SIZE(bindings),
      .pBindings = bindings,
   };

   return vk_meta_create_descriptor_set_layout(device, meta, &info, keys[type],
                                               strlen(keys[type]), layout_out);
}

static VkResult
get_image_copy_pipeline_layout(struct vk_device *device,
                               struct vk_meta_device *meta,
                               struct vk_meta_image_copy_key *key,
                               VkDescriptorSetLayout set_layout,
                               VkPipelineLayout *layout_out,
                               enum copy_type type)
{
   const char *keys[] = {
      [IMG2BUF] = "vk-meta-copy-image-to-buffer-pipeline-layout",
      [BUF2IMG] = "vk-meta-copy-buffer-to-image-pipeline-layout",
      [IMG2IMG] = "vk-meta-copy-image-to-image-pipeline-layout",
   };

   VkPipelineLayout from_cache =
      vk_meta_lookup_pipeline_layout(meta, keys[type], strlen(keys[type]));
   if (from_cache != VK_NULL_HANDLE) {
      *layout_out = from_cache;
      return VK_SUCCESS;
   }

   VkPipelineLayoutCreateInfo info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
      .setLayoutCount = 1,
      .pSetLayouts = &set_layout,
   };

   const VkPushConstantRange push_range = {
      .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
      .offset = 0,
      .size = sizeof(struct vk_meta_push_data),
   };

   info.pushConstantRangeCount = 1;
   info.pPushConstantRanges = &push_range;

   return vk_meta_create_pipeline_layout(device, meta, &info, keys[type],
                                         strlen(keys[type]), layout_out);
}

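/* Get (or build and cache) the compute pipeline for a copy key. The NIR from
 * build_image_copy_shader() is consumed by pipeline creation and freed here.
 */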
static VkResult
get_image_copy_pipeline(struct vk_device *device, struct vk_meta_device *meta,
                        const struct vk_meta_image_copy_key *key,
                        VkPipelineLayout layout, VkPipeline *pipeline_out)
{
   VkPipeline from_cache = vk_meta_lookup_pipeline(meta, key, sizeof(*key));
   if (from_cache != VK_NULL_HANDLE) {
      *pipeline_out = from_cache;
      return VK_SUCCESS;
   }

   const VkPipelineShaderStageNirCreateInfoMESA nir_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_NIR_CREATE_INFO_MESA,
      .nir = build_image_copy_shader(key),
   };
   const VkPipelineShaderStageCreateInfo cs_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
      .pNext = &nir_info,
      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
      .pName = "main",
   };

   const VkComputePipelineCreateInfo info = {
      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
      .stage = cs_info,
      .layout = layout,
   };

   VkResult result = vk_meta_create_compute_pipeline(
      device, meta, &info, key, sizeof(*key), pipeline_out);
   ralloc_free(nir_info.nir);

   return result;
}

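/* Implement vkCmdCopyImageToBuffer2 as one compute dispatch per region (per
 * layer for compressed images): sample the source image and write linearized
 * elements into the destination buffer.
 */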
static void
hk_meta_copy_image_to_buffer2(struct vk_command_buffer *cmd,
                              struct vk_meta_device *meta,
                              const VkCopyImageToBufferInfo2 *pCopyBufferInfo)
{
   VK_FROM_HANDLE(vk_image, image, pCopyBufferInfo->srcImage);
   VK_FROM_HANDLE(vk_image, src_image, pCopyBufferInfo->srcImage);
   VK_FROM_HANDLE(hk_buffer, buffer, pCopyBufferInfo->dstBuffer);

   struct vk_device *device = cmd->base.device;
   const struct vk_device_dispatch_table *disp = &device->dispatch_table;

   VkResult result;

   VkDescriptorSetLayout set_layout;
   result =
      get_image_copy_descriptor_set_layout(device, meta, &set_layout, IMG2BUF);
   if (unlikely(result != VK_SUCCESS)) {
      vk_command_buffer_set_error(cmd, result);
      return;
   }

   bool per_layer =
      util_format_is_compressed(hk_format_to_pipe_format(image->format));

   for (unsigned i = 0; i < pCopyBufferInfo->regionCount; ++i) {
      const VkBufferImageCopy2 *region = &pCopyBufferInfo->pRegions[i];

      unsigned layers = MAX2(region->imageExtent.depth,
                             vk_image_subresource_layer_count(
                                src_image, &region->imageSubresource));
      unsigned layer_iters = per_layer ? layers : 1;

      for (unsigned layer_offs = 0; layer_offs < layer_iters; ++layer_offs) {

         VkImageAspectFlags aspect = region->imageSubresource.aspectMask;
         VkFormat aspect_fmt = aspect_format(image->format, aspect);
         VkFormat canonical = canonical_format(aspect_fmt);

         uint32_t blocksize_B =
            util_format_get_blocksize(hk_format_to_pipe_format(canonical));

         enum pipe_format p_format = hk_format_to_pipe_format(image->format);

         unsigned row_extent = util_format_get_nblocksx(
                                  p_format, MAX2(region->bufferRowLength,
                                                 region->imageExtent.width)) *
                               blocksize_B;
         unsigned slice_extent =
            util_format_get_nblocksy(
               p_format,
               MAX2(region->bufferImageHeight, region->imageExtent.height)) *
            row_extent;
         unsigned layer_extent =
            util_format_get_nblocksz(p_format, region->imageExtent.depth) *
            slice_extent;

         bool is_3d = region->imageExtent.depth > 1;

         struct vk_meta_image_copy_key key = {
            .key_type = VK_META_OBJECT_KEY_COPY_IMAGE_TO_BUFFER_PIPELINE,
            .type = IMG2BUF,
            .block_size = blocksize_B,
            .nr_samples = image->samples,
            .src_format = hk_format_to_pipe_format(canonical),
            .dst_format = hk_format_to_pipe_format(canonical),
         };

         VkPipelineLayout pipeline_layout;
         result = get_image_copy_pipeline_layout(device, meta, &key, set_layout,
                                                 &pipeline_layout, IMG2BUF);
         if (unlikely(result != VK_SUCCESS)) {
            vk_command_buffer_set_error(cmd, result);
            return;
         }

         VkImageView src_view;
         const VkImageViewUsageCreateInfo src_view_usage = {
            .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO,
            .usage = VK_IMAGE_USAGE_SAMPLED_BIT,
         };
         const VkImageViewCreateInfo src_view_info = {
            .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
            .flags = VK_IMAGE_VIEW_CREATE_DRIVER_INTERNAL_BIT_MESA,
            .pNext = &src_view_usage,
            .image = pCopyBufferInfo->srcImage,
            .viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY,
            .format = canonical,
            .subresourceRange =
               {
                  .aspectMask = region->imageSubresource.aspectMask,
                  .baseMipLevel = region->imageSubresource.mipLevel,
                  .baseArrayLayer =
                     MAX2(region->imageOffset.z,
                          region->imageSubresource.baseArrayLayer) +
                     layer_offs,
                  .layerCount = per_layer ? 1 : layers,
                  .levelCount = 1,
               },
         };

         result =
            vk_meta_create_image_view(cmd, meta, &src_view_info, &src_view);
         if (unlikely(result != VK_SUCCESS)) {
            vk_command_buffer_set_error(cmd, result);
            return;
         }

         VkDescriptorImageInfo src_info = {
            .imageLayout = pCopyBufferInfo->srcImageLayout,
            .imageView = src_view,
         };

         VkWriteDescriptorSet desc_write = {
            .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
            .dstSet = 0,
            .dstBinding = BINDING_INPUT,
            .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
            .descriptorCount = 1,
            .pImageInfo = &src_info,
         };

         disp->CmdPushDescriptorSetKHR(vk_command_buffer_to_handle(cmd),
                                       VK_PIPELINE_BIND_POINT_COMPUTE,
                                       pipeline_layout, 0, 1, &desc_write);

         VkPipeline pipeline;
         result = get_image_copy_pipeline(device, meta, &key, pipeline_layout,
                                          &pipeline);
         if (unlikely(result != VK_SUCCESS)) {
            vk_command_buffer_set_error(cmd, result);
            return;
         }

         disp->CmdBindPipeline(vk_command_buffer_to_handle(cmd),
                               VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);

         enum pipe_format p_src_fmt =
            hk_format_to_pipe_format(src_image->format);

         struct vk_meta_push_data push = {
            .buffer = hk_buffer_address(buffer, region->bufferOffset),
            .row_extent = row_extent,
            .slice_or_layer_extent = is_3d ? slice_extent : layer_extent,

            .src_offset_el[0] =
               util_format_get_nblocksx(p_src_fmt, region->imageOffset.x),
            .src_offset_el[1] =
               util_format_get_nblocksy(p_src_fmt, region->imageOffset.y),

            .grid_el[0] =
               util_format_get_nblocksx(p_format, region->imageExtent.width),
            .grid_el[1] =
               util_format_get_nblocksy(p_format, region->imageExtent.height),
            .grid_el[2] = per_layer ? 1 : layers,
         };

         push.buffer += push.slice_or_layer_extent * layer_offs;

         disp->CmdPushConstants(vk_command_buffer_to_handle(cmd),
                                pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0,
                                sizeof(push), &push);

         disp->CmdDispatch(vk_command_buffer_to_handle(cmd),
                           DIV_ROUND_UP(push.grid_el[0], 32),
                           DIV_ROUND_UP(push.grid_el[1], 32), push.grid_el[2]);
      }
   }
}

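/* Common dispatch tail for copies writing an image: finish filling out the
 * push constants, then launch enough workgroups to cover the destination
 * region expanded to whole TILE_WIDTH x TILE_HEIGHT tiles, which the
 * block-based path requires.
 */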
static void
hk_meta_dispatch_to_image(struct vk_command_buffer *cmd,
                          const struct vk_device_dispatch_table *disp,
                          VkPipelineLayout pipeline_layout,
                          struct vk_meta_push_data *push, VkOffset3D offset,
                          VkExtent3D extent, bool per_layer, unsigned layers,
                          enum pipe_format p_dst_fmt, enum pipe_format p_format)
{
   push->dst_offset_el[0] = util_format_get_nblocksx(p_dst_fmt, offset.x);
   push->dst_offset_el[1] = util_format_get_nblocksy(p_dst_fmt, offset.y);
   push->dst_offset_el[2] = 0;

   push->grid_el[0] = util_format_get_nblocksx(p_format, extent.width);
   push->grid_el[1] = util_format_get_nblocksy(p_format, extent.height);
   push->grid_el[2] = per_layer ? 1 : layers;

   unsigned w_el = util_format_get_nblocksx(p_format, extent.width);
   unsigned h_el = util_format_get_nblocksy(p_format, extent.height);

   /* Expand the grid so destinations are in tiles */
   unsigned expanded_x0 = push->dst_offset_el[0] & ~(TILE_WIDTH - 1);
   unsigned expanded_y0 = push->dst_offset_el[1] & ~(TILE_HEIGHT - 1);
   unsigned expanded_x1 = align(push->dst_offset_el[0] + w_el, TILE_WIDTH);
   unsigned expanded_y1 = align(push->dst_offset_el[1] + h_el, TILE_HEIGHT);

   /* TODO: clamp to the destination size to save some redundant threads? */

   disp->CmdPushConstants(vk_command_buffer_to_handle(cmd), pipeline_layout,
                          VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(*push), push);

   disp->CmdDispatch(vk_command_buffer_to_handle(cmd),
                     (expanded_x1 - expanded_x0) / TILE_WIDTH,
                     (expanded_y1 - expanded_y0) / TILE_HEIGHT,
                     push->grid_el[2]);
}

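/* Implement vkCmdCopyBufferToImage2, the inverse of the image-to-buffer path:
 * each region becomes a compute dispatch reading linearized elements from the
 * buffer and storing them to the destination image.
 */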
static void
hk_meta_copy_buffer_to_image2(struct vk_command_buffer *cmd,
                              struct vk_meta_device *meta,
                              const struct VkCopyBufferToImageInfo2 *info)
{
   VK_FROM_HANDLE(vk_image, image, info->dstImage);
   VK_FROM_HANDLE(hk_buffer, buffer, info->srcBuffer);

   struct vk_device *device = cmd->base.device;
   const struct vk_device_dispatch_table *disp = &device->dispatch_table;

   VkDescriptorSetLayout set_layout;
   VkResult result =
      get_image_copy_descriptor_set_layout(device, meta, &set_layout, BUF2IMG);
   if (unlikely(result != VK_SUCCESS)) {
      vk_command_buffer_set_error(cmd, result);
      return;
   }

   bool per_layer =
      util_format_is_compressed(hk_format_to_pipe_format(image->format));

   for (unsigned r = 0; r < info->regionCount; ++r) {
      const VkBufferImageCopy2 *region = &info->pRegions[r];

      unsigned layers = MAX2(
         region->imageExtent.depth,
         vk_image_subresource_layer_count(image, &region->imageSubresource));
      unsigned layer_iters = per_layer ? layers : 1;

      for (unsigned layer_offs = 0; layer_offs < layer_iters; ++layer_offs) {
         VkImageAspectFlags aspect = region->imageSubresource.aspectMask;
         VkFormat aspect_fmt = aspect_format(image->format, aspect);
         VkFormat canonical = canonical_format(aspect_fmt);
         enum pipe_format p_format = hk_format_to_pipe_format(aspect_fmt);
         uint32_t blocksize_B = util_format_get_blocksize(p_format);
         bool is_3d = region->imageExtent.depth > 1;

         struct vk_meta_image_copy_key key = {
            .key_type = VK_META_OBJECT_KEY_COPY_IMAGE_TO_BUFFER_PIPELINE,
            .type = BUF2IMG,
            .block_size = blocksize_B,
            .nr_samples = image->samples,
            .src_format = hk_format_to_pipe_format(canonical),
            .dst_format = canonical_format_pipe(
               hk_format_to_pipe_format(aspect_format(image->format, aspect)),
               false),

            /* TODO: MSAA path */
            .block_based =
               (image->image_type != VK_IMAGE_TYPE_1D) && image->samples == 1,
         };

         VkPipelineLayout pipeline_layout;
         result = get_image_copy_pipeline_layout(device, meta, &key, set_layout,
                                                 &pipeline_layout, BUF2IMG);
         if (unlikely(result != VK_SUCCESS)) {
            vk_command_buffer_set_error(cmd, result);
            return;
         }

         unsigned row_extent = util_format_get_nblocksx(
                                  p_format, MAX2(region->bufferRowLength,
                                                 region->imageExtent.width)) *
                               blocksize_B;
         unsigned slice_extent =
            util_format_get_nblocksy(
               p_format,
               MAX2(region->bufferImageHeight, region->imageExtent.height)) *
            row_extent;
         unsigned layer_extent =
            util_format_get_nblocksz(p_format, region->imageExtent.depth) *
            slice_extent;

         VkImageView dst_view;
         const VkImageViewUsageCreateInfo dst_view_usage = {
            .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO,
            .usage = VK_IMAGE_USAGE_STORAGE_BIT,
         };
         const VkImageViewCreateInfo dst_view_info = {
            .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
            .flags = VK_IMAGE_VIEW_CREATE_DRIVER_INTERNAL_BIT_MESA,
            .pNext = &dst_view_usage,
            .image = info->dstImage,
            .viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY,
            .format = canonical,
            .subresourceRange =
               {
                  .aspectMask = region->imageSubresource.aspectMask,
                  .baseMipLevel = region->imageSubresource.mipLevel,
                  .baseArrayLayer =
                     MAX2(region->imageOffset.z,
                          region->imageSubresource.baseArrayLayer) +
                     layer_offs,
                  .layerCount = per_layer ? 1 : layers,
                  .levelCount = 1,
               },
         };

         result =
            vk_meta_create_image_view(cmd, meta, &dst_view_info, &dst_view);
         if (unlikely(result != VK_SUCCESS)) {
            vk_command_buffer_set_error(cmd, result);
            return;
         }

         const VkDescriptorImageInfo dst_info = {
            .imageView = dst_view,
            .imageLayout = info->dstImageLayout,
         };

         VkWriteDescriptorSet desc_write = {
            .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
            .dstSet = 0,
            .dstBinding = BINDING_OUTPUT,
            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
            .descriptorCount = 1,
            .pImageInfo = &dst_info,
         };

         disp->CmdPushDescriptorSetKHR(vk_command_buffer_to_handle(cmd),
                                       VK_PIPELINE_BIND_POINT_COMPUTE,
                                       pipeline_layout, 0, 1, &desc_write);

         VkPipeline pipeline;
         result = get_image_copy_pipeline(device, meta, &key, pipeline_layout,
                                          &pipeline);
         if (unlikely(result != VK_SUCCESS)) {
            vk_command_buffer_set_error(cmd, result);
            return;
         }

         disp->CmdBindPipeline(vk_command_buffer_to_handle(cmd),
                               VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);

         struct vk_meta_push_data push = {
            .buffer = hk_buffer_address(buffer, region->bufferOffset),
            .row_extent = row_extent,
            .slice_or_layer_extent = is_3d ? slice_extent : layer_extent,
         };

         push.buffer += push.slice_or_layer_extent * layer_offs;

         hk_meta_dispatch_to_image(cmd, disp, pipeline_layout, &push,
                                   region->imageOffset, region->imageExtent,
                                   per_layer, layers, p_format, p_format);
      }
   }
}

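/* Implement vkCmdCopyImage2 as a compute dispatch per region and aspect.
 * Copies go through canonical formats so that bit-exact copies work across
 * compatible format pairs, including compressed<-->uncompressed.
 */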
static void
hk_meta_copy_image2(struct vk_command_buffer *cmd, struct vk_meta_device *meta,
                    const struct VkCopyImageInfo2 *info)
{
   VK_FROM_HANDLE(vk_image, src_image, info->srcImage);
   VK_FROM_HANDLE(vk_image, dst_image, info->dstImage);

   struct vk_device *device = cmd->base.device;
   const struct vk_device_dispatch_table *disp = &device->dispatch_table;

   VkDescriptorSetLayout set_layout;
   VkResult result =
      get_image_copy_descriptor_set_layout(device, meta, &set_layout, IMG2IMG);
   if (unlikely(result != VK_SUCCESS)) {
      vk_command_buffer_set_error(cmd, result);
      return;
   }

   bool per_layer =
      util_format_is_compressed(hk_format_to_pipe_format(src_image->format)) ||
      util_format_is_compressed(hk_format_to_pipe_format(dst_image->format));

   for (unsigned r = 0; r < info->regionCount; ++r) {
      const VkImageCopy2 *region = &info->pRegions[r];

      unsigned layers = MAX2(
         vk_image_subresource_layer_count(src_image, &region->srcSubresource),
         region->extent.depth);
      unsigned layer_iters = per_layer ? layers : 1;

      for (unsigned layer_offs = 0; layer_offs < layer_iters; ++layer_offs) {
         u_foreach_bit(aspect, region->srcSubresource.aspectMask) {
            /* We use the source format throughout for consistent scaling with
             * compressed<-->uncompressed copies, where the extents are defined
             * to follow the source.
             */
            VkFormat aspect_fmt = aspect_format(src_image->format, 1 << aspect);
            VkFormat canonical = canonical_format(aspect_fmt);
            uint32_t blocksize_B =
               util_format_get_blocksize(hk_format_to_pipe_format(canonical));

            VkImageAspectFlagBits dst_aspect_mask =
               vk_format_get_ycbcr_info(dst_image->format) ||
                     vk_format_get_ycbcr_info(src_image->format)
                  ? region->dstSubresource.aspectMask
                  : (1 << aspect);

            struct vk_meta_image_copy_key key = {
               .key_type = VK_META_OBJECT_KEY_COPY_IMAGE_TO_BUFFER_PIPELINE,
               .type = IMG2IMG,
               .block_size = blocksize_B,
               .nr_samples = dst_image->samples,
               .src_format = hk_format_to_pipe_format(canonical),
               .dst_format =
                  canonical_format_pipe(hk_format_to_pipe_format(aspect_format(
                                           dst_image->format, dst_aspect_mask)),
                                        false),

               /* TODO: MSAA path */
               .block_based = (dst_image->image_type != VK_IMAGE_TYPE_1D) &&
                              dst_image->samples == 1,
            };

            assert(key.nr_samples == src_image->samples);

            VkPipelineLayout pipeline_layout;
            result = get_image_copy_pipeline_layout(
               device, meta, &key, set_layout, &pipeline_layout, IMG2IMG);
            if (unlikely(result != VK_SUCCESS)) {
               vk_command_buffer_set_error(cmd, result);
               return;
            }

            VkWriteDescriptorSet desc_writes[2];

            VkImageView src_view;
            const VkImageViewUsageCreateInfo src_view_usage = {
               .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO,
               .usage = VK_IMAGE_USAGE_SAMPLED_BIT,
            };
            const VkImageViewCreateInfo src_view_info = {
               .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
               .flags = VK_IMAGE_VIEW_CREATE_DRIVER_INTERNAL_BIT_MESA,
               .pNext = &src_view_usage,
               .image = info->srcImage,
               .viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY,
               .format = canonical,
               .subresourceRange =
                  {
                     .aspectMask =
                        region->srcSubresource.aspectMask & (1 << aspect),
                     .baseMipLevel = region->srcSubresource.mipLevel,
                     .baseArrayLayer =
                        MAX2(region->srcOffset.z,
                             region->srcSubresource.baseArrayLayer) +
                        layer_offs,
                     .layerCount = per_layer ? 1 : layers,
                     .levelCount = 1,
                  },
            };

            result =
               vk_meta_create_image_view(cmd, meta, &src_view_info, &src_view);
            if (unlikely(result != VK_SUCCESS)) {
               vk_command_buffer_set_error(cmd, result);
               return;
            }

            VkDescriptorImageInfo src_info = {
               .imageLayout = info->srcImageLayout,
               .imageView = src_view,
            };

            VkImageView dst_view;
            const VkImageViewUsageCreateInfo dst_view_usage = {
               .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO,
               .usage = VK_IMAGE_USAGE_STORAGE_BIT,
            };
            const VkImageViewCreateInfo dst_view_info = {
               .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
               .flags = VK_IMAGE_VIEW_CREATE_DRIVER_INTERNAL_BIT_MESA,
               .pNext = &dst_view_usage,
               .image = info->dstImage,
               .viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY,
               .format = vk_format_from_pipe_format(key.dst_format),
               .subresourceRange =
                  {
                     .aspectMask = dst_aspect_mask,
                     .baseMipLevel = region->dstSubresource.mipLevel,
                     .baseArrayLayer =
                        MAX2(region->dstOffset.z,
                             region->dstSubresource.baseArrayLayer) +
                        layer_offs,
                     .layerCount = per_layer ? 1 : layers,
                     .levelCount = 1,
                  },
            };

            result =
               vk_meta_create_image_view(cmd, meta, &dst_view_info, &dst_view);
            if (unlikely(result != VK_SUCCESS)) {
               vk_command_buffer_set_error(cmd, result);
               return;
            }

            const VkDescriptorImageInfo dst_info = {
               .imageView = dst_view,
               .imageLayout = info->dstImageLayout,
            };

            desc_writes[0] = (VkWriteDescriptorSet){
               .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
               .dstSet = 0,
               .dstBinding = BINDING_OUTPUT,
               .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
               .descriptorCount = 1,
               .pImageInfo = &dst_info,
            };

            desc_writes[1] = (VkWriteDescriptorSet){
               .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
               .dstSet = 0,
               .dstBinding = BINDING_INPUT,
               .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
               .descriptorCount = 1,
               .pImageInfo = &src_info,
            };

            disp->CmdPushDescriptorSetKHR(
               vk_command_buffer_to_handle(cmd), VK_PIPELINE_BIND_POINT_COMPUTE,
               pipeline_layout, 0, ARRAY_SIZE(desc_writes), desc_writes);

            VkPipeline pipeline;
            result = get_image_copy_pipeline(device, meta, &key,
                                             pipeline_layout, &pipeline);
            if (unlikely(result != VK_SUCCESS)) {
               vk_command_buffer_set_error(cmd, result);
               return;
            }

            disp->CmdBindPipeline(vk_command_buffer_to_handle(cmd),
                                  VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);

            enum pipe_format p_src_fmt =
               hk_format_to_pipe_format(src_image->format);
            enum pipe_format p_dst_fmt =
               hk_format_to_pipe_format(dst_image->format);
            enum pipe_format p_format = hk_format_to_pipe_format(aspect_fmt);

            struct vk_meta_push_data push = {
               .src_offset_el[0] =
                  util_format_get_nblocksx(p_src_fmt, region->srcOffset.x),
               .src_offset_el[1] =
                  util_format_get_nblocksy(p_src_fmt, region->srcOffset.y),
            };

            hk_meta_dispatch_to_image(cmd, disp, pipeline_layout, &push,
                                      region->dstOffset, region->extent,
                                      per_layer, layers, p_dst_fmt, p_format);
         }
      }
   }
}

static inline struct vk_meta_copy_image_properties
hk_meta_copy_get_image_properties(struct hk_image *img)
{
   struct vk_meta_copy_image_properties props = {.tile_size = {16, 16, 1}};

   if (!vk_format_is_depth_or_stencil(img->vk.format)) {
      props.color.view_format = img->vk.format;
   } else {
      switch (img->vk.format) {
      case VK_FORMAT_S8_UINT:
         props.stencil.view_format = VK_FORMAT_R8_UINT;
         props.stencil.component_mask = BITFIELD_MASK(1);
         break;
      case VK_FORMAT_D32_SFLOAT_S8_UINT:
         props.depth.view_format = VK_FORMAT_R32G32_UINT;
         props.depth.component_mask = BITFIELD_BIT(0);
         props.stencil.view_format = VK_FORMAT_R32G32_UINT;
         props.stencil.component_mask = BITFIELD_BIT(1);
         break;
      case VK_FORMAT_D16_UNORM:
         props.depth.view_format = VK_FORMAT_R16_UINT;
         props.depth.component_mask = BITFIELD_BIT(0);
         break;
      case VK_FORMAT_D32_SFLOAT:
         props.depth.view_format = VK_FORMAT_R32_UINT;
         props.depth.component_mask = BITFIELD_BIT(0);
         break;
      default:
         unreachable("Invalid ZS format");
      }
   }

   return props;
}

VKAPI_ATTR void VKAPI_CALL
hk_CmdBlitImage2(VkCommandBuffer commandBuffer,
                 const VkBlitImageInfo2 *pBlitImageInfo)
{
   VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
   struct hk_device *dev = hk_cmd_buffer_device(cmd);
   perf_debug(dev, "Blit image");

   struct hk_meta_save save;
   hk_meta_begin(cmd, &save, VK_PIPELINE_BIND_POINT_GRAPHICS);
   vk_meta_blit_image2(&cmd->vk, &dev->meta, pBlitImageInfo);
   hk_meta_end(cmd, &save, VK_PIPELINE_BIND_POINT_GRAPHICS);
}

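/* Explicit vkCmdResolveImage2 resolves also render through vk_meta. */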
VKAPI_ATTR void VKAPI_CALL
hk_CmdResolveImage2(VkCommandBuffer commandBuffer,
                    const VkResolveImageInfo2 *pResolveImageInfo)
{
   VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
   struct hk_device *dev = hk_cmd_buffer_device(cmd);
   perf_debug(dev, "Resolve");

   struct hk_meta_save save;
   hk_meta_begin(cmd, &save, VK_PIPELINE_BIND_POINT_GRAPHICS);
   vk_meta_resolve_image2(&cmd->vk, &dev->meta, pResolveImageInfo);
   hk_meta_end(cmd, &save, VK_PIPELINE_BIND_POINT_GRAPHICS);
}

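/* Resolve pending attachment resolves for dynamic rendering. Not an API
 * entrypoint; presumably invoked from the end-of-rendering path.
 */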
void
hk_meta_resolve_rendering(struct hk_cmd_buffer *cmd,
                          const VkRenderingInfo *pRenderingInfo)
{
   struct hk_device *dev = hk_cmd_buffer_device(cmd);

   struct hk_meta_save save;
   hk_meta_begin(cmd, &save, VK_PIPELINE_BIND_POINT_GRAPHICS);
   vk_meta_resolve_rendering(&cmd->vk, &dev->meta, pRenderingInfo);
   hk_meta_end(cmd, &save, VK_PIPELINE_BIND_POINT_GRAPHICS);
}

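/* Buffer-to-buffer copies are plain compute dispatches. */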
VKAPI_ATTR void VKAPI_CALL
hk_CmdCopyBuffer2(VkCommandBuffer commandBuffer,
                  const VkCopyBufferInfo2 *pCopyBufferInfo)
{
   VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
   struct hk_device *dev = hk_cmd_buffer_device(cmd);

   struct hk_meta_save save;
   hk_meta_begin(cmd, &save, VK_PIPELINE_BIND_POINT_COMPUTE);
   vk_meta_copy_buffer(&cmd->vk, &dev->meta, pCopyBufferInfo);
   hk_meta_end(cmd, &save, VK_PIPELINE_BIND_POINT_COMPUTE);
}

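/* Copies writing a compressed multisampled image fall back to rasterization,
 * presumably because the compute path's image stores cannot write the
 * compressed MSAA layout directly.
 */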
static bool
hk_copy_requires_gfx(struct hk_image *img)
{
   return img->vk.samples > 1 && ail_is_compressed(&img->planes[0].layout);
}

static VkPipelineBindPoint
hk_bind_point(bool gfx)
{
   return gfx ? VK_PIPELINE_BIND_POINT_GRAPHICS
              : VK_PIPELINE_BIND_POINT_COMPUTE;
}

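/* Buffer-to-image copies prefer the compute path, falling back to the
 * vk_meta graphics path only for destinations that require it.
 */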
VKAPI_ATTR void VKAPI_CALL
hk_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer,
                         const VkCopyBufferToImageInfo2 *pCopyBufferToImageInfo)
{
   VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
   VK_FROM_HANDLE(hk_image, dst_image, pCopyBufferToImageInfo->dstImage);
   struct hk_device *dev = hk_cmd_buffer_device(cmd);

   bool gfx = hk_copy_requires_gfx(dst_image);
   VkPipelineBindPoint bind_point = hk_bind_point(gfx);

   struct hk_meta_save save;
   hk_meta_begin(cmd, &save, bind_point);

   if (gfx) {
      struct vk_meta_copy_image_properties dst_props =
         hk_meta_copy_get_image_properties(dst_image);

      vk_meta_copy_buffer_to_image(&cmd->vk, &dev->meta, pCopyBufferToImageInfo,
                                   &dst_props, bind_point);
   } else {
      hk_meta_copy_buffer_to_image2(&cmd->vk, &dev->meta,
                                    pCopyBufferToImageInfo);
   }

   hk_meta_end(cmd, &save, bind_point);
}

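/* Image-to-buffer copies always run on compute: the image is only read here,
 * and only image writes ever need the graphics fallback.
 */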
VKAPI_ATTR void VKAPI_CALL
hk_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer,
                         const VkCopyImageToBufferInfo2 *pCopyImageToBufferInfo)
{
   VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
   struct hk_device *dev = hk_cmd_buffer_device(cmd);

   struct hk_meta_save save;
   hk_meta_begin(cmd, &save, VK_PIPELINE_BIND_POINT_COMPUTE);
   hk_meta_copy_image_to_buffer2(&cmd->vk, &dev->meta, pCopyImageToBufferInfo);
   hk_meta_end(cmd, &save, VK_PIPELINE_BIND_POINT_COMPUTE);
}

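/* Image-to-image copies mirror the buffer-to-image logic: compute when
 * possible, graphics when the destination demands it.
 */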
VKAPI_ATTR void VKAPI_CALL
hk_CmdCopyImage2(VkCommandBuffer commandBuffer,
                 const VkCopyImageInfo2 *pCopyImageInfo)
{
   VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
   VK_FROM_HANDLE(hk_image, src_image, pCopyImageInfo->srcImage);
   VK_FROM_HANDLE(hk_image, dst_image, pCopyImageInfo->dstImage);
   struct hk_device *dev = hk_cmd_buffer_device(cmd);
   bool gfx = hk_copy_requires_gfx(dst_image);
   VkPipelineBindPoint bind_point = hk_bind_point(gfx);

   struct hk_meta_save save;
   hk_meta_begin(cmd, &save, bind_point);

   if (gfx) {
      struct vk_meta_copy_image_properties src_props =
         hk_meta_copy_get_image_properties(src_image);
      struct vk_meta_copy_image_properties dst_props =
         hk_meta_copy_get_image_properties(dst_image);

      vk_meta_copy_image(&cmd->vk, &dev->meta, pCopyImageInfo, &src_props,
                         &dst_props, bind_point);
   } else {
      hk_meta_copy_image2(&cmd->vk, &dev->meta, pCopyImageInfo);
   }

   hk_meta_end(cmd, &save, bind_point);
}

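/* Fill a buffer range with a repeated 32-bit pattern via a compute dispatch. */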
VKAPI_ATTR void VKAPI_CALL
hk_CmdFillBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer,
                 VkDeviceSize dstOffset, VkDeviceSize dstRange, uint32_t data)
{
   VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
   struct hk_device *dev = hk_cmd_buffer_device(cmd);

   struct hk_meta_save save;
   hk_meta_begin(cmd, &save, VK_PIPELINE_BIND_POINT_COMPUTE);
   vk_meta_fill_buffer(&cmd->vk, &dev->meta, dstBuffer, dstOffset, dstRange,
                       data);
   hk_meta_end(cmd, &save, VK_PIPELINE_BIND_POINT_COMPUTE);
}

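/* vkCmdUpdateBuffer sources at most 64 KiB of inline data from the command
 * stream; vk_meta stages the data and performs the copy on compute.
 */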
VKAPI_ATTR void VKAPI_CALL
hk_CmdUpdateBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer,
                   VkDeviceSize dstOffset, VkDeviceSize dstRange,
                   const void *pData)
{
   VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
   struct hk_device *dev = hk_cmd_buffer_device(cmd);

   struct hk_meta_save save;
   hk_meta_begin(cmd, &save, VK_PIPELINE_BIND_POINT_COMPUTE);
   vk_meta_update_buffer(&cmd->vk, &dev->meta, dstBuffer, dstOffset, dstRange,
                         pData);
   hk_meta_end(cmd, &save, VK_PIPELINE_BIND_POINT_COMPUTE);
}

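/* Attachment clears draw inside the current render pass, so the meta draw
 * must match the active rendering state; hk_meta_init_render pulls the
 * attachment formats and sample count from the command buffer.
 */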
VKAPI_ATTR void VKAPI_CALL
hk_CmdClearAttachments(VkCommandBuffer commandBuffer, uint32_t attachmentCount,
                       const VkClearAttachment *pAttachments,
                       uint32_t rectCount, const VkClearRect *pRects)
{
   VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
   struct hk_device *dev = hk_cmd_buffer_device(cmd);

   struct vk_meta_rendering_info render_info;
   hk_meta_init_render(cmd, &render_info);

   struct hk_meta_save save;
   hk_meta_begin(cmd, &save, VK_PIPELINE_BIND_POINT_GRAPHICS);
   vk_meta_clear_attachments(&cmd->vk, &dev->meta, &render_info,
                             attachmentCount, pAttachments, rectCount, pRects);
   hk_meta_end(cmd, &save, VK_PIPELINE_BIND_POINT_GRAPHICS);
}