1 /*
2 * Copyright 2024 Valve Corporation
3 * Copyright 2024 Alyssa Rosenzweig
4 * Copyright 2022-2023 Collabora Ltd. and Red Hat Inc.
5 * SPDX-License-Identifier: MIT
6 */
7 #include "util/format/u_format.h"
8 #include "util/format/u_formats.h"
9 #include "util/u_math.h"
10 #include "vulkan/vulkan_core.h"
11 #include "agx_pack.h"
12 #include "hk_buffer.h"
13 #include "hk_cmd_buffer.h"
14 #include "hk_device.h"
15 #include "hk_entrypoints.h"
16 #include "hk_image.h"
17 #include "hk_physical_device.h"
18
19 #include "layout.h"
20 #include "nir_builder.h"
21 #include "nir_builder_opcodes.h"
22 #include "nir_format_convert.h"
23 #include "shader_enums.h"
24 #include "vk_enum_to_str.h"
25 #include "vk_format.h"
26 #include "vk_meta.h"
27 #include "vk_pipeline.h"
28
29 /* For block-based blit kernels, we hardcode the maximum tile size, which we
30 * can always achieve. This simplifies our life.
31 */
32 #define TILE_WIDTH 32
33 #define TILE_HEIGHT 32
34
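/*
 * vk_meta uses this hook to back its transient buffers. We allocate the
 * storage from the command buffer's pool, point the buffer at the resulting
 * GPU address, and hand the CPU mapping back so vk_meta can fill it directly.
 */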
35 static VkResult
36 hk_cmd_bind_map_buffer(struct vk_command_buffer *vk_cmd,
37 struct vk_meta_device *meta, VkBuffer _buffer,
38 void **map_out)
39 {
40 struct hk_cmd_buffer *cmd = container_of(vk_cmd, struct hk_cmd_buffer, vk);
41 VK_FROM_HANDLE(hk_buffer, buffer, _buffer);
42
43 assert(buffer->vk.size < UINT_MAX);
44 struct agx_ptr T = hk_pool_alloc(cmd, buffer->vk.size, 16);
45 if (unlikely(T.cpu == NULL))
46 return VK_ERROR_OUT_OF_POOL_MEMORY;
47
48 buffer->addr = T.gpu;
49 *map_out = T.cpu;
50 return VK_SUCCESS;
51 }
52
53 VkResult
54 hk_device_init_meta(struct hk_device *dev)
55 {
56 VkResult result = vk_meta_device_init(&dev->vk, &dev->meta);
57 if (result != VK_SUCCESS)
58 return result;
59
60 dev->meta.use_gs_for_layer = false;
61 dev->meta.use_stencil_export = true;
62 dev->meta.use_rect_list_pipeline = true;
63 dev->meta.cmd_bind_map_buffer = hk_cmd_bind_map_buffer;
64 dev->meta.max_bind_map_buffer_size_B = 64 * 1024;
65
66 for (unsigned i = 0; i < VK_META_BUFFER_CHUNK_SIZE_COUNT; ++i) {
67 dev->meta.buffer_access.optimal_wg_size[i] = 64;
68 }
69
70 return VK_SUCCESS;
71 }
72
73 void
74 hk_device_finish_meta(struct hk_device *dev)
75 {
76 vk_meta_device_finish(&dev->vk, &dev->meta);
77 }
78
79 struct hk_meta_save {
80 struct vk_vertex_input_state _dynamic_vi;
81 struct vk_sample_locations_state _dynamic_sl;
82 struct vk_dynamic_graphics_state dynamic;
83 struct hk_api_shader *shaders[MESA_SHADER_MESH + 1];
84 struct hk_addr_range vb0;
85 struct hk_descriptor_set *desc0;
86 bool has_push_desc0;
87 enum agx_visibility_mode occlusion;
88 struct hk_push_descriptor_set push_desc0;
89 VkQueryPipelineStatisticFlags pipeline_stats_flags;
90 uint8_t push[HK_MAX_PUSH_SIZE];
91 };
92
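/*
 * Every meta operation is bracketed by hk_meta_begin()/hk_meta_end(). These
 * save and later restore the state that vk_meta is allowed to clobber:
 * dynamic graphics state, bound shaders, vertex buffer 0, descriptor set 0
 * (or the push descriptor set), and the push constant block. Occlusion
 * queries and pipeline statistics are paused while cmd->in_meta is set so
 * internal draws and dispatches are not counted against application queries.
 *
 * Typical usage, as in the entrypoints at the bottom of this file:
 *
 *    struct hk_meta_save save;
 *    hk_meta_begin(cmd, &save, VK_PIPELINE_BIND_POINT_COMPUTE);
 *    ...record meta work...
 *    hk_meta_end(cmd, &save, VK_PIPELINE_BIND_POINT_COMPUTE);
 */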
93 static void
94 hk_meta_begin(struct hk_cmd_buffer *cmd, struct hk_meta_save *save,
95 VkPipelineBindPoint bind_point)
96 {
97 struct hk_descriptor_state *desc = hk_get_descriptors_state(cmd, bind_point);
98
99 if (bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) {
100 save->dynamic = cmd->vk.dynamic_graphics_state;
101 save->_dynamic_vi = cmd->state.gfx._dynamic_vi;
102 save->_dynamic_sl = cmd->state.gfx._dynamic_sl;
103
104 static_assert(sizeof(cmd->state.gfx.shaders) == sizeof(save->shaders));
105 memcpy(save->shaders, cmd->state.gfx.shaders, sizeof(save->shaders));
106
107 /* Pause queries */
108 save->occlusion = cmd->state.gfx.occlusion.mode;
109 cmd->state.gfx.occlusion.mode = AGX_VISIBILITY_MODE_NONE;
110 cmd->state.gfx.dirty |= HK_DIRTY_OCCLUSION;
111
112 save->pipeline_stats_flags = desc->root.draw.pipeline_stats_flags;
113 desc->root.draw.pipeline_stats_flags = 0;
114 desc->root_dirty = true;
115 } else {
116 save->shaders[MESA_SHADER_COMPUTE] = cmd->state.cs.shader;
117 }
118
119 save->vb0 = cmd->state.gfx.vb[0];
120
121 save->desc0 = desc->sets[0];
122 save->has_push_desc0 = desc->push[0];
123 if (save->has_push_desc0)
124 save->push_desc0 = *desc->push[0];
125
126 static_assert(sizeof(save->push) == sizeof(desc->root.push));
127 memcpy(save->push, desc->root.push, sizeof(save->push));
128
129 cmd->in_meta = true;
130 }
131
132 static void
133 hk_meta_init_render(struct hk_cmd_buffer *cmd,
134 struct vk_meta_rendering_info *info)
135 {
136 const struct hk_rendering_state *render = &cmd->state.gfx.render;
137
138 *info = (struct vk_meta_rendering_info){
139 .samples = MAX2(render->tilebuffer.nr_samples, 1),
140 .view_mask = render->view_mask,
141 .color_attachment_count = render->color_att_count,
142 .depth_attachment_format = render->depth_att.vk_format,
143 .stencil_attachment_format = render->stencil_att.vk_format,
144 };
145 for (uint32_t a = 0; a < render->color_att_count; a++) {
146 info->color_attachment_formats[a] = render->color_att[a].vk_format;
147 info->color_attachment_write_masks[a] =
148 VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
149 VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT;
150 }
151 }
152
153 static void
154 hk_meta_end(struct hk_cmd_buffer *cmd, struct hk_meta_save *save,
155 VkPipelineBindPoint bind_point)
156 {
157 struct hk_descriptor_state *desc = hk_get_descriptors_state(cmd, bind_point);
158 desc->root_dirty = true;
159
160 if (save->desc0) {
161 desc->sets[0] = save->desc0;
162 desc->root.sets[0] = hk_descriptor_set_addr(save->desc0);
163 desc->sets_dirty |= BITFIELD_BIT(0);
164 desc->push_dirty &= ~BITFIELD_BIT(0);
165 } else if (save->has_push_desc0) {
166 *desc->push[0] = save->push_desc0;
167 desc->push_dirty |= BITFIELD_BIT(0);
168 }
169
170 if (bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) {
171 /* Restore the dynamic state */
172 assert(save->dynamic.vi == &cmd->state.gfx._dynamic_vi);
173 assert(save->dynamic.ms.sample_locations == &cmd->state.gfx._dynamic_sl);
174 cmd->vk.dynamic_graphics_state = save->dynamic;
175 cmd->state.gfx._dynamic_vi = save->_dynamic_vi;
176 cmd->state.gfx._dynamic_sl = save->_dynamic_sl;
177 memcpy(cmd->vk.dynamic_graphics_state.dirty,
178 cmd->vk.dynamic_graphics_state.set,
179 sizeof(cmd->vk.dynamic_graphics_state.set));
180
181 for (uint32_t stage = 0; stage < ARRAY_SIZE(save->shaders); stage++) {
182 hk_cmd_bind_graphics_shader(cmd, stage, save->shaders[stage]);
183 }
184
185 hk_cmd_bind_vertex_buffer(cmd, 0, save->vb0);
186
187 /* Restore queries */
188 cmd->state.gfx.occlusion.mode = save->occlusion;
189 cmd->state.gfx.dirty |= HK_DIRTY_OCCLUSION;
190
191 desc->root.draw.pipeline_stats_flags = save->pipeline_stats_flags;
192 desc->root_dirty = true;
193 } else {
194 hk_cmd_bind_compute_shader(cmd, save->shaders[MESA_SHADER_COMPUTE]);
195 }
196
197 memcpy(desc->root.push, save->push, sizeof(save->push));
198 cmd->in_meta = false;
199 }
200
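/*
 * The copy shaders below use a single descriptor set with a fixed layout:
 * binding 0 is the output storage image and binding 1 is the input sampled
 * image (or texel buffer). Everything else is passed through push constants.
 */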
201 #define BINDING_OUTPUT 0
202 #define BINDING_INPUT 1
203
204 static VkFormat
205 aspect_format(VkFormat fmt, VkImageAspectFlags aspect)
206 {
207 bool depth = (aspect & VK_IMAGE_ASPECT_DEPTH_BIT);
208 bool stencil = (aspect & VK_IMAGE_ASPECT_STENCIL_BIT);
209
210 enum pipe_format p_format = hk_format_to_pipe_format(fmt);
211
212 if (util_format_is_depth_or_stencil(p_format)) {
213 assert(depth ^ stencil);
214 if (depth) {
215 switch (fmt) {
216 case VK_FORMAT_D32_SFLOAT:
217 case VK_FORMAT_D32_SFLOAT_S8_UINT:
218 return VK_FORMAT_D32_SFLOAT;
219 case VK_FORMAT_D16_UNORM:
220 case VK_FORMAT_D16_UNORM_S8_UINT:
221 return VK_FORMAT_D16_UNORM;
222 default:
223 unreachable("invalid depth");
224 }
225 } else {
226 switch (fmt) {
227 case VK_FORMAT_S8_UINT:
228 case VK_FORMAT_D32_SFLOAT_S8_UINT:
229 case VK_FORMAT_D16_UNORM_S8_UINT:
230 return VK_FORMAT_S8_UINT;
231 default:
232 unreachable("invalid stencil");
233 }
234 }
235 }
236
237 assert(!depth && !stencil);
238
239 const struct vk_format_ycbcr_info *ycbcr_info =
240 vk_format_get_ycbcr_info(fmt);
241
242 if (ycbcr_info) {
243 switch (aspect) {
244 case VK_IMAGE_ASPECT_PLANE_0_BIT:
245 return ycbcr_info->planes[0].format;
246 case VK_IMAGE_ASPECT_PLANE_1_BIT:
247 return ycbcr_info->planes[1].format;
248 case VK_IMAGE_ASPECT_PLANE_2_BIT:
249 return ycbcr_info->planes[2].format;
250 default:
251 unreachable("invalid ycbcr aspect");
252 }
253 }
254
255 return fmt;
256 }
257
258 /*
259 * Canonicalize formats to simplify the copies. The returned format must be in
260 * the same compression class and should round-trip losslessly (minifloat
261 * formats are the unfortunate exception).
262 */
263 static enum pipe_format
264 canonical_format_pipe(enum pipe_format fmt, bool canonicalize_zs)
265 {
266 if (!canonicalize_zs && util_format_is_depth_or_stencil(fmt))
267 return fmt;
268
269 assert(ail_is_valid_pixel_format(fmt));
270
271 if (util_format_is_compressed(fmt)) {
272 unsigned size_B = util_format_get_blocksize(fmt);
273 assert(size_B == 8 || size_B == 16);
274
275 return size_B == 16 ? PIPE_FORMAT_R32G32B32A32_UINT
276 : PIPE_FORMAT_R32G32_UINT;
277 }
278
279 #define CASE(x, y) [AGX_CHANNELS_##x] = PIPE_FORMAT_##y
280 /* clang-format off */
281 static enum pipe_format map[] = {
282 CASE(R8, R8_UINT),
283 CASE(R16, R16_UNORM /* XXX: Hack for Z16 copies */),
284 CASE(R8G8, R8G8_UINT),
285 CASE(R5G6B5, R5G6B5_UNORM),
286 CASE(R4G4B4A4, R4G4B4A4_UNORM),
287 CASE(A1R5G5B5, A1R5G5B5_UNORM),
288 CASE(R5G5B5A1, B5G5R5A1_UNORM),
289 CASE(R32, R32_UINT),
290 CASE(R16G16, R16G16_UINT),
291 CASE(R11G11B10, R11G11B10_FLOAT),
292 CASE(R10G10B10A2, R10G10B10A2_UNORM),
293 CASE(R9G9B9E5, R9G9B9E5_FLOAT),
294 CASE(R8G8B8A8, R8G8B8A8_UINT),
295 CASE(R32G32, R32G32_UINT),
296 CASE(R16G16B16A16, R16G16B16A16_UINT),
297 CASE(R32G32B32A32, R32G32B32A32_UINT),
298 };
299 /* clang-format on */
300 #undef CASE
301
302 enum agx_channels channels = ail_pixel_format[fmt].channels;
303 assert(channels < ARRAY_SIZE(map) && "all valid channels handled");
304 assert(map[channels] != PIPE_FORMAT_NONE && "all valid channels handled");
305 return map[channels];
306 }
307
308 static VkFormat
309 canonical_format(VkFormat fmt)
310 {
311 return vk_format_from_pipe_format(
312 canonical_format_pipe(hk_format_to_pipe_format(fmt), false));
313 }
314
315 enum copy_type {
316 BUF2IMG,
317 IMG2BUF,
318 IMG2IMG,
319 };
320
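/*
 * Push constants for the copy shaders. The buffer address and the row/slice
 * strides are in bytes, while the offsets and the copy grid are in (block)
 * elements of the canonical format.
 */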
321 struct vk_meta_push_data {
322 uint64_t buffer;
323
324 uint32_t row_extent;
325 uint32_t slice_or_layer_extent;
326
327 int32_t src_offset_el[4];
328 int32_t dst_offset_el[4];
329 uint32_t grid_el[3];
330 } PACKED;
331
332 #define get_push(b, name) \
333 nir_load_push_constant( \
334 b, 1, sizeof(((struct vk_meta_push_data *)0)->name) * 8, \
335 nir_imm_int(b, offsetof(struct vk_meta_push_data, name)))
336
337 struct vk_meta_image_copy_key {
338 enum vk_meta_object_key_type key_type;
339 enum copy_type type;
340 enum pipe_format src_format, dst_format;
341 unsigned block_size;
342 unsigned nr_samples;
343 bool block_based;
344 };
345
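/*
 * Convert an (x, y, z/layer) element coordinate into a linear element index
 * within the buffer, using the byte strides from the push constants:
 *
 *    byte offset   = x * block_size + y * row_extent + z * slice_or_layer_extent
 *    element index = byte offset / block_size
 *
 * The division is exact because both strides are multiples of the block size.
 */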
346 static nir_def *
347 linearize_coords(nir_builder *b, nir_def *coord,
348 const struct vk_meta_image_copy_key *key)
349 {
350 assert(key->nr_samples == 1 && "buffer<-->image copies not multisampled");
351
352 nir_def *row_extent = get_push(b, row_extent);
353 nir_def *slice_or_layer_extent = get_push(b, slice_or_layer_extent);
354 nir_def *x = nir_channel(b, coord, 0);
355 nir_def *y = nir_channel(b, coord, 1);
356 nir_def *z_or_layer = nir_channel(b, coord, 2);
357
358 nir_def *v = nir_imul_imm(b, x, key->block_size);
359
360 v = nir_iadd(b, v, nir_imul(b, y, row_extent));
361 v = nir_iadd(b, v, nir_imul(b, z_or_layer, slice_or_layer_extent));
362
363 return nir_udiv_imm(b, v, key->block_size);
364 }
365
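/*
 * Formatted buffer access takes one of two paths: formats the hardware can
 * load/store directly go through nir_{load,store}_agx with the matching ISA
 * format, and everything else (the 16-bit packed formats, plus a few formats
 * that currently fail for unknown reasons) is emulated with raw global memory
 * access and a software pack/unpack.
 */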
366 static bool
367 is_format_native(enum pipe_format format)
368 {
369 switch (format) {
370 case PIPE_FORMAT_R8_UINT:
371 case PIPE_FORMAT_R8G8_UINT:
372 case PIPE_FORMAT_R32_UINT:
373 case PIPE_FORMAT_R32G32_UINT:
374 case PIPE_FORMAT_R16G16_UINT:
375 case PIPE_FORMAT_R16_UNORM:
376 /* TODO: debug me .. why do these fail */
377 return false;
378 case PIPE_FORMAT_R11G11B10_FLOAT:
379 case PIPE_FORMAT_R9G9B9E5_FLOAT:
380 case PIPE_FORMAT_R16G16B16A16_UINT:
381 case PIPE_FORMAT_R32G32B32A32_UINT:
382 case PIPE_FORMAT_R8G8B8A8_UINT:
383 case PIPE_FORMAT_R10G10B10A2_UNORM:
384 return true;
385 case PIPE_FORMAT_R5G6B5_UNORM:
386 case PIPE_FORMAT_R4G4B4A4_UNORM:
387 case PIPE_FORMAT_A1R5G5B5_UNORM:
388 case PIPE_FORMAT_B5G5R5A1_UNORM:
389 return false;
390 default:
391 unreachable("expected canonical");
392 }
393 }
394
395 static nir_def *
396 load_store_formatted(nir_builder *b, nir_def *base, nir_def *index,
397 nir_def *value, enum pipe_format format)
398 {
399 if (util_format_is_depth_or_stencil(format))
400 format = canonical_format_pipe(format, true);
401
402 if (is_format_native(format)) {
403 enum pipe_format isa = ail_pixel_format[format].renderable;
404 unsigned isa_size = util_format_get_blocksize(isa);
405 unsigned isa_components = util_format_get_blocksize(format) / isa_size;
406 unsigned shift = util_logbase2(isa_components);
407
408 if (value) {
409 nir_store_agx(b, value, base, index, .format = isa, .base = shift);
410 } else {
411 return nir_load_agx(b, 4, 32, base, index, .format = isa,
412 .base = shift);
413 }
414 } else {
415 unsigned blocksize_B = util_format_get_blocksize(format);
416 nir_def *addr =
417 nir_iadd(b, base, nir_imul_imm(b, nir_u2u64(b, index), blocksize_B));
418
419 if (value) {
420 nir_def *raw = nir_format_pack_rgba(b, format, value);
421
422 if (blocksize_B <= 4) {
423 assert(raw->num_components == 1);
424 raw = nir_u2uN(b, raw, blocksize_B * 8);
425 } else {
426 assert(raw->bit_size == 32);
427 raw = nir_trim_vector(b, raw, blocksize_B / 4);
428 }
429
430 nir_store_global(b, addr, blocksize_B, raw,
431 nir_component_mask(raw->num_components));
432 } else {
433 nir_def *raw =
434 nir_load_global(b, addr, blocksize_B, DIV_ROUND_UP(blocksize_B, 4),
435 MIN2(32, blocksize_B * 8));
436
437 return nir_format_unpack_rgba(b, raw, format);
438 }
439 }
440
441 return NULL;
442 }
443
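/*
 * Build the compute shader for a single copy variant described by the key.
 * Each workgroup handles one TILE_WIDTH x TILE_HEIGHT block of the
 * destination. Sources are read with texel fetches or formatted buffer loads.
 * Destinations are written with image stores, formatted buffer stores, or,
 * on the block-based path, staged in local pixel storage and then flushed
 * with a single block store per tile.
 */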
444 static nir_shader *
445 build_image_copy_shader(const struct vk_meta_image_copy_key *key)
446 {
447 nir_builder build =
448 nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "hk-meta-copy");
449
450 nir_builder *b = &build;
451 b->shader->info.workgroup_size[0] = TILE_WIDTH;
452 b->shader->info.workgroup_size[1] = TILE_HEIGHT;
453
454 bool src_is_buf = key->type == BUF2IMG;
455 bool dst_is_buf = key->type == IMG2BUF;
456
457 bool msaa = key->nr_samples > 1;
458 enum glsl_sampler_dim dim_2d =
459 msaa ? GLSL_SAMPLER_DIM_MS : GLSL_SAMPLER_DIM_2D;
460 enum glsl_sampler_dim dim_src = src_is_buf ? GLSL_SAMPLER_DIM_BUF : dim_2d;
461 enum glsl_sampler_dim dim_dst = dst_is_buf ? GLSL_SAMPLER_DIM_BUF : dim_2d;
462
463 const struct glsl_type *texture_type =
464 glsl_sampler_type(dim_src, false, !src_is_buf, GLSL_TYPE_UINT);
465
466 const struct glsl_type *image_type =
467 glsl_image_type(dim_dst, !dst_is_buf, GLSL_TYPE_UINT);
468
469 nir_variable *texture =
470 nir_variable_create(b->shader, nir_var_uniform, texture_type, "source");
471 nir_variable *image =
472 nir_variable_create(b->shader, nir_var_image, image_type, "dest");
473
474 image->data.descriptor_set = 0;
475 image->data.binding = BINDING_OUTPUT;
476 image->data.access = ACCESS_NON_READABLE;
477
478 texture->data.descriptor_set = 0;
479 texture->data.binding = BINDING_INPUT;
480
481 /* Grab the offset vectors */
482 nir_def *src_offset_el = nir_load_push_constant(
483 b, 3, 32,
484 nir_imm_int(b, offsetof(struct vk_meta_push_data, src_offset_el)));
485
486 nir_def *dst_offset_el = nir_load_push_constant(
487 b, 3, 32,
488 nir_imm_int(b, offsetof(struct vk_meta_push_data, dst_offset_el)));
489
490 nir_def *grid_2d_el = nir_load_push_constant(
491 b, 2, 32, nir_imm_int(b, offsetof(struct vk_meta_push_data, grid_el)));
492
493 /* We're done setting up variables, do the copy */
494 nir_def *coord = nir_load_global_invocation_id(b, 32);
495
496 /* The destination format is already canonical, convert to an ISA format */
497 enum pipe_format isa_format = PIPE_FORMAT_NONE;
498 if (key->block_based) {
499 enum pipe_format pipe = canonical_format_pipe(key->dst_format, true);
500 isa_format = ail_pixel_format[pipe].renderable;
501 assert(isa_format != PIPE_FORMAT_NONE);
502 }
503
504 nir_def *local_offset = nir_imm_intN_t(b, 0, 16);
505 nir_def *lid = nir_trim_vector(b, nir_load_local_invocation_id(b), 2);
506 lid = nir_u2u16(b, lid);
507
508 nir_def *src_coord = src_is_buf ? coord : nir_iadd(b, coord, src_offset_el);
509 nir_def *dst_coord = dst_is_buf ? coord : nir_iadd(b, coord, dst_offset_el);
510
511 nir_def *image_deref = &nir_build_deref_var(b, image)->def;
512
513 nir_def *coord_2d_el = nir_trim_vector(b, coord, 2);
514 nir_def *in_bounds;
515 if (key->block_based) {
516 nir_def *offset_in_block_el =
517 nir_umod_imm(b, nir_trim_vector(b, dst_offset_el, 2), TILE_WIDTH);
518
519 dst_coord =
520 nir_vector_insert_imm(b, nir_isub(b, dst_coord, offset_in_block_el),
521 nir_channel(b, dst_coord, 2), 2);
522
523 src_coord =
524 nir_vector_insert_imm(b, nir_isub(b, src_coord, offset_in_block_el),
525 nir_channel(b, src_coord, 2), 2);
526
527 in_bounds = nir_uge(b, coord_2d_el, offset_in_block_el);
528 in_bounds = nir_iand(
529 b, in_bounds,
530 nir_ult(b, coord_2d_el, nir_iadd(b, offset_in_block_el, grid_2d_el)));
531 } else {
532 in_bounds = nir_ult(b, coord_2d_el, grid_2d_el);
533 }
534
535 /* Special case: handle buffer indexing */
536 if (dst_is_buf) {
537 assert(!key->block_based);
538 dst_coord = linearize_coords(b, dst_coord, key);
539 } else if (src_is_buf) {
540 src_coord = linearize_coords(b, src_coord, key);
541 }
542
543 for (unsigned s = 0; s < key->nr_samples; ++s) {
544 nir_def *ms_index = nir_imm_int(b, s);
545 nir_def *value1 = NULL, *value2 = NULL;
546
547 nir_push_if(b, nir_ball(b, in_bounds));
548 {
549 /* Copy formatted texel from texture to storage image */
550 nir_deref_instr *deref = nir_build_deref_var(b, texture);
551
552 if (src_is_buf) {
553 value1 = load_store_formatted(b, get_push(b, buffer), src_coord,
554 NULL, key->dst_format);
555 } else {
556 if (msaa) {
557 value1 = nir_txf_ms_deref(b, deref, src_coord, ms_index);
558 } else {
559 value1 = nir_txf_deref(b, deref, src_coord, NULL);
560 }
561
562 /* Munge according to the implicit conversions so we get a bit copy */
563 if (key->src_format != key->dst_format) {
564 nir_def *packed =
565 nir_format_pack_rgba(b, key->src_format, value1);
566
567 value1 = nir_format_unpack_rgba(b, packed, key->dst_format);
568 }
569 }
570
571 if (dst_is_buf) {
572 load_store_formatted(b, get_push(b, buffer), dst_coord, value1,
573 key->dst_format);
574 } else if (!key->block_based) {
575 nir_image_deref_store(b, image_deref, nir_pad_vec4(b, dst_coord),
576 ms_index, value1, nir_imm_int(b, 0),
577 .image_dim = dim_dst,
578 .image_array = !dst_is_buf);
579 }
580 }
581 nir_push_else(b, NULL);
582 if (key->block_based) {
583 /* Copy back the existing destination content */
584 value2 = nir_image_deref_load(b, 4, 32, image_deref,
585 nir_pad_vec4(b, dst_coord), ms_index,
586 nir_imm_int(b, 0), .image_dim = dim_dst,
587 .image_array = !dst_is_buf);
588 }
589 nir_pop_if(b, NULL);
590
591 if (key->block_based) {
592 /* Must define the phi first so we validate. */
593 nir_def *phi = nir_if_phi(b, value1, value2);
594 nir_def *mask = nir_imm_intN_t(b, 1 << s, 16);
595
596 nir_store_local_pixel_agx(b, phi, mask, lid, .base = 0,
597 .write_mask = 0xf, .format = isa_format,
598 .explicit_coord = true);
599 }
600 }
601
602 if (key->block_based) {
603 assert(!dst_is_buf);
604
605 nir_barrier(b, .execution_scope = SCOPE_WORKGROUP);
606
607 nir_push_if(b, nir_ball(b, nir_ieq_imm(b, lid, 0)));
608 {
609 nir_image_deref_store_block_agx(
610 b, image_deref, local_offset, dst_coord, .format = isa_format,
611 .image_dim = dim_2d, .image_array = true, .explicit_coord = true);
612 }
613 nir_pop_if(b, NULL);
614 b->shader->info.cs.image_block_size_per_thread_agx =
615 util_format_get_blocksize(key->dst_format);
616 }
617
618 return b->shader;
619 }
620
621 static VkResult
622 get_image_copy_descriptor_set_layout(struct vk_device *device,
623 struct vk_meta_device *meta,
624 VkDescriptorSetLayout *layout_out,
625 enum copy_type type)
626 {
627 const char *keys[] = {
628 [IMG2BUF] = "vk-meta-copy-image-to-buffer-descriptor-set-layout",
629 [BUF2IMG] = "vk-meta-copy-buffer-to-image-descriptor-set-layout",
630 [IMG2IMG] = "vk-meta-copy-image-to-image-descriptor-set-layout",
631 };
632
633 VkDescriptorSetLayout from_cache = vk_meta_lookup_descriptor_set_layout(
634 meta, keys[type], strlen(keys[type]));
635 if (from_cache != VK_NULL_HANDLE) {
636 *layout_out = from_cache;
637 return VK_SUCCESS;
638 }
639
640 const VkDescriptorSetLayoutBinding bindings[] = {
641 {
642 .binding = BINDING_OUTPUT,
643 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
644 .descriptorCount = 1,
645 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
646 },
647 {
648 .binding = BINDING_INPUT,
649 .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
650 .descriptorCount = 1,
651 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
652 },
653 };
654
655 const VkDescriptorSetLayoutCreateInfo info = {
656 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
657 .bindingCount = ARRAY_SIZE(bindings),
658 .pBindings = bindings,
659 };
660
661 return vk_meta_create_descriptor_set_layout(device, meta, &info, keys[type],
662 strlen(keys[type]), layout_out);
663 }
664
665 static VkResult
666 get_image_copy_pipeline_layout(struct vk_device *device,
667 struct vk_meta_device *meta,
668 struct vk_meta_image_copy_key *key,
669 VkDescriptorSetLayout set_layout,
670 VkPipelineLayout *layout_out,
671 enum copy_type type)
672 {
673 const char *keys[] = {
674 [IMG2BUF] = "vk-meta-copy-image-to-buffer-pipeline-layout",
675 [BUF2IMG] = "vk-meta-copy-buffer-to-image-pipeline-layout",
676 [IMG2IMG] = "vk-meta-copy-image-to-image-pipeline-layout",
677 };
678
679 VkPipelineLayout from_cache =
680 vk_meta_lookup_pipeline_layout(meta, keys[type], strlen(keys[type]));
681 if (from_cache != VK_NULL_HANDLE) {
682 *layout_out = from_cache;
683 return VK_SUCCESS;
684 }
685
686 VkPipelineLayoutCreateInfo info = {
687 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
688 .setLayoutCount = 1,
689 .pSetLayouts = &set_layout,
690 };
691
692 const VkPushConstantRange push_range = {
693 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
694 .offset = 0,
695 .size = sizeof(struct vk_meta_push_data),
696 };
697
698 info.pushConstantRangeCount = 1;
699 info.pPushConstantRanges = &push_range;
700
701 return vk_meta_create_pipeline_layout(device, meta, &info, keys[type],
702 strlen(keys[type]), layout_out);
703 }
704
705 static VkResult
706 get_image_copy_pipeline(struct vk_device *device, struct vk_meta_device *meta,
707 const struct vk_meta_image_copy_key *key,
708 VkPipelineLayout layout, VkPipeline *pipeline_out)
709 {
710 VkPipeline from_cache = vk_meta_lookup_pipeline(meta, key, sizeof(*key));
711 if (from_cache != VK_NULL_HANDLE) {
712 *pipeline_out = from_cache;
713 return VK_SUCCESS;
714 }
715
716 const VkPipelineShaderStageNirCreateInfoMESA nir_info = {
717 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_NIR_CREATE_INFO_MESA,
718 .nir = build_image_copy_shader(key),
719 };
720 const VkPipelineShaderStageCreateInfo cs_info = {
721 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
722 .pNext = &nir_info,
723 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
724 .pName = "main",
725 };
726
727 const VkComputePipelineCreateInfo info = {
728 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
729 .stage = cs_info,
730 .layout = layout,
731 };
732
733 VkResult result = vk_meta_create_compute_pipeline(
734 device, meta, &info, key, sizeof(*key), pipeline_out);
735 ralloc_free(nir_info.nir);
736
737 return result;
738 }
739
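/*
 * Implement vkCmdCopyImageToBuffer2 with a compute dispatch per region. The
 * source is sampled through a 2D array view in the canonical format and the
 * texels are written to the buffer with formatted stores. Compressed images
 * are processed one layer at a time, using a single-layer view per iteration.
 */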
740 static void
741 hk_meta_copy_image_to_buffer2(struct vk_command_buffer *cmd,
742 struct vk_meta_device *meta,
743 const VkCopyImageToBufferInfo2 *pCopyBufferInfo)
744 {
745 VK_FROM_HANDLE(vk_image, image, pCopyBufferInfo->srcImage);
746 VK_FROM_HANDLE(vk_image, src_image, pCopyBufferInfo->srcImage);
747 VK_FROM_HANDLE(hk_buffer, buffer, pCopyBufferInfo->dstBuffer);
748
749 struct vk_device *device = cmd->base.device;
750 const struct vk_device_dispatch_table *disp = &device->dispatch_table;
751
752 VkResult result;
753
754 VkDescriptorSetLayout set_layout;
755 result =
756 get_image_copy_descriptor_set_layout(device, meta, &set_layout, IMG2BUF);
757 if (unlikely(result != VK_SUCCESS)) {
758 vk_command_buffer_set_error(cmd, result);
759 return;
760 }
761
762 bool per_layer =
763 util_format_is_compressed(hk_format_to_pipe_format(image->format));
764
765 for (unsigned i = 0; i < pCopyBufferInfo->regionCount; ++i) {
766 const VkBufferImageCopy2 *region = &pCopyBufferInfo->pRegions[i];
767
768 unsigned layers = MAX2(region->imageExtent.depth,
769 vk_image_subresource_layer_count(
770 src_image, &region->imageSubresource));
771 unsigned layer_iters = per_layer ? layers : 1;
772
773 for (unsigned layer_offs = 0; layer_offs < layer_iters; ++layer_offs) {
774
775 VkImageAspectFlags aspect = region->imageSubresource.aspectMask;
776 VkFormat aspect_fmt = aspect_format(image->format, aspect);
777 VkFormat canonical = canonical_format(aspect_fmt);
778
779 uint32_t blocksize_B =
780 util_format_get_blocksize(hk_format_to_pipe_format(canonical));
781
782 enum pipe_format p_format = hk_format_to_pipe_format(image->format);
783
784 unsigned row_extent = util_format_get_nblocksx(
785 p_format, MAX2(region->bufferRowLength,
786 region->imageExtent.width)) *
787 blocksize_B;
788 unsigned slice_extent =
789 util_format_get_nblocksy(
790 p_format,
791 MAX2(region->bufferImageHeight, region->imageExtent.height)) *
792 row_extent;
793 unsigned layer_extent =
794 util_format_get_nblocksz(p_format, region->imageExtent.depth) *
795 slice_extent;
796
797 bool is_3d = region->imageExtent.depth > 1;
798
799 struct vk_meta_image_copy_key key = {
800 .key_type = VK_META_OBJECT_KEY_COPY_IMAGE_TO_BUFFER_PIPELINE,
801 .type = IMG2BUF,
802 .block_size = blocksize_B,
803 .nr_samples = image->samples,
804 .src_format = hk_format_to_pipe_format(canonical),
805 .dst_format = hk_format_to_pipe_format(canonical),
806 };
807
808 VkPipelineLayout pipeline_layout;
809 result = get_image_copy_pipeline_layout(device, meta, &key, set_layout,
810 &pipeline_layout, false);
811 if (unlikely(result != VK_SUCCESS)) {
812 vk_command_buffer_set_error(cmd, result);
813 return;
814 }
815
816 VkImageView src_view;
817 const VkImageViewUsageCreateInfo src_view_usage = {
818 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO,
819 .usage = VK_IMAGE_USAGE_SAMPLED_BIT,
820 };
821 const VkImageViewCreateInfo src_view_info = {
822 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
823 .flags = VK_IMAGE_VIEW_CREATE_DRIVER_INTERNAL_BIT_MESA,
824 .pNext = &src_view_usage,
825 .image = pCopyBufferInfo->srcImage,
826 .viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY,
827 .format = canonical,
828 .subresourceRange =
829 {
830 .aspectMask = region->imageSubresource.aspectMask,
831 .baseMipLevel = region->imageSubresource.mipLevel,
832 .baseArrayLayer =
833 MAX2(region->imageOffset.z,
834 region->imageSubresource.baseArrayLayer) +
835 layer_offs,
836 .layerCount = per_layer ? 1 : layers,
837 .levelCount = 1,
838 },
839 };
840
841 result =
842 vk_meta_create_image_view(cmd, meta, &src_view_info, &src_view);
843 if (unlikely(result != VK_SUCCESS)) {
844 vk_command_buffer_set_error(cmd, result);
845 return;
846 }
847
848 VkDescriptorImageInfo src_info = {
849 .imageLayout = pCopyBufferInfo->srcImageLayout,
850 .imageView = src_view,
851 };
852
853 VkWriteDescriptorSet desc_write = {
854 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
855 .dstSet = 0,
856 .dstBinding = BINDING_INPUT,
857 .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
858 .descriptorCount = 1,
859 .pImageInfo = &src_info,
860 };
861
862 disp->CmdPushDescriptorSetKHR(vk_command_buffer_to_handle(cmd),
863 VK_PIPELINE_BIND_POINT_COMPUTE,
864 pipeline_layout, 0, 1, &desc_write);
865
866 VkPipeline pipeline;
867 result = get_image_copy_pipeline(device, meta, &key, pipeline_layout,
868 &pipeline);
869 if (unlikely(result != VK_SUCCESS)) {
870 vk_command_buffer_set_error(cmd, result);
871 return;
872 }
873
874 disp->CmdBindPipeline(vk_command_buffer_to_handle(cmd),
875 VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
876
877 enum pipe_format p_src_fmt =
878 hk_format_to_pipe_format(src_image->format);
879
880 struct vk_meta_push_data push = {
881 .buffer = hk_buffer_address(buffer, region->bufferOffset),
882 .row_extent = row_extent,
883 .slice_or_layer_extent = is_3d ? slice_extent : layer_extent,
884
885 .src_offset_el[0] =
886 util_format_get_nblocksx(p_src_fmt, region->imageOffset.x),
887 .src_offset_el[1] =
888 util_format_get_nblocksy(p_src_fmt, region->imageOffset.y),
889
890 .grid_el[0] =
891 util_format_get_nblocksx(p_format, region->imageExtent.width),
892 .grid_el[1] =
893 util_format_get_nblocksy(p_format, region->imageExtent.height),
894 .grid_el[2] = per_layer ? 1 : layers,
895 };
896
897 push.buffer += push.slice_or_layer_extent * layer_offs;
898
899 disp->CmdPushConstants(vk_command_buffer_to_handle(cmd),
900 pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0,
901 sizeof(push), &push);
902
903 disp->CmdDispatch(vk_command_buffer_to_handle(cmd),
904 DIV_ROUND_UP(push.grid_el[0], 32),
905 DIV_ROUND_UP(push.grid_el[1], 32), push.grid_el[2]);
906 }
907 }
908 }
909
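/*
 * Common dispatch helper for copies that write an image. The destination
 * offset and extent are converted to elements, and the grid is expanded so
 * every workgroup covers a full 32x32 destination tile; the shader's bounds
 * check discards the threads that fall outside the copy region.
 */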
910 static void
911 hk_meta_dispatch_to_image(struct vk_command_buffer *cmd,
912 const struct vk_device_dispatch_table *disp,
913 VkPipelineLayout pipeline_layout,
914 struct vk_meta_push_data *push, VkOffset3D offset,
915 VkExtent3D extent, bool per_layer, unsigned layers,
916 enum pipe_format p_dst_fmt, enum pipe_format p_format)
917 {
918 push->dst_offset_el[0] = util_format_get_nblocksx(p_dst_fmt, offset.x);
919 push->dst_offset_el[1] = util_format_get_nblocksy(p_dst_fmt, offset.y);
920 push->dst_offset_el[2] = 0;
921
922 push->grid_el[0] = util_format_get_nblocksx(p_format, extent.width);
923 push->grid_el[1] = util_format_get_nblocksy(p_format, extent.height);
924 push->grid_el[2] = per_layer ? 1 : layers;
925
926 unsigned w_el = util_format_get_nblocksx(p_format, extent.width);
927 unsigned h_el = util_format_get_nblocksy(p_format, extent.height);
928
929 /* Expand the grid so destinations are in tiles */
930 unsigned expanded_x0 = push->dst_offset_el[0] & ~(TILE_WIDTH - 1);
931 unsigned expanded_y0 = push->dst_offset_el[1] & ~(TILE_HEIGHT - 1);
932 unsigned expanded_x1 = align(push->dst_offset_el[0] + w_el, TILE_WIDTH);
933 unsigned expanded_y1 = align(push->dst_offset_el[1] + h_el, TILE_HEIGHT);
934
935 /* TODO: clamp to the destination size to save some redundant threads? */
936
937 disp->CmdPushConstants(vk_command_buffer_to_handle(cmd), pipeline_layout,
938 VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(*push), push);
939
940 disp->CmdDispatch(vk_command_buffer_to_handle(cmd),
941 (expanded_x1 - expanded_x0) / TILE_WIDTH,
942 (expanded_y1 - expanded_y0) / TILE_HEIGHT,
943 push->grid_el[2]);
944 }
945
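/*
 * Implement vkCmdCopyBufferToImage2 with a compute dispatch per region,
 * mirroring the image-to-buffer path: the buffer is read with formatted
 * loads and the destination is written through a storage image view of the
 * canonical format, using the block-based path where possible.
 */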
946 static void
947 hk_meta_copy_buffer_to_image2(struct vk_command_buffer *cmd,
948 struct vk_meta_device *meta,
949 const struct VkCopyBufferToImageInfo2 *info)
950 {
951 VK_FROM_HANDLE(vk_image, image, info->dstImage);
952 VK_FROM_HANDLE(hk_buffer, buffer, info->srcBuffer);
953
954 struct vk_device *device = cmd->base.device;
955 const struct vk_device_dispatch_table *disp = &device->dispatch_table;
956
957 VkDescriptorSetLayout set_layout;
958 VkResult result =
959 get_image_copy_descriptor_set_layout(device, meta, &set_layout, BUF2IMG);
960 if (unlikely(result != VK_SUCCESS)) {
961 vk_command_buffer_set_error(cmd, result);
962 return;
963 }
964
965 bool per_layer =
966 util_format_is_compressed(hk_format_to_pipe_format(image->format));
967
968 for (unsigned r = 0; r < info->regionCount; ++r) {
969 const VkBufferImageCopy2 *region = &info->pRegions[r];
970
971 unsigned layers = MAX2(
972 region->imageExtent.depth,
973 vk_image_subresource_layer_count(image, &region->imageSubresource));
974 unsigned layer_iters = per_layer ? layers : 1;
975
976 for (unsigned layer_offs = 0; layer_offs < layer_iters; ++layer_offs) {
977 VkImageAspectFlags aspect = region->imageSubresource.aspectMask;
978 VkFormat aspect_fmt = aspect_format(image->format, aspect);
979 VkFormat canonical = canonical_format(aspect_fmt);
980 enum pipe_format p_format = hk_format_to_pipe_format(aspect_fmt);
981 uint32_t blocksize_B = util_format_get_blocksize(p_format);
982 bool is_3d = region->imageExtent.depth > 1;
983
984 struct vk_meta_image_copy_key key = {
985 .key_type = VK_META_OBJECT_KEY_COPY_IMAGE_TO_BUFFER_PIPELINE,
986 .type = BUF2IMG,
987 .block_size = blocksize_B,
988 .nr_samples = image->samples,
989 .src_format = hk_format_to_pipe_format(canonical),
990 .dst_format = canonical_format_pipe(
991 hk_format_to_pipe_format(aspect_format(image->format, aspect)),
992 false),
993
994 /* TODO: MSAA path */
995 .block_based =
996 (image->image_type != VK_IMAGE_TYPE_1D) && image->samples == 1,
997 };
998
999 VkPipelineLayout pipeline_layout;
1000 result = get_image_copy_pipeline_layout(device, meta, &key, set_layout,
1001 &pipeline_layout, true);
1002 if (unlikely(result != VK_SUCCESS)) {
1003 vk_command_buffer_set_error(cmd, result);
1004 return;
1005 }
1006
1007 unsigned row_extent = util_format_get_nblocksx(
1008 p_format, MAX2(region->bufferRowLength,
1009 region->imageExtent.width)) *
1010 blocksize_B;
1011 unsigned slice_extent =
1012 util_format_get_nblocksy(
1013 p_format,
1014 MAX2(region->bufferImageHeight, region->imageExtent.height)) *
1015 row_extent;
1016 unsigned layer_extent =
1017 util_format_get_nblocksz(p_format, region->imageExtent.depth) *
1018 slice_extent;
1019
1020 VkImageView dst_view;
1021 const VkImageViewUsageCreateInfo dst_view_usage = {
1022 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO,
1023 .usage = VK_IMAGE_USAGE_STORAGE_BIT,
1024 };
1025 const VkImageViewCreateInfo dst_view_info = {
1026 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
1027 .flags = VK_IMAGE_VIEW_CREATE_DRIVER_INTERNAL_BIT_MESA,
1028 .pNext = &dst_view_usage,
1029 .image = info->dstImage,
1030 .viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY,
1031 .format = canonical,
1032 .subresourceRange =
1033 {
1034 .aspectMask = region->imageSubresource.aspectMask,
1035 .baseMipLevel = region->imageSubresource.mipLevel,
1036 .baseArrayLayer =
1037 MAX2(region->imageOffset.z,
1038 region->imageSubresource.baseArrayLayer) +
1039 layer_offs,
1040 .layerCount = per_layer ? 1 : layers,
1041 .levelCount = 1,
1042 },
1043 };
1044
1045 result =
1046 vk_meta_create_image_view(cmd, meta, &dst_view_info, &dst_view);
1047 if (unlikely(result != VK_SUCCESS)) {
1048 vk_command_buffer_set_error(cmd, result);
1049 return;
1050 }
1051
1052 const VkDescriptorImageInfo dst_info = {
1053 .imageView = dst_view,
1054 .imageLayout = info->dstImageLayout,
1055 };
1056
1057 VkWriteDescriptorSet desc_write = {
1058 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1059 .dstSet = 0,
1060 .dstBinding = BINDING_OUTPUT,
1061 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
1062 .descriptorCount = 1,
1063 .pImageInfo = &dst_info,
1064 };
1065
1066 disp->CmdPushDescriptorSetKHR(vk_command_buffer_to_handle(cmd),
1067 VK_PIPELINE_BIND_POINT_COMPUTE,
1068 pipeline_layout, 0, 1, &desc_write);
1069
1070 VkPipeline pipeline;
1071 result = get_image_copy_pipeline(device, meta, &key, pipeline_layout,
1072 &pipeline);
1073 if (unlikely(result != VK_SUCCESS)) {
1074 vk_command_buffer_set_error(cmd, result);
1075 return;
1076 }
1077
1078 disp->CmdBindPipeline(vk_command_buffer_to_handle(cmd),
1079 VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1080
1081 struct vk_meta_push_data push = {
1082 .buffer = hk_buffer_address(buffer, region->bufferOffset),
1083 .row_extent = row_extent,
1084 .slice_or_layer_extent = is_3d ? slice_extent : layer_extent,
1085 };
1086
1087 push.buffer += push.slice_or_layer_extent * layer_offs;
1088
1089 hk_meta_dispatch_to_image(cmd, disp, pipeline_layout, &push,
1090 region->imageOffset, region->imageExtent,
1091 per_layer, layers, p_format, p_format);
1092 }
1093 }
1094 }
1095
1096 static void
1097 hk_meta_copy_image2(struct vk_command_buffer *cmd, struct vk_meta_device *meta,
1098 const struct VkCopyImageInfo2 *info)
1099 {
1100 VK_FROM_HANDLE(vk_image, src_image, info->srcImage);
1101 VK_FROM_HANDLE(vk_image, dst_image, info->dstImage);
1102
1103 struct vk_device *device = cmd->base.device;
1104 const struct vk_device_dispatch_table *disp = &device->dispatch_table;
1105
1106 VkDescriptorSetLayout set_layout;
1107 VkResult result =
1108 get_image_copy_descriptor_set_layout(device, meta, &set_layout, BUF2IMG);
1109 if (unlikely(result != VK_SUCCESS)) {
1110 vk_command_buffer_set_error(cmd, result);
1111 return;
1112 }
1113
1114 bool per_layer =
1115 util_format_is_compressed(hk_format_to_pipe_format(src_image->format)) ||
1116 util_format_is_compressed(hk_format_to_pipe_format(dst_image->format));
1117
1118 for (unsigned r = 0; r < info->regionCount; ++r) {
1119 const VkImageCopy2 *region = &info->pRegions[r];
1120
1121 unsigned layers = MAX2(
1122 vk_image_subresource_layer_count(src_image, &region->srcSubresource),
1123 region->extent.depth);
1124 unsigned layer_iters = per_layer ? layers : 1;
1125
1126 for (unsigned layer_offs = 0; layer_offs < layer_iters; ++layer_offs) {
1127 u_foreach_bit(aspect, region->srcSubresource.aspectMask) {
1128 /* We use the source format throughout for consistent scaling with
1129 * compressed<-->uncompressed copies, where the extents are defined
1130 * to follow the source.
1131 */
1132 VkFormat aspect_fmt = aspect_format(src_image->format, 1 << aspect);
1133 VkFormat canonical = canonical_format(aspect_fmt);
1134 uint32_t blocksize_B =
1135 util_format_get_blocksize(hk_format_to_pipe_format(canonical));
1136
1137 VkImageAspectFlagBits dst_aspect_mask =
1138 vk_format_get_ycbcr_info(dst_image->format) ||
1139 vk_format_get_ycbcr_info(src_image->format)
1140 ? region->dstSubresource.aspectMask
1141 : (1 << aspect);
1142
1143 struct vk_meta_image_copy_key key = {
1144 .key_type = VK_META_OBJECT_KEY_COPY_IMAGE_TO_BUFFER_PIPELINE,
1145 .type = IMG2IMG,
1146 .block_size = blocksize_B,
1147 .nr_samples = dst_image->samples,
1148 .src_format = hk_format_to_pipe_format(canonical),
1149 .dst_format =
1150 canonical_format_pipe(hk_format_to_pipe_format(aspect_format(
1151 dst_image->format, dst_aspect_mask)),
1152 false),
1153
1154 /* TODO: MSAA path */
1155 .block_based = (dst_image->image_type != VK_IMAGE_TYPE_1D) &&
1156 dst_image->samples == 1,
1157 };
1158
1159 assert(key.nr_samples == src_image->samples);
1160
1161 VkPipelineLayout pipeline_layout;
1162 result = get_image_copy_pipeline_layout(
1163 device, meta, &key, set_layout, &pipeline_layout, true);
1164 if (unlikely(result != VK_SUCCESS)) {
1165 vk_command_buffer_set_error(cmd, result);
1166 return;
1167 }
1168
1169 VkWriteDescriptorSet desc_writes[2];
1170
1171 VkImageView src_view;
1172 const VkImageViewUsageCreateInfo src_view_usage = {
1173 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO,
1174 .usage = VK_IMAGE_USAGE_SAMPLED_BIT,
1175 };
1176 const VkImageViewCreateInfo src_view_info = {
1177 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
1178 .flags = VK_IMAGE_VIEW_CREATE_DRIVER_INTERNAL_BIT_MESA,
1179 .pNext = &src_view_usage,
1180 .image = info->srcImage,
1181 .viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY,
1182 .format = canonical,
1183 .subresourceRange =
1184 {
1185 .aspectMask =
1186 region->srcSubresource.aspectMask & (1 << aspect),
1187 .baseMipLevel = region->srcSubresource.mipLevel,
1188 .baseArrayLayer =
1189 MAX2(region->srcOffset.z,
1190 region->srcSubresource.baseArrayLayer) +
1191 layer_offs,
1192 .layerCount = per_layer ? 1 : layers,
1193 .levelCount = 1,
1194 },
1195 };
1196
1197 result =
1198 vk_meta_create_image_view(cmd, meta, &src_view_info, &src_view);
1199 if (unlikely(result != VK_SUCCESS)) {
1200 vk_command_buffer_set_error(cmd, result);
1201 return;
1202 }
1203
1204 VkDescriptorImageInfo src_info = {
1205 .imageLayout = info->srcImageLayout,
1206 .imageView = src_view,
1207 };
1208
1209 VkImageView dst_view;
1210 const VkImageViewUsageCreateInfo dst_view_usage = {
1211 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO,
1212 .usage = VK_IMAGE_USAGE_STORAGE_BIT,
1213 };
1214 const VkImageViewCreateInfo dst_view_info = {
1215 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
1216 .flags = VK_IMAGE_VIEW_CREATE_DRIVER_INTERNAL_BIT_MESA,
1217 .pNext = &dst_view_usage,
1218 .image = info->dstImage,
1219 .viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY,
1220 .format = vk_format_from_pipe_format(key.dst_format),
1221 .subresourceRange =
1222 {
1223 .aspectMask = dst_aspect_mask,
1224 .baseMipLevel = region->dstSubresource.mipLevel,
1225 .baseArrayLayer =
1226 MAX2(region->dstOffset.z,
1227 region->dstSubresource.baseArrayLayer) +
1228 layer_offs,
1229 .layerCount = per_layer ? 1 : layers,
1230 .levelCount = 1,
1231 },
1232 };
1233
1234 result =
1235 vk_meta_create_image_view(cmd, meta, &dst_view_info, &dst_view);
1236 if (unlikely(result != VK_SUCCESS)) {
1237 vk_command_buffer_set_error(cmd, result);
1238 return;
1239 }
1240
1241 const VkDescriptorImageInfo dst_info = {
1242 .imageView = dst_view,
1243 .imageLayout = info->dstImageLayout,
1244 };
1245
1246 desc_writes[0] = (VkWriteDescriptorSet){
1247 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1248 .dstSet = 0,
1249 .dstBinding = BINDING_OUTPUT,
1250 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
1251 .descriptorCount = 1,
1252 .pImageInfo = &dst_info,
1253 };
1254
1255 desc_writes[1] = (VkWriteDescriptorSet){
1256 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1257 .dstSet = 0,
1258 .dstBinding = BINDING_INPUT,
1259 .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
1260 .descriptorCount = 1,
1261 .pImageInfo = &src_info,
1262 };
1263
1264 disp->CmdPushDescriptorSetKHR(
1265 vk_command_buffer_to_handle(cmd), VK_PIPELINE_BIND_POINT_COMPUTE,
1266 pipeline_layout, 0, ARRAY_SIZE(desc_writes), desc_writes);
1267
1268 VkPipeline pipeline;
1269 result = get_image_copy_pipeline(device, meta, &key,
1270 pipeline_layout, &pipeline);
1271 if (unlikely(result != VK_SUCCESS)) {
1272 vk_command_buffer_set_error(cmd, result);
1273 return;
1274 }
1275
1276 disp->CmdBindPipeline(vk_command_buffer_to_handle(cmd),
1277 VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1278
1279 enum pipe_format p_src_fmt =
1280 hk_format_to_pipe_format(src_image->format);
1281 enum pipe_format p_dst_fmt =
1282 hk_format_to_pipe_format(dst_image->format);
1283 enum pipe_format p_format = hk_format_to_pipe_format(aspect_fmt);
1284
1285 struct vk_meta_push_data push = {
1286 .src_offset_el[0] =
1287 util_format_get_nblocksx(p_src_fmt, region->srcOffset.x),
1288 .src_offset_el[1] =
1289 util_format_get_nblocksy(p_src_fmt, region->srcOffset.y),
1290 };
1291
1292 hk_meta_dispatch_to_image(cmd, disp, pipeline_layout, &push,
1293 region->dstOffset, region->extent,
1294 per_layer, layers, p_dst_fmt, p_format);
1295 }
1296 }
1297 }
1298 }
1299
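/*
 * Describe an image for the common vk_meta copy path (used on the graphics
 * fallback path): pick an uncompressed uint view format for each
 * depth/stencil aspect, along with the component mask that selects that
 * aspect within the view.
 */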
1300 static inline struct vk_meta_copy_image_properties
1301 hk_meta_copy_get_image_properties(struct hk_image *img)
1302 {
1303 struct vk_meta_copy_image_properties props = {.tile_size = {16, 16, 1}};
1304
1305 if (!vk_format_is_depth_or_stencil(img->vk.format)) {
1306 props.color.view_format = img->vk.format;
1307 } else {
1308 switch (img->vk.format) {
1309 case VK_FORMAT_S8_UINT:
1310 props.stencil.view_format = VK_FORMAT_R8_UINT;
1311 props.stencil.component_mask = BITFIELD_MASK(1);
1312 break;
1313 case VK_FORMAT_D32_SFLOAT_S8_UINT:
1314 props.depth.view_format = VK_FORMAT_R32G32_UINT;
1315 props.depth.component_mask = BITFIELD_BIT(0);
1316 props.stencil.view_format = VK_FORMAT_R32G32_UINT;
1317 props.stencil.component_mask = BITFIELD_BIT(1);
1318 break;
1319 case VK_FORMAT_D16_UNORM:
1320 props.depth.view_format = VK_FORMAT_R16_UINT;
1321 props.depth.component_mask = BITFIELD_BIT(0);
1322 break;
1323 case VK_FORMAT_D32_SFLOAT:
1324 props.depth.view_format = VK_FORMAT_R32_UINT;
1325 props.depth.component_mask = BITFIELD_BIT(0);
1326 break;
1327 default:
1328 unreachable("Invalid ZS format");
1329 }
1330 }
1331
1332 return props;
1333 }
1334
1335 VKAPI_ATTR void VKAPI_CALL
1336 hk_CmdBlitImage2(VkCommandBuffer commandBuffer,
1337 const VkBlitImageInfo2 *pBlitImageInfo)
1338 {
1339 VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
1340 struct hk_device *dev = hk_cmd_buffer_device(cmd);
1341 perf_debug(dev, "Blit image");
1342
1343 struct hk_meta_save save;
1344 hk_meta_begin(cmd, &save, VK_PIPELINE_BIND_POINT_GRAPHICS);
1345 vk_meta_blit_image2(&cmd->vk, &dev->meta, pBlitImageInfo);
1346 hk_meta_end(cmd, &save, VK_PIPELINE_BIND_POINT_GRAPHICS);
1347 }
1348
1349 VKAPI_ATTR void VKAPI_CALL
1350 hk_CmdResolveImage2(VkCommandBuffer commandBuffer,
1351 const VkResolveImageInfo2 *pResolveImageInfo)
1352 {
1353 VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
1354 struct hk_device *dev = hk_cmd_buffer_device(cmd);
1355 perf_debug(dev, "Resolve");
1356
1357 struct hk_meta_save save;
1358 hk_meta_begin(cmd, &save, VK_PIPELINE_BIND_POINT_GRAPHICS);
1359 vk_meta_resolve_image2(&cmd->vk, &dev->meta, pResolveImageInfo);
1360 hk_meta_end(cmd, &save, VK_PIPELINE_BIND_POINT_GRAPHICS);
1361 }
1362
1363 void
1364 hk_meta_resolve_rendering(struct hk_cmd_buffer *cmd,
1365 const VkRenderingInfo *pRenderingInfo)
1366 {
1367 struct hk_device *dev = hk_cmd_buffer_device(cmd);
1368
1369 struct hk_meta_save save;
1370 hk_meta_begin(cmd, &save, VK_PIPELINE_BIND_POINT_GRAPHICS);
1371 vk_meta_resolve_rendering(&cmd->vk, &dev->meta, pRenderingInfo);
1372 hk_meta_end(cmd, &save, VK_PIPELINE_BIND_POINT_GRAPHICS);
1373 }
1374
1375 VKAPI_ATTR void VKAPI_CALL
1376 hk_CmdCopyBuffer2(VkCommandBuffer commandBuffer,
1377 const VkCopyBufferInfo2 *pCopyBufferInfo)
1378 {
1379 VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
1380 struct hk_device *dev = hk_cmd_buffer_device(cmd);
1381
1382 struct hk_meta_save save;
1383 hk_meta_begin(cmd, &save, VK_PIPELINE_BIND_POINT_COMPUTE);
1384 vk_meta_copy_buffer(&cmd->vk, &dev->meta, pCopyBufferInfo);
1385 hk_meta_end(cmd, &save, VK_PIPELINE_BIND_POINT_COMPUTE);
1386 }
1387
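/*
 * Copies whose destination is a multisampled image with a compressed layout
 * go through vk_meta's graphics-based implementation; everything else uses
 * the compute copies implemented above.
 */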
1388 static bool
1389 hk_copy_requires_gfx(struct hk_image *img)
1390 {
1391 return img->vk.samples > 1 && ail_is_compressed(&img->planes[0].layout);
1392 }
1393
1394 static VkPipelineBindPoint
1395 hk_bind_point(bool gfx)
1396 {
1397 return gfx ? VK_PIPELINE_BIND_POINT_GRAPHICS
1398 : VK_PIPELINE_BIND_POINT_COMPUTE;
1399 }
1400
1401 VKAPI_ATTR void VKAPI_CALL
1402 hk_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer,
1403 const VkCopyBufferToImageInfo2 *pCopyBufferToImageInfo)
1404 {
1405 VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
1406 VK_FROM_HANDLE(hk_image, dst_image, pCopyBufferToImageInfo->dstImage);
1407 struct hk_device *dev = hk_cmd_buffer_device(cmd);
1408
1409 bool gfx = hk_copy_requires_gfx(dst_image);
1410 VkPipelineBindPoint bind_point = hk_bind_point(gfx);
1411
1412 struct hk_meta_save save;
1413 hk_meta_begin(cmd, &save, bind_point);
1414
1415 if (gfx) {
1416 struct vk_meta_copy_image_properties dst_props =
1417 hk_meta_copy_get_image_properties(dst_image);
1418
1419 vk_meta_copy_buffer_to_image(&cmd->vk, &dev->meta, pCopyBufferToImageInfo,
1420 &dst_props, bind_point);
1421 } else {
1422 hk_meta_copy_buffer_to_image2(&cmd->vk, &dev->meta,
1423 pCopyBufferToImageInfo);
1424 }
1425
1426 hk_meta_end(cmd, &save, bind_point);
1427 }
1428
1429 VKAPI_ATTR void VKAPI_CALL
1430 hk_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer,
1431 const VkCopyImageToBufferInfo2 *pCopyImageToBufferInfo)
1432 {
1433 VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
1434 struct hk_device *dev = hk_cmd_buffer_device(cmd);
1435
1436 struct hk_meta_save save;
1437 hk_meta_begin(cmd, &save, VK_PIPELINE_BIND_POINT_COMPUTE);
1438 hk_meta_copy_image_to_buffer2(&cmd->vk, &dev->meta, pCopyImageToBufferInfo);
1439 hk_meta_end(cmd, &save, VK_PIPELINE_BIND_POINT_COMPUTE);
1440 }
1441
1442 VKAPI_ATTR void VKAPI_CALL
1443 hk_CmdCopyImage2(VkCommandBuffer commandBuffer,
1444 const VkCopyImageInfo2 *pCopyImageInfo)
1445 {
1446 VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
1447 VK_FROM_HANDLE(hk_image, src_image, pCopyImageInfo->srcImage);
1448 VK_FROM_HANDLE(hk_image, dst_image, pCopyImageInfo->dstImage);
1449 struct hk_device *dev = hk_cmd_buffer_device(cmd);
1450 bool gfx = hk_copy_requires_gfx(dst_image);
1451 VkPipelineBindPoint bind_point = hk_bind_point(gfx);
1452
1453 struct hk_meta_save save;
1454 hk_meta_begin(cmd, &save, bind_point);
1455
1456 if (gfx) {
1457 struct vk_meta_copy_image_properties src_props =
1458 hk_meta_copy_get_image_properties(src_image);
1459 struct vk_meta_copy_image_properties dst_props =
1460 hk_meta_copy_get_image_properties(dst_image);
1461
1462 vk_meta_copy_image(&cmd->vk, &dev->meta, pCopyImageInfo, &src_props,
1463 &dst_props, bind_point);
1464 } else {
1465 hk_meta_copy_image2(&cmd->vk, &dev->meta, pCopyImageInfo);
1466 }
1467
1468 hk_meta_end(cmd, &save, bind_point);
1469 }
1470
1471 VKAPI_ATTR void VKAPI_CALL
1472 hk_CmdFillBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer,
1473 VkDeviceSize dstOffset, VkDeviceSize dstRange, uint32_t data)
1474 {
1475 VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
1476 struct hk_device *dev = hk_cmd_buffer_device(cmd);
1477
1478 struct hk_meta_save save;
1479 hk_meta_begin(cmd, &save, VK_PIPELINE_BIND_POINT_COMPUTE);
1480 vk_meta_fill_buffer(&cmd->vk, &dev->meta, dstBuffer, dstOffset, dstRange,
1481 data);
1482 hk_meta_end(cmd, &save, VK_PIPELINE_BIND_POINT_COMPUTE);
1483 }
1484
1485 VKAPI_ATTR void VKAPI_CALL
1486 hk_CmdUpdateBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer,
1487 VkDeviceSize dstOffset, VkDeviceSize dstRange,
1488 const void *pData)
1489 {
1490 VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
1491 struct hk_device *dev = hk_cmd_buffer_device(cmd);
1492
1493 struct hk_meta_save save;
1494 hk_meta_begin(cmd, &save, VK_PIPELINE_BIND_POINT_COMPUTE);
1495 vk_meta_update_buffer(&cmd->vk, &dev->meta, dstBuffer, dstOffset, dstRange,
1496 pData);
1497 hk_meta_end(cmd, &save, VK_PIPELINE_BIND_POINT_COMPUTE);
1498 }
1499
1500 VKAPI_ATTR void VKAPI_CALL
1501 hk_CmdClearAttachments(VkCommandBuffer commandBuffer, uint32_t attachmentCount,
1502 const VkClearAttachment *pAttachments,
1503 uint32_t rectCount, const VkClearRect *pRects)
1504 {
1505 VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
1506 struct hk_device *dev = hk_cmd_buffer_device(cmd);
1507
1508 struct vk_meta_rendering_info render_info;
1509 hk_meta_init_render(cmd, &render_info);
1510
1511 struct hk_meta_save save;
1512 hk_meta_begin(cmd, &save, VK_PIPELINE_BIND_POINT_GRAPHICS);
1513 vk_meta_clear_attachments(&cmd->vk, &dev->meta, &render_info,
1514 attachmentCount, pAttachments, rectCount, pRects);
1515 hk_meta_end(cmd, &save, VK_PIPELINE_BIND_POINT_GRAPHICS);
1516 }
1517