• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2021 Collabora Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 #include "gen_macros.h"
25 
26 #include "nir/nir_builder.h"
27 #include "pan_encoder.h"
28 #include "pan_props.h"
29 #include "pan_shader.h"
30 
31 #include "panvk_private.h"
32 
33 static mali_ptr
panvk_meta_copy_img_emit_texture(struct pan_pool * desc_pool,const struct pan_image_view * view)34 panvk_meta_copy_img_emit_texture(struct pan_pool *desc_pool,
35                                  const struct pan_image_view *view)
36 {
37    struct panfrost_ptr texture = pan_pool_alloc_desc(desc_pool, TEXTURE);
38    size_t payload_size = GENX(panfrost_estimate_texture_payload_size)(view);
39    struct panfrost_ptr surfaces = pan_pool_alloc_aligned(
40       desc_pool, payload_size, pan_alignment(SURFACE_WITH_STRIDE));
41 
42    GENX(panfrost_new_texture)(view, texture.cpu, &surfaces);
43 
44    return texture.gpu;
45 }
46 
47 static mali_ptr
panvk_meta_copy_img_emit_sampler(struct pan_pool * desc_pool)48 panvk_meta_copy_img_emit_sampler(struct pan_pool *desc_pool)
49 {
50    struct panfrost_ptr sampler = pan_pool_alloc_desc(desc_pool, SAMPLER);
51 
52    pan_pack(sampler.cpu, SAMPLER, cfg) {
53       cfg.seamless_cube_map = false;
54       cfg.normalized_coordinates = false;
55       cfg.minify_nearest = true;
56       cfg.magnify_nearest = true;
57    }
58 
59    return sampler.gpu;
60 }
61 
62 static void
panvk_meta_copy_emit_varying(struct pan_pool * pool,mali_ptr coordinates,mali_ptr * varying_bufs,mali_ptr * varyings)63 panvk_meta_copy_emit_varying(struct pan_pool *pool, mali_ptr coordinates,
64                              mali_ptr *varying_bufs, mali_ptr *varyings)
65 {
66    struct panfrost_ptr varying = pan_pool_alloc_desc(pool, ATTRIBUTE);
67    struct panfrost_ptr varying_buffer =
68       pan_pool_alloc_desc_array(pool, 2, ATTRIBUTE_BUFFER);
69 
70    pan_pack(varying_buffer.cpu, ATTRIBUTE_BUFFER, cfg) {
71       cfg.pointer = coordinates;
72       cfg.stride = 4 * sizeof(uint32_t);
73       cfg.size = cfg.stride * 4;
74    }
75 
76    /* Bifrost needs an empty desc to mark end of prefetching */
77    pan_pack(varying_buffer.cpu + pan_size(ATTRIBUTE_BUFFER), ATTRIBUTE_BUFFER,
78             cfg)
79       ;
80 
81    pan_pack(varying.cpu, ATTRIBUTE, cfg) {
82       enum pipe_format f = PIPE_FORMAT_R32G32B32_FLOAT;
83 
84       cfg.buffer_index = 0;
85       cfg.format = GENX(panfrost_format_from_pipe_format)(f)->hw;
86    }
87 
88    *varyings = varying.gpu;
89    *varying_bufs = varying_buffer.gpu;
90 }
91 
/*
 * Pack the DRAW descriptor shared by the tiler (image destination) and
 * compute (buffer destination) copy paths into @out.
 *
 * A zero @src_coords means no varyings are emitted (the compute path passes
 * 0 for src_coords/dst_coords/vpd and fetches its data in the shader).
 */
static void
panvk_meta_copy_emit_dcd(struct pan_pool *pool, mali_ptr src_coords,
                         mali_ptr dst_coords, mali_ptr texture,
                         mali_ptr sampler, mali_ptr vpd, mali_ptr tsd,
                         mali_ptr rsd, mali_ptr push_constants, void *out)
{
   pan_pack(out, DRAW, cfg) {
      cfg.thread_storage = tsd;
      cfg.state = rsd;
      cfg.push_uniforms = push_constants;
      cfg.position = dst_coords;
      /* Varyings only exist when the source is an image. */
      if (src_coords) {
         panvk_meta_copy_emit_varying(pool, src_coords, &cfg.varying_buffers,
                                      &cfg.varyings);
      }
      cfg.viewport = vpd;
      cfg.textures = texture;
      cfg.samplers = sampler;
   }
}
112 
/*
 * Emit a tiler job drawing a 4-vertex triangle strip covering the
 * destination rectangle, and queue it on @jc. Returns the job descriptor
 * pointer so the caller can track it (e.g. in batch->jobs).
 */
static struct panfrost_ptr
panvk_meta_copy_emit_tiler_job(struct pan_pool *desc_pool, struct pan_jc *jc,
                               mali_ptr src_coords, mali_ptr dst_coords,
                               mali_ptr texture, mali_ptr sampler,
                               mali_ptr push_constants, mali_ptr vpd,
                               mali_ptr rsd, mali_ptr tsd, mali_ptr tiler)
{
   struct panfrost_ptr job = pan_pool_alloc_desc(desc_pool, TILER_JOB);

   panvk_meta_copy_emit_dcd(desc_pool, src_coords, dst_coords, texture, sampler,
                            vpd, tsd, rsd, push_constants,
                            pan_section_ptr(job.cpu, TILER_JOB, DRAW));

   /* Quad as a 4-index triangle strip. */
   pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE, cfg) {
      cfg.draw_mode = MALI_DRAW_MODE_TRIANGLE_STRIP;
      cfg.index_count = 4;
      cfg.job_task_split = 6;
   }

   pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE_SIZE, cfg) {
      cfg.constant = 1.0f;
   }

   /* One invocation per strip vertex (1x4x1 job, 1x1x1 workgroup). */
   void *invoc = pan_section_ptr(job.cpu, TILER_JOB, INVOCATION);
   panfrost_pack_work_groups_compute(invoc, 1, 4, 1, 1, 1, 1, true, false);

   pan_section_pack(job.cpu, TILER_JOB, PADDING, cfg)
      ;
   pan_section_pack(job.cpu, TILER_JOB, TILER, cfg) {
      cfg.address = tiler;
   }

   pan_jc_add_job(desc_pool, jc, MALI_JOB_TYPE_TILER, false, false, 0, 0, &job,
                  false);
   return job;
}
149 
/*
 * Emit a compute job running the copy shader over @num_wg workgroups of
 * size @wg_sz, and queue it on @jc. No varyings/viewport are used on this
 * path (zeros passed to the DCD helper); the shader addresses data through
 * push constants and the texture/sampler pair.
 */
static struct panfrost_ptr
panvk_meta_copy_emit_compute_job(struct pan_pool *desc_pool, struct pan_jc *jc,
                                 const struct pan_compute_dim *num_wg,
                                 const struct pan_compute_dim *wg_sz,
                                 mali_ptr texture, mali_ptr sampler,
                                 mali_ptr push_constants, mali_ptr rsd,
                                 mali_ptr tsd)
{
   struct panfrost_ptr job = pan_pool_alloc_desc(desc_pool, COMPUTE_JOB);

   void *invoc = pan_section_ptr(job.cpu, COMPUTE_JOB, INVOCATION);
   panfrost_pack_work_groups_compute(invoc, num_wg->x, num_wg->y, num_wg->z,
                                     wg_sz->x, wg_sz->y, wg_sz->z, false,
                                     false);

   pan_section_pack(job.cpu, COMPUTE_JOB, PARAMETERS, cfg) {
      cfg.job_task_split = 8;
   }

   panvk_meta_copy_emit_dcd(desc_pool, 0, 0, texture, sampler, 0, tsd, rsd,
                            push_constants,
                            pan_section_ptr(job.cpu, COMPUTE_JOB, DRAW));

   pan_jc_add_job(desc_pool, jc, MALI_JOB_TYPE_COMPUTE, false, false, 0, 0,
                  &job, false);
   return job;
}
177 
178 static uint32_t
panvk_meta_copy_img_bifrost_raw_format(unsigned texelsize)179 panvk_meta_copy_img_bifrost_raw_format(unsigned texelsize)
180 {
181    switch (texelsize) {
182    case 6:
183       return MALI_RGB16UI << 12;
184    case 8:
185       return MALI_RG32UI << 12;
186    case 12:
187       return MALI_RGB32UI << 12;
188    case 16:
189       return MALI_RGBA32UI << 12;
190    default:
191       unreachable("Invalid texel size\n");
192    }
193 }
194 
/*
 * Emit the RENDERER_STATE + BLEND descriptor pair for a copy whose
 * destination is an image (fragment path).
 *
 * @fmt/@wrmask describe the destination render format and the component
 * writemask; @from_img selects the img2img variant (which consumes a
 * varying and a texture/sampler pair). Returns the RSD GPU address.
 */
static mali_ptr
panvk_meta_copy_to_img_emit_rsd(struct pan_pool *desc_pool, mali_ptr shader,
                                const struct pan_shader_info *shader_info,
                                enum pipe_format fmt, unsigned wrmask,
                                bool from_img)
{
   struct panfrost_ptr rsd_ptr = pan_pool_alloc_desc_aggregate(
      desc_pool, PAN_DESC(RENDERER_STATE), PAN_DESC_ARRAY(1, BLEND));

   /* Texels wider than 4 bytes go through the raw (integer) path instead of
    * a blendable format. */
   bool raw = util_format_get_blocksize(fmt) > 4;
   unsigned fullmask = (1 << util_format_get_nr_components(fmt)) - 1;
   /* Partial writes: blendable formats merge in the blender, raw formats
    * have to read the tile buffer back in the shader. */
   bool partialwrite = fullmask != wrmask && !raw;
   bool readstb = fullmask != wrmask && raw;

   pan_pack(rsd_ptr.cpu, RENDERER_STATE, cfg) {
      pan_shader_prepare_rsd(shader_info, shader, &cfg);
      if (from_img) {
         /* One varying (source coords), one texture, one sampler. */
         cfg.shader.varying_count = 1;
         cfg.shader.texture_count = 1;
         cfg.shader.sampler_count = 1;
      }
      /* Depth/stencil always pass; values come from fixed function. */
      cfg.properties.depth_source = MALI_DEPTH_SOURCE_FIXED_FUNCTION;
      cfg.multisample_misc.sample_mask = UINT16_MAX;
      cfg.multisample_misc.depth_function = MALI_FUNC_ALWAYS;
      cfg.stencil_mask_misc.stencil_mask_front = 0xFF;
      cfg.stencil_mask_misc.stencil_mask_back = 0xFF;
      cfg.stencil_front.compare_function = MALI_FUNC_ALWAYS;
      cfg.stencil_front.stencil_fail = MALI_STENCIL_OP_REPLACE;
      cfg.stencil_front.depth_fail = MALI_STENCIL_OP_REPLACE;
      cfg.stencil_front.depth_pass = MALI_STENCIL_OP_REPLACE;
      cfg.stencil_front.mask = 0xFF;
      cfg.stencil_back = cfg.stencil_front;

      cfg.properties.allow_forward_pixel_to_be_killed = true;
      /* Can only kill overlapped pixels early when we don't depend on the
       * existing destination contents. */
      cfg.properties.allow_forward_pixel_to_kill = !partialwrite && !readstb;
      cfg.properties.zs_update_operation = MALI_PIXEL_KILL_STRONG_EARLY;
      cfg.properties.pixel_kill_operation = MALI_PIXEL_KILL_FORCE_EARLY;
   }

   pan_pack(rsd_ptr.cpu + pan_size(RENDERER_STATE), BLEND, cfg) {
      cfg.round_to_fb_precision = true;
      cfg.load_destination = partialwrite;
      /* Passthrough equation: out = src * 1 + dst * 0. */
      cfg.equation.rgb.a = MALI_BLEND_OPERAND_A_SRC;
      cfg.equation.rgb.b = MALI_BLEND_OPERAND_B_SRC;
      cfg.equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO;
      cfg.equation.alpha.a = MALI_BLEND_OPERAND_A_SRC;
      cfg.equation.alpha.b = MALI_BLEND_OPERAND_B_SRC;
      cfg.equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO;
      /* Fixed-function mode is required to honor load_destination /
       * a partial color_mask; otherwise opaque is used. */
      cfg.internal.mode =
         partialwrite ? MALI_BLEND_MODE_FIXED_FUNCTION : MALI_BLEND_MODE_OPAQUE;
      cfg.equation.color_mask = partialwrite ? wrmask : 0xf;
      cfg.internal.fixed_function.num_comps = 4;
      if (!raw) {
         cfg.internal.fixed_function.conversion.memory_format =
            GENX(panfrost_dithered_format_from_pipe_format)(fmt, false);
         cfg.internal.fixed_function.conversion.register_format =
            MALI_REGISTER_FILE_FORMAT_F32;
      } else {
         unsigned imgtexelsz = util_format_get_blocksize(fmt);

         cfg.internal.fixed_function.conversion.memory_format =
            panvk_meta_copy_img_bifrost_raw_format(imgtexelsz);
         /* Texel sizes with bit 1 set (6 bytes = 3 x u16) use u16
          * registers, the rest use u32. */
         cfg.internal.fixed_function.conversion.register_format =
            (imgtexelsz & 2) ? MALI_REGISTER_FILE_FORMAT_U16
                             : MALI_REGISTER_FILE_FORMAT_U32;
      }
   }

   return rsd_ptr.gpu;
}
265 
266 static mali_ptr
panvk_meta_copy_to_buf_emit_rsd(struct pan_pool * desc_pool,mali_ptr shader,const struct pan_shader_info * shader_info,bool from_img)267 panvk_meta_copy_to_buf_emit_rsd(struct pan_pool *desc_pool, mali_ptr shader,
268                                 const struct pan_shader_info *shader_info,
269                                 bool from_img)
270 {
271    struct panfrost_ptr rsd_ptr =
272       pan_pool_alloc_desc_aggregate(desc_pool, PAN_DESC(RENDERER_STATE));
273 
274    pan_pack(rsd_ptr.cpu, RENDERER_STATE, cfg) {
275       pan_shader_prepare_rsd(shader_info, shader, &cfg);
276       if (from_img) {
277          cfg.shader.texture_count = 1;
278          cfg.shader.sampler_count = 1;
279       }
280    }
281 
282    return rsd_ptr.gpu;
283 }
284 
/*
 * Build and compile the img2img copy fragment shader for a given
 * (srcfmt, dstfmt, dstmask, dimension, array-ness, MSAA) combination, and
 * upload the binary to the device binary pool.
 *
 * The shader reads a per-vertex source-coordinate varying, does a texel
 * fetch (txf/txf_ms) from the source, optionally repacks texels when src
 * and dst render formats differ (the RGB565 <-> RG8 cases), merges with
 * the existing destination when the writemask is partial, and writes the
 * result to the color output. @shader_info is filled by the compiler.
 * Returns the GPU address of the uploaded binary.
 */
static mali_ptr
panvk_meta_copy_img2img_shader(struct panvk_device *dev,
                               enum pipe_format srcfmt, enum pipe_format dstfmt,
                               unsigned dstmask, unsigned texdim,
                               bool texisarray, bool is_ms,
                               struct pan_shader_info *shader_info)
{
   struct pan_pool *bin_pool = &dev->meta.bin_pool.base;

   nir_builder b = nir_builder_init_simple_shader(
      MESA_SHADER_FRAGMENT, GENX(pan_shader_get_compiler_options)(),
      "panvk_meta_copy_img2img(srcfmt=%s,dstfmt=%s,%dD%s%s)",
      util_format_name(srcfmt), util_format_name(dstfmt), texdim,
      texisarray ? "[]" : "", is_ms ? ",ms" : "");

   /* Source coordinates arrive as a float varying; one extra component for
    * the array layer when the texture is arrayed. */
   nir_variable *coord_var = nir_variable_create(
      b.shader, nir_var_shader_in,
      glsl_vector_type(GLSL_TYPE_FLOAT, texdim + texisarray), "coord");
   coord_var->data.location = VARYING_SLOT_VAR0;
   nir_def *coord = nir_f2u32(&b, nir_load_var(&b, coord_var));

   /* Texel fetch from the source image; one extra source for the sample
    * index on the MSAA path. */
   nir_tex_instr *tex = nir_tex_instr_create(b.shader, is_ms ? 2 : 1);
   tex->op = is_ms ? nir_texop_txf_ms : nir_texop_txf;
   tex->texture_index = 0;
   tex->is_array = texisarray;
   tex->dest_type =
      util_format_is_unorm(srcfmt) ? nir_type_float32 : nir_type_uint32;

   switch (texdim) {
   case 1:
      assert(!is_ms);
      tex->sampler_dim = GLSL_SAMPLER_DIM_1D;
      break;
   case 2:
      tex->sampler_dim = is_ms ? GLSL_SAMPLER_DIM_MS : GLSL_SAMPLER_DIM_2D;
      break;
   case 3:
      assert(!is_ms);
      tex->sampler_dim = GLSL_SAMPLER_DIM_3D;
      break;
   default:
      unreachable("Invalid texture dimension");
   }

   tex->src[0] = nir_tex_src_for_ssa(nir_tex_src_coord, coord);
   tex->coord_components = texdim + texisarray;

   if (is_ms) {
      /* Sample-shading copy: fetch the sample matching the current one. */
      tex->src[1] =
         nir_tex_src_for_ssa(nir_tex_src_ms_index, nir_load_sample_id(&b));
   }

   nir_def_init(&tex->instr, &tex->def, 4,
                nir_alu_type_get_type_size(tex->dest_type));
   nir_builder_instr_insert(&b, &tex->instr);

   nir_def *texel = &tex->def;

   unsigned dstcompsz =
      util_format_get_component_bits(dstfmt, UTIL_FORMAT_COLORSPACE_RGB, 0);
   unsigned ndstcomps = util_format_get_nr_components(dstfmt);
   const struct glsl_type *outtype = NULL;

   if (srcfmt == PIPE_FORMAT_R5G6B5_UNORM && dstfmt == PIPE_FORMAT_R8G8_UNORM) {
      /* RGB565 texel reinterpreted as two 8-bit components: quantize the
       * unorm channels back to 5/6/5-bit integers, repack the 16 bits into
       * two bytes, then renormalize to R8G8 unorm. */
      nir_def *rgb = nir_f2u32(
         &b, nir_fmul(&b, texel,
                      nir_vec3(&b, nir_imm_float(&b, 31), nir_imm_float(&b, 63),
                               nir_imm_float(&b, 31))));
      nir_def *rg = nir_vec2(
         &b,
         nir_ior(&b, nir_channel(&b, rgb, 0),
                 nir_ishl(&b, nir_channel(&b, rgb, 1), nir_imm_int(&b, 5))),
         nir_ior(&b, nir_ushr_imm(&b, nir_channel(&b, rgb, 1), 3),
                 nir_ishl(&b, nir_channel(&b, rgb, 2), nir_imm_int(&b, 3))));
      rg = nir_iand_imm(&b, rg, 255);
      texel = nir_fmul_imm(&b, nir_u2f32(&b, rg), 1.0 / 255);
      outtype = glsl_vector_type(GLSL_TYPE_FLOAT, 2);
   } else if (srcfmt == PIPE_FORMAT_R8G8_UNORM &&
              dstfmt == PIPE_FORMAT_R5G6B5_UNORM) {
      /* Inverse of the above: split two bytes into 5/6/5-bit fields and
       * renormalize to RGB565 unorm. */
      nir_def *rg = nir_f2u32(&b, nir_fmul_imm(&b, texel, 255));
      nir_def *rgb = nir_vec3(
         &b, nir_channel(&b, rg, 0),
         nir_ior(&b, nir_ushr_imm(&b, nir_channel(&b, rg, 0), 5),
                 nir_ishl(&b, nir_channel(&b, rg, 1), nir_imm_int(&b, 3))),
         nir_ushr_imm(&b, nir_channel(&b, rg, 1), 3));
      rgb = nir_iand(&b, rgb,
                     nir_vec3(&b, nir_imm_int(&b, 31), nir_imm_int(&b, 63),
                              nir_imm_int(&b, 31)));
      texel = nir_fmul(
         &b, nir_u2f32(&b, rgb),
         nir_vec3(&b, nir_imm_float(&b, 1.0 / 31), nir_imm_float(&b, 1.0 / 63),
                  nir_imm_float(&b, 1.0 / 31)));
      outtype = glsl_vector_type(GLSL_TYPE_FLOAT, 3);
   } else {
      /* Same render format on both sides: just narrow/trim the fetched
       * texel to the destination component count and size. */
      assert(srcfmt == dstfmt);
      enum glsl_base_type basetype;
      if (util_format_is_unorm(dstfmt)) {
         basetype = GLSL_TYPE_FLOAT;
      } else if (dstcompsz == 16) {
         basetype = GLSL_TYPE_UINT16;
      } else {
         assert(dstcompsz == 32);
         basetype = GLSL_TYPE_UINT;
      }

      if (dstcompsz == 16)
         texel = nir_u2u16(&b, texel);

      texel = nir_trim_vector(&b, texel, ndstcomps);
      outtype = glsl_vector_type(basetype, ndstcomps);
   }

   nir_variable *out =
      nir_variable_create(b.shader, nir_var_shader_out, outtype, "out");
   out->data.location = FRAG_RESULT_DATA0;

   /* Partial writemask on >8-bit components: load the current destination
    * and merge, component by component. (<=8-bit partial writes are
    * handled by the blender's color_mask instead.) */
   unsigned fullmask = (1 << ndstcomps) - 1;
   if (dstcompsz > 8 && dstmask != fullmask) {
      nir_def *oldtexel = nir_load_var(&b, out);
      nir_def *dstcomps[4];

      for (unsigned i = 0; i < ndstcomps; i++) {
         if (dstmask & BITFIELD_BIT(i))
            dstcomps[i] = nir_channel(&b, texel, i);
         else
            dstcomps[i] = nir_channel(&b, oldtexel, i);
      }

      texel = nir_vec(&b, dstcomps, ndstcomps);
   }

   nir_store_var(&b, out, texel, 0xff);

   struct panfrost_compile_inputs inputs = {
      .gpu_id = dev->physical_device->kmod.props.gpu_prod_id,
      .is_blit = true,
      .no_ubo_to_push = true,
   };

   struct util_dynarray binary;

   util_dynarray_init(&binary, NULL);
   pan_shader_preprocess(b.shader, inputs.gpu_id);
   NIR_PASS_V(b.shader, GENX(pan_inline_rt_conversion), &dstfmt);
   GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info);

   shader_info->fs.sample_shading = is_ms;

   mali_ptr shader =
      pan_pool_upload_aligned(bin_pool, binary.data, binary.size, 128);

   util_dynarray_fini(&binary);
   ralloc_free(b.shader);

   return shader;
}
441 
442 static enum pipe_format
panvk_meta_copy_img_format(enum pipe_format fmt)443 panvk_meta_copy_img_format(enum pipe_format fmt)
444 {
445    /* We can't use a non-compressed format when handling a tiled/AFBC
446     * compressed format because the tile size differ (4x4 blocks for
447     * compressed formats and 16x16 texels for non-compressed ones).
448     */
449    assert(!util_format_is_compressed(fmt));
450 
451    /* Pick blendable formats when we can, otherwise pick the UINT variant
452     * matching the texel size.
453     */
454    switch (util_format_get_blocksize(fmt)) {
455    case 16:
456       return PIPE_FORMAT_R32G32B32A32_UINT;
457    case 12:
458       return PIPE_FORMAT_R32G32B32_UINT;
459    case 8:
460       return PIPE_FORMAT_R32G32_UINT;
461    case 6:
462       return PIPE_FORMAT_R16G16B16_UINT;
463    case 4:
464       return PIPE_FORMAT_R8G8B8A8_UNORM;
465    case 2:
466       return (fmt == PIPE_FORMAT_R5G6B5_UNORM ||
467               fmt == PIPE_FORMAT_B5G6R5_UNORM)
468                 ? PIPE_FORMAT_R5G6B5_UNORM
469                 : PIPE_FORMAT_R8G8_UNORM;
470    case 1:
471       return PIPE_FORMAT_R8_UNORM;
472    default:
473       unreachable("Unsupported format\n");
474    }
475 }
476 
/*
 * Key identifying a pre-compiled img2img copy shader: source/destination
 * canonical render formats plus the destination component writemask.
 * PACKED so the whole key can be compared with memcmp() without hitting
 * uninitialized padding bytes.
 */
struct panvk_meta_copy_img2img_format_info {
   enum pipe_format srcfmt;
   enum pipe_format dstfmt;
   unsigned dstmask;
} PACKED;
482 
/*
 * Every (srcfmt, dstfmt, dstmask) combination the img2img path supports.
 * Entry order defines the shader-cache index returned by
 * panvk_meta_copy_img2img_format_idx(); its size must stay in sync with
 * PANVK_META_COPY_IMG2IMG_NUM_FORMATS.
 */
static const struct panvk_meta_copy_img2img_format_info
   panvk_meta_copy_img2img_fmts[] = {
      {PIPE_FORMAT_R8_UNORM, PIPE_FORMAT_R8_UNORM, 0x1},
      {PIPE_FORMAT_R5G6B5_UNORM, PIPE_FORMAT_R5G6B5_UNORM, 0x7},
      {PIPE_FORMAT_R5G6B5_UNORM, PIPE_FORMAT_R8G8_UNORM, 0x3},
      {PIPE_FORMAT_R8G8_UNORM, PIPE_FORMAT_R5G6B5_UNORM, 0x7},
      {PIPE_FORMAT_R8G8_UNORM, PIPE_FORMAT_R8G8_UNORM, 0x3},
      /* Z24S8(depth) */
      {PIPE_FORMAT_R8G8B8A8_UNORM, PIPE_FORMAT_R8G8B8A8_UNORM, 0x7},
      /* Z24S8(stencil) */
      {PIPE_FORMAT_R8G8B8A8_UNORM, PIPE_FORMAT_R8G8B8A8_UNORM, 0x8},
      {PIPE_FORMAT_R8G8B8A8_UNORM, PIPE_FORMAT_R8G8B8A8_UNORM, 0xf},
      {PIPE_FORMAT_R16G16B16_UINT, PIPE_FORMAT_R16G16B16_UINT, 0x7},
      {PIPE_FORMAT_R32G32_UINT, PIPE_FORMAT_R32G32_UINT, 0x3},
      /* Z32S8X24(depth) */
      {PIPE_FORMAT_R32G32_UINT, PIPE_FORMAT_R32G32_UINT, 0x1},
      /* Z32S8X24(stencil) */
      {PIPE_FORMAT_R32G32_UINT, PIPE_FORMAT_R32G32_UINT, 0x2},
      {PIPE_FORMAT_R32G32B32_UINT, PIPE_FORMAT_R32G32B32_UINT, 0x7},
      {PIPE_FORMAT_R32G32B32A32_UINT, PIPE_FORMAT_R32G32B32A32_UINT, 0xf},
};
504 
/*
 * Map a format key to its index in panvk_meta_copy_img2img_fmts[] (and
 * thus into the img2img shader cache). memcmp() is valid here because the
 * key struct is PACKED (no padding). Aborts on unsupported combinations.
 */
static unsigned
panvk_meta_copy_img2img_format_idx(
   struct panvk_meta_copy_img2img_format_info key)
{
   STATIC_ASSERT(ARRAY_SIZE(panvk_meta_copy_img2img_fmts) ==
                 PANVK_META_COPY_IMG2IMG_NUM_FORMATS);

   for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_img2img_fmts); i++) {
      if (!memcmp(&key, &panvk_meta_copy_img2img_fmts[i], sizeof(key)))
         return i;
   }

   unreachable("Invalid image format\n");
}
519 
/*
 * Compute the component writemask to use when copying to an image of
 * format @imgfmt, restricted to @aspectMask. For color aspects this is
 * simply all components of the canonical render format; for depth/stencil
 * aspects the mask selects the components of the canonical format that
 * alias the requested aspect (e.g. Z24S8 maps to RGBA8 with depth in
 * bits 0-2 and stencil in bit 3).
 */
static unsigned
panvk_meta_copy_img_mask(enum pipe_format imgfmt, VkImageAspectFlags aspectMask)
{
   if (aspectMask != VK_IMAGE_ASPECT_DEPTH_BIT &&
       aspectMask != VK_IMAGE_ASPECT_STENCIL_BIT) {
      enum pipe_format outfmt = panvk_meta_copy_img_format(imgfmt);

      return (1 << util_format_get_nr_components(outfmt)) - 1;
   }

   switch (imgfmt) {
   case PIPE_FORMAT_S8_UINT:
      return 1;
   case PIPE_FORMAT_Z16_UNORM:
      return 3;
   case PIPE_FORMAT_Z16_UNORM_S8_UINT:
      return aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT ? 3 : 8;
   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
      return aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT ? 7 : 8;
   case PIPE_FORMAT_Z24X8_UNORM:
      assert(aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT);
      return 7;
   case PIPE_FORMAT_Z32_FLOAT:
      return 0xf;
   case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
      /* Canonical format is RG32_UINT: depth in R, stencil in G. */
      return aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT ? 1 : 2;
   default:
      unreachable("Invalid depth format\n");
   }
}
550 
/*
 * Execute one VkImageCopy2 region as a series of tiler-based blits, one
 * batch per destination layer. The destination is bound as the render
 * target (preloaded, then partially overwritten) and the source is
 * sampled as a texture.
 */
static void
panvk_meta_copy_img2img(struct panvk_cmd_buffer *cmdbuf,
                        const struct panvk_image *src,
                        const struct panvk_image *dst,
                        const VkImageCopy2 *region)
{
   struct pan_fb_info *fbinfo = &cmdbuf->state.fb.info;
   struct panvk_meta_copy_img2img_format_info key = {
      .srcfmt = panvk_meta_copy_img_format(src->pimage.layout.format),
      .dstfmt = panvk_meta_copy_img_format(dst->pimage.layout.format),
      .dstmask = panvk_meta_copy_img_mask(dst->pimage.layout.format,
                                          region->dstSubresource.aspectMask),
   };

   assert(src->pimage.layout.nr_samples == dst->pimage.layout.nr_samples);

   /* Look up the pre-compiled RSD matching (msaa, tex type, formats). */
   unsigned texdimidx = panvk_meta_copy_tex_type(
      src->pimage.layout.dim, src->pimage.layout.array_size > 1);
   unsigned fmtidx = panvk_meta_copy_img2img_format_idx(key);
   unsigned ms = dst->pimage.layout.nr_samples > 1 ? 1 : 0;

   mali_ptr rsd =
      cmdbuf->device->meta.copy.img2img[ms][texdimidx][fmtidx]
         .rsd;

   /* Source view spanning all copied layers; cube sources are sampled as
    * 2D. */
   struct pan_image_view srcview = {
      .format = key.srcfmt,
      .dim = src->pimage.layout.dim == MALI_TEXTURE_DIMENSION_CUBE
                ? MALI_TEXTURE_DIMENSION_2D
                : src->pimage.layout.dim,
      .planes[0] = &src->pimage,
      .nr_samples = src->pimage.layout.nr_samples,
      .first_level = region->srcSubresource.mipLevel,
      .last_level = region->srcSubresource.mipLevel,
      .first_layer = region->srcSubresource.baseArrayLayer,
      .last_layer = region->srcSubresource.baseArrayLayer +
                    region->srcSubresource.layerCount - 1,
      .swizzle = {PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z,
                  PIPE_SWIZZLE_W},
   };

   /* Destination view; first/last_layer is set per-iteration below. */
   struct pan_image_view dstview = {
      .format = key.dstfmt,
      .dim = MALI_TEXTURE_DIMENSION_2D,
      .planes[0] = &dst->pimage,
      .nr_samples = dst->pimage.layout.nr_samples,
      .first_level = region->dstSubresource.mipLevel,
      .last_level = region->dstSubresource.mipLevel,
      .swizzle = {PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z,
                  PIPE_SWIZZLE_W},
   };

   unsigned minx = MAX2(region->dstOffset.x, 0);
   unsigned miny = MAX2(region->dstOffset.y, 0);
   unsigned maxx = MAX2(region->dstOffset.x + region->extent.width - 1, 0);
   unsigned maxy = MAX2(region->dstOffset.y + region->extent.height - 1, 0);

   mali_ptr vpd = panvk_per_arch(meta_emit_viewport)(&cmdbuf->desc_pool.base,
                                                     minx, miny, maxx, maxy);

   /* Destination rect as a 4-vertex strip (shared by all layers). */
   float dst_rect[] = {
      minx, miny,     0.0, 1.0, maxx + 1, miny,     0.0, 1.0,
      minx, maxy + 1, 0.0, 1.0, maxx + 1, maxy + 1, 0.0, 1.0,
   };

   mali_ptr dst_coords = pan_pool_upload_aligned(
      &cmdbuf->desc_pool.base, dst_rect, sizeof(dst_rect), 64);

   /* TODO: don't force preloads of dst resources if unneeded */

   unsigned width =
      u_minify(dst->pimage.layout.width, region->dstSubresource.mipLevel);
   unsigned height =
      u_minify(dst->pimage.layout.height, region->dstSubresource.mipLevel);
   cmdbuf->state.fb.crc_valid[0] = false;
   /* FB covering the destination level, clamped to 32-pixel tile
    * alignment around the copied rect. */
   *fbinfo = (struct pan_fb_info){
      .tile_buf_budget = panfrost_query_optimal_tib_size(
         cmdbuf->device->physical_device->model),
      .width = width,
      .height = height,
      .extent.minx = minx & ~31,
      .extent.miny = miny & ~31,
      .extent.maxx = MIN2(ALIGN_POT(maxx + 1, 32), width) - 1,
      .extent.maxy = MIN2(ALIGN_POT(maxy + 1, 32), height) - 1,
      .nr_samples = dst->pimage.layout.nr_samples,
      .rt_count = 1,
      .rts[0].view = &dstview,
      .rts[0].preload = true,
      .rts[0].crc_valid = &cmdbuf->state.fb.crc_valid[0],
   };

   mali_ptr texture =
      panvk_meta_copy_img_emit_texture(&cmdbuf->desc_pool.base, &srcview);
   mali_ptr sampler = panvk_meta_copy_img_emit_sampler(&cmdbuf->desc_pool.base);

   /* The meta copy needs its own batches; close any pending one first. */
   panvk_per_arch(cmd_close_batch)(cmdbuf);

   /* Source rect; the z component carries the source layer. */
   minx = MAX2(region->srcOffset.x, 0);
   miny = MAX2(region->srcOffset.y, 0);
   maxx = MAX2(region->srcOffset.x + region->extent.width - 1, 0);
   maxy = MAX2(region->srcOffset.y + region->extent.height - 1, 0);
   assert(region->dstOffset.z >= 0);

   /* 3D copies use {src,dst}Offset.z/extent.depth; array copies use
    * baseArrayLayer/layerCount. MAX2 folds both cases. */
   unsigned first_src_layer = MAX2(0, region->srcOffset.z);
   unsigned first_dst_layer =
      MAX2(region->dstSubresource.baseArrayLayer, region->dstOffset.z);
   unsigned nlayers =
      MAX2(region->dstSubresource.layerCount, region->extent.depth);
   for (unsigned l = 0; l < nlayers; l++) {
      unsigned src_l = l + first_src_layer;
      float src_rect[] = {
         minx, miny,     src_l, 1.0, maxx + 1, miny,     src_l, 1.0,
         minx, maxy + 1, src_l, 1.0, maxx + 1, maxy + 1, src_l, 1.0,
      };

      mali_ptr src_coords = pan_pool_upload_aligned(
         &cmdbuf->desc_pool.base, src_rect, sizeof(src_rect), 64);

      struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf);

      dstview.first_layer = dstview.last_layer = l + first_dst_layer;
      batch->blit.src = src->bo;
      batch->blit.dst = dst->bo;
      panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, true);
      panvk_per_arch(cmd_alloc_fb_desc)(cmdbuf);
      panvk_per_arch(cmd_prepare_tiler_context)(cmdbuf);

      mali_ptr tsd, tiler;

      tsd = batch->tls.gpu;
      tiler = batch->tiler.descs.gpu;

      struct panfrost_ptr job;

      job = panvk_meta_copy_emit_tiler_job(&cmdbuf->desc_pool.base, &batch->jc,
                                           src_coords, dst_coords, texture,
                                           sampler, 0, vpd, rsd, tsd, tiler);

      util_dynarray_append(&batch->jobs, void *, job.cpu);
      panvk_per_arch(cmd_close_batch)(cmdbuf);
   }
}
693 
/*
 * Pre-compile every img2img copy shader variant for the given MSAA mode
 * and populate dev->meta.copy.img2img[is_ms][texdim][fmt] with the
 * corresponding renderer-state descriptors. Called once at device init.
 */
static void
panvk_meta_copy_img2img_init(struct panvk_device *dev, bool is_ms)
{
   STATIC_ASSERT(ARRAY_SIZE(panvk_meta_copy_img2img_fmts) ==
                 PANVK_META_COPY_IMG2IMG_NUM_FORMATS);

   for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_img2img_fmts); i++) {
      for (unsigned texdim = 1; texdim <= 3; texdim++) {
         /* Non-array variant first. */
         unsigned texdimidx = panvk_meta_copy_tex_type(texdim, false);
         assert(texdimidx < ARRAY_SIZE(dev->meta.copy.img2img[0]));

         /* No MSAA on 1D/3D textures */
         if (texdim != 2 && is_ms)
            continue;

         /* shader_info is an out-parameter filled by the compiler. */
         struct pan_shader_info shader_info;
         mali_ptr shader = panvk_meta_copy_img2img_shader(
            dev, panvk_meta_copy_img2img_fmts[i].srcfmt,
            panvk_meta_copy_img2img_fmts[i].dstfmt,
            panvk_meta_copy_img2img_fmts[i].dstmask, texdim, false, is_ms,
            &shader_info);
         dev->meta.copy.img2img[is_ms][texdimidx][i].rsd =
            panvk_meta_copy_to_img_emit_rsd(
               &dev->meta.desc_pool.base, shader, &shader_info,
               panvk_meta_copy_img2img_fmts[i].dstfmt,
               panvk_meta_copy_img2img_fmts[i].dstmask, true);
         /* No array variant for 3D textures. */
         if (texdim == 3)
            continue;

         /* Array variant (texdimidx is rebound to the array slot). */
         memset(&shader_info, 0, sizeof(shader_info));
         texdimidx = panvk_meta_copy_tex_type(texdim, true);
         assert(texdimidx < ARRAY_SIZE(dev->meta.copy.img2img[0]));
         shader = panvk_meta_copy_img2img_shader(
            dev, panvk_meta_copy_img2img_fmts[i].srcfmt,
            panvk_meta_copy_img2img_fmts[i].dstfmt,
            panvk_meta_copy_img2img_fmts[i].dstmask, texdim, true, is_ms,
            &shader_info);
         dev->meta.copy.img2img[is_ms][texdimidx][i].rsd =
            panvk_meta_copy_to_img_emit_rsd(
               &dev->meta.desc_pool.base, shader, &shader_info,
               panvk_meta_copy_img2img_fmts[i].dstfmt,
               panvk_meta_copy_img2img_fmts[i].dstmask, true);
      }
   }
}
739 
740 void
panvk_per_arch(CmdCopyImage2)741 panvk_per_arch(CmdCopyImage2)(VkCommandBuffer commandBuffer,
742                               const VkCopyImageInfo2 *pCopyImageInfo)
743 {
744    VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
745    VK_FROM_HANDLE(panvk_image, dst, pCopyImageInfo->dstImage);
746    VK_FROM_HANDLE(panvk_image, src, pCopyImageInfo->srcImage);
747 
748    for (unsigned i = 0; i < pCopyImageInfo->regionCount; i++) {
749       panvk_meta_copy_img2img(cmdbuf, src, dst, &pCopyImageInfo->pRegions[i]);
750    }
751 }
752 
753 static unsigned
panvk_meta_copy_buf_texelsize(enum pipe_format imgfmt,unsigned mask)754 panvk_meta_copy_buf_texelsize(enum pipe_format imgfmt, unsigned mask)
755 {
756    unsigned imgtexelsz = util_format_get_blocksize(imgfmt);
757    unsigned nbufcomps = util_bitcount(mask);
758 
759    if (nbufcomps == util_format_get_nr_components(imgfmt))
760       return imgtexelsz;
761 
762    /* Special case for Z24 buffers which are not tightly packed */
763    if (mask == 7 && imgtexelsz == 4)
764       return 4;
765 
766    /* Special case for S8 extraction from Z32_S8X24 */
767    if (mask == 2 && imgtexelsz == 8)
768       return 1;
769 
770    unsigned compsz =
771       util_format_get_component_bits(imgfmt, UTIL_FORMAT_COLORSPACE_RGB, 0);
772 
773    assert(!(compsz % 8));
774 
775    return nbufcomps * compsz / 8;
776 }
777 
778 static enum pipe_format
panvk_meta_copy_buf2img_format(enum pipe_format imgfmt)779 panvk_meta_copy_buf2img_format(enum pipe_format imgfmt)
780 {
781    /* Pick blendable formats when we can, and the FLOAT variant matching the
782     * texelsize otherwise.
783     */
784    switch (util_format_get_blocksize(imgfmt)) {
785    case 1:
786       return PIPE_FORMAT_R8_UNORM;
787    /* AFBC stores things differently for RGB565,
788     * we can't simply map to R8G8 in that case */
789    case 2:
790       return (imgfmt == PIPE_FORMAT_R5G6B5_UNORM ||
791               imgfmt == PIPE_FORMAT_B5G6R5_UNORM)
792                 ? PIPE_FORMAT_R5G6B5_UNORM
793                 : PIPE_FORMAT_R8G8_UNORM;
794    case 4:
795       return PIPE_FORMAT_R8G8B8A8_UNORM;
796    case 6:
797       return PIPE_FORMAT_R16G16B16_UINT;
798    case 8:
799       return PIPE_FORMAT_R32G32_UINT;
800    case 12:
801       return PIPE_FORMAT_R32G32B32_UINT;
802    case 16:
803       return PIPE_FORMAT_R32G32B32A32_UINT;
804    default:
805       unreachable("Invalid format\n");
806    }
807 }
808 
/* Key identifying a pre-compiled copy pipeline: the canonical copy format
 * plus the mask of components actually transferred. PACKED so instances can
 * be compared with memcmp() against the format tables below.
 */
struct panvk_meta_copy_format_info {
   enum pipe_format imgfmt; /* canonical format (see *_format() helpers) */
   unsigned mask;           /* bitmask of copied components */
} PACKED;
813 
/* All (format, mask) keys a buf->img copy pipeline is compiled for.
 * Must match PANVK_META_COPY_BUF2IMG_NUM_FORMATS (checked in the init
 * function with a STATIC_ASSERT).
 */
static const struct panvk_meta_copy_format_info panvk_meta_copy_buf2img_fmts[] =
   {
      {PIPE_FORMAT_R8_UNORM, 0x1},
      {PIPE_FORMAT_R8G8_UNORM, 0x3},
      {PIPE_FORMAT_R5G6B5_UNORM, 0x7},
      {PIPE_FORMAT_R8G8B8A8_UNORM, 0xf},
      {PIPE_FORMAT_R16G16B16_UINT, 0x7},
      {PIPE_FORMAT_R32G32_UINT, 0x3},
      {PIPE_FORMAT_R32G32B32_UINT, 0x7},
      {PIPE_FORMAT_R32G32B32A32_UINT, 0xf},
      /* S8 -> Z24S8 */
      {PIPE_FORMAT_R8G8B8A8_UNORM, 0x8},
      /* S8 -> Z32_S8X24 */
      {PIPE_FORMAT_R32G32_UINT, 0x2},
      /* Z24X8 -> Z24S8 */
      {PIPE_FORMAT_R8G8B8A8_UNORM, 0x7},
      /* Z32 -> Z32_S8X24 */
      {PIPE_FORMAT_R32G32_UINT, 0x1},
};
833 
/* Push-constant payload consumed by the buf->img copy fragment shader. */
struct panvk_meta_copy_buf2img_info {
   struct {
      mali_ptr ptr; /* GPU address of the source buffer */
      struct {
         unsigned line; /* bytes per buffer row */
         unsigned surf; /* bytes per buffer 2D slice */
      } stride;
   } buf;
} PACKED;
843 
/* Emits a push-constant load of one panvk_meta_copy_buf2img_info field, at
 * the field's struct offset and with its natural bit width.
 */
#define panvk_meta_copy_buf2img_get_info_field(b, field)                       \
   nir_load_push_constant(                                                     \
      (b), 1, sizeof(((struct panvk_meta_copy_buf2img_info *)0)->field) * 8,   \
      nir_imm_int(b, 0),                                                       \
      .base = offsetof(struct panvk_meta_copy_buf2img_info, field),            \
      .range = ~0)
850 
/* Builds and uploads the fragment shader used for buf->img copies.
 *
 * The destination image is bound as the render target (aliased to
 * key.imgfmt); the shader computes the source address from the interpolated
 * coordinate and the push-constant buffer strides, loads the texel with a
 * global load, and writes it to the color output. Returns the GPU address
 * of the compiled binary and fills @shader_info.
 */
static mali_ptr
panvk_meta_copy_buf2img_shader(struct panvk_device *dev,
                               struct panvk_meta_copy_format_info key,
                               struct pan_shader_info *shader_info)
{
   struct pan_pool *bin_pool = &dev->meta.bin_pool.base;

   nir_builder b = nir_builder_init_simple_shader(
      MESA_SHADER_FRAGMENT, GENX(pan_shader_get_compiler_options)(),
      "panvk_meta_copy_buf2img(imgfmt=%s,mask=%x)",
      util_format_name(key.imgfmt), key.mask);

   /* Varying carries the (x, y, layer) destination coordinate. */
   nir_variable *coord_var =
      nir_variable_create(b.shader, nir_var_shader_in,
                          glsl_vector_type(GLSL_TYPE_FLOAT, 3), "coord");
   coord_var->data.location = VARYING_SLOT_VAR0;
   nir_def *coord = nir_load_var(&b, coord_var);

   coord = nir_f2u32(&b, coord);

   nir_def *bufptr = panvk_meta_copy_buf2img_get_info_field(&b, buf.ptr);
   nir_def *buflinestride =
      panvk_meta_copy_buf2img_get_info_field(&b, buf.stride.line);
   nir_def *bufsurfstride =
      panvk_meta_copy_buf2img_get_info_field(&b, buf.stride.surf);

   unsigned imgtexelsz = util_format_get_blocksize(key.imgfmt);
   unsigned buftexelsz = panvk_meta_copy_buf_texelsize(key.imgfmt, key.mask);
   unsigned writemask = key.mask;

   /* Source byte offset: x * texelsz + y * line_stride + z * surf_stride. */
   nir_def *offset =
      nir_imul(&b, nir_channel(&b, coord, 0), nir_imm_int(&b, buftexelsz));
   offset = nir_iadd(&b, offset,
                     nir_imul(&b, nir_channel(&b, coord, 1), buflinestride));
   offset = nir_iadd(&b, offset,
                     nir_imul(&b, nir_channel(&b, coord, 2), bufsurfstride));
   bufptr = nir_iadd(&b, bufptr, nir_u2u64(&b, offset));

   /* Component size of the render-target format: byte components for small
    * blendable formats, 16/32-bit components otherwise (565 is handled as a
    * packed 16-bit word below).
    */
   unsigned imgcompsz =
      (imgtexelsz <= 4 && key.imgfmt != PIPE_FORMAT_R5G6B5_UNORM)
         ? 1
         : MIN2(1 << (ffs(imgtexelsz) - 1), 4);

   unsigned nimgcomps = imgtexelsz / imgcompsz;
   unsigned bufcompsz = MIN2(buftexelsz, imgcompsz);
   unsigned nbufcomps = buftexelsz / bufcompsz;

   assert(bufcompsz == 1 || bufcompsz == 2 || bufcompsz == 4);
   assert(nbufcomps <= 4 && nimgcomps <= 4);

   nir_def *texel =
      nir_load_global(&b, bufptr, bufcompsz, nbufcomps, bufcompsz * 8);

   enum glsl_base_type basetype;
   if (key.imgfmt == PIPE_FORMAT_R5G6B5_UNORM) {
      /* Unpack the 16-bit word into 5/6/5 fields and normalize to float for
       * the blendable RT path.
       */
      texel = nir_vec3(
         &b, nir_iand_imm(&b, texel, BITFIELD_MASK(5)),
         nir_iand_imm(&b, nir_ushr_imm(&b, texel, 5), BITFIELD_MASK(6)),
         nir_iand_imm(&b, nir_ushr_imm(&b, texel, 11), BITFIELD_MASK(5)));
      texel = nir_fmul(
         &b, nir_u2f32(&b, texel),
         nir_vec3(&b, nir_imm_float(&b, 1.0f / 31),
                  nir_imm_float(&b, 1.0f / 63), nir_imm_float(&b, 1.0f / 31)));
      nimgcomps = 3;
      basetype = GLSL_TYPE_FLOAT;
   } else if (imgcompsz == 1) {
      assert(bufcompsz == 1);
      /* Blendable formats are unorm and the fixed-function blend unit
       * takes float values.
       */
      texel = nir_fmul_imm(&b, nir_u2f32(&b, texel), 1.0f / 255);
      basetype = GLSL_TYPE_FLOAT;
   } else {
      texel = nir_u2uN(&b, texel, imgcompsz * 8);
      basetype = imgcompsz == 2 ? GLSL_TYPE_UINT16 : GLSL_TYPE_UINT;
   }

   /* We always pass the texel using 32-bit regs for now */
   nir_variable *out =
      nir_variable_create(b.shader, nir_var_shader_out,
                          glsl_vector_type(basetype, nimgcomps), "out");
   out->data.location = FRAG_RESULT_DATA0;

   uint16_t fullmask = (1 << nimgcomps) - 1;

   assert(fullmask >= writemask);

   /* Partial writes (e.g. S8 into Z24S8): merge the loaded components with
    * the existing RT contents (preloaded) or zero-fill for byte formats.
    */
   if (fullmask != writemask) {
      unsigned first_written_comp = ffs(writemask) - 1;
      nir_def *oldtexel = NULL;
      if (imgcompsz > 1)
         oldtexel = nir_load_var(&b, out);

      nir_def *texel_comps[4];
      for (unsigned i = 0; i < nimgcomps; i++) {
         if (writemask & BITFIELD_BIT(i))
            texel_comps[i] = nir_channel(&b, texel, i - first_written_comp);
         else if (imgcompsz > 1)
            texel_comps[i] = nir_channel(&b, oldtexel, i);
         else
            texel_comps[i] = nir_imm_intN_t(&b, 0, texel->bit_size);
      }

      texel = nir_vec(&b, texel_comps, nimgcomps);
   }

   nir_store_var(&b, out, texel, 0xff);

   struct panfrost_compile_inputs inputs = {
      .gpu_id = dev->physical_device->kmod.props.gpu_prod_id,
      .is_blit = true,
      .no_ubo_to_push = true,
   };

   struct util_dynarray binary;

   util_dynarray_init(&binary, NULL);
   pan_shader_preprocess(b.shader, inputs.gpu_id);

   enum pipe_format rt_formats[8] = {key.imgfmt};
   NIR_PASS_V(b.shader, GENX(pan_inline_rt_conversion), rt_formats);

   GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info);
   shader_info->push.count =
      DIV_ROUND_UP(sizeof(struct panvk_meta_copy_buf2img_info), 4);

   mali_ptr shader =
      pan_pool_upload_aligned(bin_pool, binary.data, binary.size, 128);

   util_dynarray_fini(&binary);
   ralloc_free(b.shader);

   return shader;
}
985 
986 static unsigned
panvk_meta_copy_buf2img_format_idx(struct panvk_meta_copy_format_info key)987 panvk_meta_copy_buf2img_format_idx(struct panvk_meta_copy_format_info key)
988 {
989    for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_buf2img_fmts); i++) {
990       if (!memcmp(&key, &panvk_meta_copy_buf2img_fmts[i], sizeof(key)))
991          return i;
992    }
993 
994    unreachable("Invalid image format\n");
995 }
996 
/* Records one buf->img copy region: binds the destination image as the sole
 * render target and draws a quad per layer/slice with the pre-compiled
 * buf->img fragment shader. Closes the current batch and opens/closes one
 * batch per layer.
 */
static void
panvk_meta_copy_buf2img(struct panvk_cmd_buffer *cmdbuf,
                        const struct panvk_buffer *buf,
                        const struct panvk_image *img,
                        const VkBufferImageCopy2 *region)
{
   struct pan_fb_info *fbinfo = &cmdbuf->state.fb.info;
   /* Clamp the destination rectangle to non-negative coordinates. */
   unsigned minx = MAX2(region->imageOffset.x, 0);
   unsigned miny = MAX2(region->imageOffset.y, 0);
   unsigned maxx =
      MAX2(region->imageOffset.x + region->imageExtent.width - 1, 0);
   unsigned maxy =
      MAX2(region->imageOffset.y + region->imageExtent.height - 1, 0);

   mali_ptr vpd = panvk_per_arch(meta_emit_viewport)(&cmdbuf->desc_pool.base,
                                                     minx, miny, maxx, maxy);

   /* Quad covering the destination rectangle (x, y, 0, 1 per corner). */
   float dst_rect[] = {
      minx, miny,     0.0, 1.0, maxx + 1, miny,     0.0, 1.0,
      minx, maxy + 1, 0.0, 1.0, maxx + 1, maxy + 1, 0.0, 1.0,
   };
   mali_ptr dst_coords = pan_pool_upload_aligned(
      &cmdbuf->desc_pool.base, dst_rect, sizeof(dst_rect), 64);

   struct panvk_meta_copy_format_info key = {
      .imgfmt = panvk_meta_copy_buf2img_format(img->pimage.layout.format),
      .mask = panvk_meta_copy_img_mask(img->pimage.layout.format,
                                       region->imageSubresource.aspectMask),
   };

   unsigned fmtidx = panvk_meta_copy_buf2img_format_idx(key);

   mali_ptr rsd =
      cmdbuf->device->meta.copy.buf2img[fmtidx].rsd;

   /* Row/image strides as mandated by the Vulkan buffer-image copy rules. */
   const struct vk_image_buffer_layout buflayout =
      vk_image_buffer_copy_layout(&img->vk, region);
   struct panvk_meta_copy_buf2img_info info = {
      .buf.ptr = panvk_buffer_gpu_ptr(buf, region->bufferOffset),
      .buf.stride.line = buflayout.row_stride_B,
      .buf.stride.surf = buflayout.image_stride_B,
   };

   mali_ptr pushconsts =
      pan_pool_upload_aligned(&cmdbuf->desc_pool.base, &info, sizeof(info), 16);

   /* Destination image viewed as a 2D RT in the canonical copy format. */
   struct pan_image_view view = {
      .format = key.imgfmt,
      .dim = MALI_TEXTURE_DIMENSION_2D,
      .planes[0] = &img->pimage,
      .nr_samples = img->pimage.layout.nr_samples,
      .first_level = region->imageSubresource.mipLevel,
      .last_level = region->imageSubresource.mipLevel,
      .swizzle = {PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z,
                  PIPE_SWIZZLE_W},
   };

   /* TODO: don't force preloads of dst resources if unneeded */
   cmdbuf->state.fb.crc_valid[0] = false;
   *fbinfo = (struct pan_fb_info){
      .tile_buf_budget = panfrost_query_optimal_tib_size(
         cmdbuf->device->physical_device->model),
      .width =
         u_minify(img->pimage.layout.width, region->imageSubresource.mipLevel),
      .height =
         u_minify(img->pimage.layout.height, region->imageSubresource.mipLevel),
      .extent.minx = minx,
      .extent.maxx = maxx,
      .extent.miny = miny,
      .extent.maxy = maxy,
      .nr_samples = 1,
      .rt_count = 1,
      .rts[0].view = &view,
      .rts[0].preload = true,
      .rts[0].crc_valid = &cmdbuf->state.fb.crc_valid[0],
   };

   panvk_per_arch(cmd_close_batch)(cmdbuf);

   /* Either array layers or 3D slices are copied, never both at once. */
   assert(region->imageSubresource.layerCount == 1 ||
          region->imageExtent.depth == 1);
   assert(region->imageOffset.z >= 0);
   unsigned first_layer =
      MAX2(region->imageSubresource.baseArrayLayer, region->imageOffset.z);
   unsigned nlayers =
      MAX2(region->imageSubresource.layerCount, region->imageExtent.depth);
   for (unsigned l = 0; l < nlayers; l++) {
      /* Source coordinates: (x, y, slice, 1) per corner; z selects the
       * buffer slice in the shader address computation.
       */
      float src_rect[] = {
         0,
         0,
         l,
         1.0,
         region->imageExtent.width,
         0,
         l,
         1.0,
         0,
         region->imageExtent.height,
         l,
         1.0,
         region->imageExtent.width,
         region->imageExtent.height,
         l,
         1.0,
      };

      mali_ptr src_coords = pan_pool_upload_aligned(
         &cmdbuf->desc_pool.base, src_rect, sizeof(src_rect), 64);

      struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf);

      view.first_layer = view.last_layer = l + first_layer;
      batch->blit.src = buf->bo;
      batch->blit.dst = img->bo;
      panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, true);
      panvk_per_arch(cmd_alloc_fb_desc)(cmdbuf);
      panvk_per_arch(cmd_prepare_tiler_context)(cmdbuf);

      mali_ptr tsd, tiler;

      tsd = batch->tls.gpu;
      tiler = batch->tiler.descs.gpu;

      struct panfrost_ptr job;

      job = panvk_meta_copy_emit_tiler_job(&cmdbuf->desc_pool.base, &batch->jc,
                                           src_coords, dst_coords, 0, 0,
                                           pushconsts, vpd, rsd, tsd, tiler);

      util_dynarray_append(&batch->jobs, void *, job.cpu);
      panvk_per_arch(cmd_close_batch)(cmdbuf);
   }
}
1130 
1131 static void
panvk_meta_copy_buf2img_init(struct panvk_device * dev)1132 panvk_meta_copy_buf2img_init(struct panvk_device *dev)
1133 {
1134    STATIC_ASSERT(ARRAY_SIZE(panvk_meta_copy_buf2img_fmts) ==
1135                  PANVK_META_COPY_BUF2IMG_NUM_FORMATS);
1136 
1137    for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_buf2img_fmts); i++) {
1138       struct pan_shader_info shader_info;
1139       mali_ptr shader = panvk_meta_copy_buf2img_shader(
1140          dev, panvk_meta_copy_buf2img_fmts[i], &shader_info);
1141       dev->meta.copy.buf2img[i].rsd = panvk_meta_copy_to_img_emit_rsd(
1142          &dev->meta.desc_pool.base, shader, &shader_info,
1143          panvk_meta_copy_buf2img_fmts[i].imgfmt,
1144          panvk_meta_copy_buf2img_fmts[i].mask, false);
1145    }
1146 }
1147 
1148 void
panvk_per_arch(CmdCopyBufferToImage2)1149 panvk_per_arch(CmdCopyBufferToImage2)(
1150    VkCommandBuffer commandBuffer,
1151    const VkCopyBufferToImageInfo2 *pCopyBufferToImageInfo)
1152 {
1153    VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
1154    VK_FROM_HANDLE(panvk_buffer, buf, pCopyBufferToImageInfo->srcBuffer);
1155    VK_FROM_HANDLE(panvk_image, img, pCopyBufferToImageInfo->dstImage);
1156 
1157    for (unsigned i = 0; i < pCopyBufferToImageInfo->regionCount; i++) {
1158       panvk_meta_copy_buf2img(cmdbuf, buf, img,
1159                               &pCopyBufferToImageInfo->pRegions[i]);
1160    }
1161 }
1162 
/* All (format, mask) keys an img->buf copy pipeline is compiled for.
 * Must match PANVK_META_COPY_IMG2BUF_NUM_FORMATS (checked in the init
 * function with a STATIC_ASSERT). UINT variants are used here because the
 * image is read back through a texture fetch, not blended.
 */
static const struct panvk_meta_copy_format_info panvk_meta_copy_img2buf_fmts[] =
   {
      {PIPE_FORMAT_R8_UINT, 0x1},
      {PIPE_FORMAT_R8G8_UINT, 0x3},
      {PIPE_FORMAT_R5G6B5_UNORM, 0x7},
      {PIPE_FORMAT_R8G8B8A8_UINT, 0xf},
      {PIPE_FORMAT_R16G16B16_UINT, 0x7},
      {PIPE_FORMAT_R32G32_UINT, 0x3},
      {PIPE_FORMAT_R32G32B32_UINT, 0x7},
      {PIPE_FORMAT_R32G32B32A32_UINT, 0xf},
      /* S8 -> Z24S8 */
      {PIPE_FORMAT_R8G8B8A8_UINT, 0x8},
      /* S8 -> Z32_S8X24 */
      {PIPE_FORMAT_R32G32_UINT, 0x2},
      /* Z24X8 -> Z24S8 */
      {PIPE_FORMAT_R8G8B8A8_UINT, 0x7},
      /* Z32 -> Z32_S8X24 */
      {PIPE_FORMAT_R32G32_UINT, 0x1},
};
1182 
1183 static enum pipe_format
panvk_meta_copy_img2buf_format(enum pipe_format imgfmt)1184 panvk_meta_copy_img2buf_format(enum pipe_format imgfmt)
1185 {
1186    /* Pick blendable formats when we can, and the FLOAT variant matching the
1187     * texelsize otherwise.
1188     */
1189    switch (util_format_get_blocksize(imgfmt)) {
1190    case 1:
1191       return PIPE_FORMAT_R8_UINT;
1192    /* AFBC stores things differently for RGB565,
1193     * we can't simply map to R8G8 in that case */
1194    case 2:
1195       return (imgfmt == PIPE_FORMAT_R5G6B5_UNORM ||
1196               imgfmt == PIPE_FORMAT_B5G6R5_UNORM)
1197                 ? PIPE_FORMAT_R5G6B5_UNORM
1198                 : PIPE_FORMAT_R8G8_UINT;
1199    case 4:
1200       return PIPE_FORMAT_R8G8B8A8_UINT;
1201    case 6:
1202       return PIPE_FORMAT_R16G16B16_UINT;
1203    case 8:
1204       return PIPE_FORMAT_R32G32_UINT;
1205    case 12:
1206       return PIPE_FORMAT_R32G32B32_UINT;
1207    case 16:
1208       return PIPE_FORMAT_R32G32B32A32_UINT;
1209    default:
1210       unreachable("Invalid format\n");
1211    }
1212 }
1213 
/* Push-constant payload consumed by the img->buf copy compute shader. */
struct panvk_meta_copy_img2buf_info {
   struct {
      mali_ptr ptr; /* GPU address of the destination buffer */
      struct {
         unsigned line; /* bytes per buffer row */
         unsigned surf; /* bytes per buffer 2D slice */
      } stride;
   } buf;
   struct {
      struct {
         /* Texel offset of the (16-aligned) dispatch origin in the image. */
         unsigned x, y, z;
      } offset;
      struct {
         /* Inclusive bounds of the region actually copied; invocations
          * outside are discarded by the shader. */
         unsigned minx, miny, maxx, maxy;
      } extent;
   } img;
} PACKED;
1231 
/* Emits a push-constant load of one panvk_meta_copy_img2buf_info field, at
 * the field's struct offset and with its natural bit width.
 */
#define panvk_meta_copy_img2buf_get_info_field(b, field)                       \
   nir_load_push_constant(                                                     \
      (b), 1, sizeof(((struct panvk_meta_copy_img2buf_info *)0)->field) * 8,   \
      nir_imm_int(b, 0),                                                       \
      .base = offsetof(struct panvk_meta_copy_img2buf_info, field),            \
      .range = ~0)
1238 
1239 static mali_ptr
panvk_meta_copy_img2buf_shader(struct panvk_device * dev,struct panvk_meta_copy_format_info key,unsigned texdim,unsigned texisarray,struct pan_shader_info * shader_info)1240 panvk_meta_copy_img2buf_shader(struct panvk_device *dev,
1241                                struct panvk_meta_copy_format_info key,
1242                                unsigned texdim, unsigned texisarray,
1243                                struct pan_shader_info *shader_info)
1244 {
1245    unsigned imgtexelsz = util_format_get_blocksize(key.imgfmt);
1246    unsigned buftexelsz = panvk_meta_copy_buf_texelsize(key.imgfmt, key.mask);
1247    struct pan_pool *bin_pool = &dev->meta.bin_pool.base;
1248 
1249    /* FIXME: Won't work on compute queues, but we can't do that with
1250     * a compute shader if the destination is an AFBC surface.
1251     */
1252    nir_builder b = nir_builder_init_simple_shader(
1253       MESA_SHADER_COMPUTE, GENX(pan_shader_get_compiler_options)(),
1254       "panvk_meta_copy_img2buf(dim=%dD%s,imgfmt=%s,mask=%x)", texdim,
1255       texisarray ? "[]" : "", util_format_name(key.imgfmt), key.mask);
1256 
1257    nir_def *coord = nir_load_global_invocation_id(&b, 32);
1258    nir_def *bufptr = panvk_meta_copy_img2buf_get_info_field(&b, buf.ptr);
1259    nir_def *buflinestride =
1260       panvk_meta_copy_img2buf_get_info_field(&b, buf.stride.line);
1261    nir_def *bufsurfstride =
1262       panvk_meta_copy_img2buf_get_info_field(&b, buf.stride.surf);
1263 
1264    nir_def *imgminx =
1265       panvk_meta_copy_img2buf_get_info_field(&b, img.extent.minx);
1266    nir_def *imgminy =
1267       panvk_meta_copy_img2buf_get_info_field(&b, img.extent.miny);
1268    nir_def *imgmaxx =
1269       panvk_meta_copy_img2buf_get_info_field(&b, img.extent.maxx);
1270    nir_def *imgmaxy =
1271       panvk_meta_copy_img2buf_get_info_field(&b, img.extent.maxy);
1272 
1273    nir_def *imgcoords, *inbounds;
1274 
1275    switch (texdim + texisarray) {
1276    case 1:
1277       imgcoords =
1278          nir_iadd(&b, nir_channel(&b, coord, 0),
1279                   panvk_meta_copy_img2buf_get_info_field(&b, img.offset.x));
1280       inbounds =
1281          nir_iand(&b, nir_uge(&b, imgmaxx, nir_channel(&b, imgcoords, 0)),
1282                   nir_uge(&b, nir_channel(&b, imgcoords, 0), imgminx));
1283       break;
1284    case 2:
1285       imgcoords = nir_vec2(
1286          &b,
1287          nir_iadd(&b, nir_channel(&b, coord, 0),
1288                   panvk_meta_copy_img2buf_get_info_field(&b, img.offset.x)),
1289          nir_iadd(&b, nir_channel(&b, coord, 1),
1290                   panvk_meta_copy_img2buf_get_info_field(&b, img.offset.y)));
1291       inbounds = nir_iand(
1292          &b,
1293          nir_iand(&b, nir_uge(&b, imgmaxx, nir_channel(&b, imgcoords, 0)),
1294                   nir_uge(&b, imgmaxy, nir_channel(&b, imgcoords, 1))),
1295          nir_iand(&b, nir_uge(&b, nir_channel(&b, imgcoords, 0), imgminx),
1296                   nir_uge(&b, nir_channel(&b, imgcoords, 1), imgminy)));
1297       break;
1298    case 3:
1299       imgcoords = nir_vec3(
1300          &b,
1301          nir_iadd(&b, nir_channel(&b, coord, 0),
1302                   panvk_meta_copy_img2buf_get_info_field(&b, img.offset.x)),
1303          nir_iadd(&b, nir_channel(&b, coord, 1),
1304                   panvk_meta_copy_img2buf_get_info_field(&b, img.offset.y)),
1305          nir_iadd(&b, nir_channel(&b, coord, 2),
1306                   panvk_meta_copy_img2buf_get_info_field(&b, img.offset.y)));
1307       inbounds = nir_iand(
1308          &b,
1309          nir_iand(&b, nir_uge(&b, imgmaxx, nir_channel(&b, imgcoords, 0)),
1310                   nir_uge(&b, imgmaxy, nir_channel(&b, imgcoords, 1))),
1311          nir_iand(&b, nir_uge(&b, nir_channel(&b, imgcoords, 0), imgminx),
1312                   nir_uge(&b, nir_channel(&b, imgcoords, 1), imgminy)));
1313       break;
1314    default:
1315       unreachable("Invalid texture dimension\n");
1316    }
1317 
1318    nir_push_if(&b, inbounds);
1319 
1320    /* FIXME: doesn't work for tiled+compressed formats since blocks are 4x4
1321     * blocks instead of 16x16 texels in that case, and there's nothing we can
1322     * do to force the tile size to 4x4 in the render path.
1323     * This being said, compressed textures are not compatible with AFBC, so we
1324     * could use a compute shader arranging the blocks properly.
1325     */
1326    nir_def *offset =
1327       nir_imul(&b, nir_channel(&b, coord, 0), nir_imm_int(&b, buftexelsz));
1328    offset = nir_iadd(&b, offset,
1329                      nir_imul(&b, nir_channel(&b, coord, 1), buflinestride));
1330    offset = nir_iadd(&b, offset,
1331                      nir_imul(&b, nir_channel(&b, coord, 2), bufsurfstride));
1332    bufptr = nir_iadd(&b, bufptr, nir_u2u64(&b, offset));
1333 
1334    unsigned imgcompsz =
1335       imgtexelsz <= 4 ? 1 : MIN2(1 << (ffs(imgtexelsz) - 1), 4);
1336    unsigned nimgcomps = imgtexelsz / imgcompsz;
1337    assert(nimgcomps <= 4);
1338 
1339    nir_tex_instr *tex = nir_tex_instr_create(b.shader, 1);
1340    tex->op = nir_texop_txf;
1341    tex->texture_index = 0;
1342    tex->is_array = texisarray;
1343    tex->dest_type =
1344       util_format_is_unorm(key.imgfmt) ? nir_type_float32 : nir_type_uint32;
1345 
1346    switch (texdim) {
1347    case 1:
1348       tex->sampler_dim = GLSL_SAMPLER_DIM_1D;
1349       break;
1350    case 2:
1351       tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
1352       break;
1353    case 3:
1354       tex->sampler_dim = GLSL_SAMPLER_DIM_3D;
1355       break;
1356    default:
1357       unreachable("Invalid texture dimension");
1358    }
1359 
1360    tex->src[0] = nir_tex_src_for_ssa(nir_tex_src_coord, imgcoords);
1361    tex->coord_components = texdim + texisarray;
1362    nir_def_init(&tex->instr, &tex->def, 4,
1363                 nir_alu_type_get_type_size(tex->dest_type));
1364    nir_builder_instr_insert(&b, &tex->instr);
1365 
1366    nir_def *texel = &tex->def;
1367 
1368    unsigned fullmask = (1 << util_format_get_nr_components(key.imgfmt)) - 1;
1369    unsigned nbufcomps = util_bitcount(fullmask);
1370    if (key.mask != fullmask) {
1371       nir_def *bufcomps[4];
1372       nbufcomps = 0;
1373       for (unsigned i = 0; i < nimgcomps; i++) {
1374          if (key.mask & BITFIELD_BIT(i))
1375             bufcomps[nbufcomps++] = nir_channel(&b, texel, i);
1376       }
1377 
1378       texel = nir_vec(&b, bufcomps, nbufcomps);
1379    }
1380 
1381    unsigned bufcompsz = buftexelsz / nbufcomps;
1382 
1383    if (key.imgfmt == PIPE_FORMAT_R5G6B5_UNORM) {
1384       texel = nir_fmul(&b, texel,
1385                        nir_vec3(&b, nir_imm_float(&b, 31),
1386                                 nir_imm_float(&b, 63), nir_imm_float(&b, 31)));
1387       texel = nir_f2u16(&b, texel);
1388       texel = nir_ior(
1389          &b, nir_channel(&b, texel, 0),
1390          nir_ior(&b,
1391                  nir_ishl(&b, nir_channel(&b, texel, 1), nir_imm_int(&b, 5)),
1392                  nir_ishl(&b, nir_channel(&b, texel, 2), nir_imm_int(&b, 11))));
1393       imgcompsz = 2;
1394       bufcompsz = 2;
1395       nbufcomps = 1;
1396       nimgcomps = 1;
1397    } else if (imgcompsz == 1) {
1398       nir_def *packed = nir_channel(&b, texel, 0);
1399       for (unsigned i = 1; i < nbufcomps; i++) {
1400          packed = nir_ior(
1401             &b, packed,
1402             nir_ishl(&b, nir_iand_imm(&b, nir_channel(&b, texel, i), 0xff),
1403                      nir_imm_int(&b, i * 8)));
1404       }
1405       texel = packed;
1406 
1407       bufcompsz = nbufcomps == 3 ? 4 : nbufcomps;
1408       nbufcomps = 1;
1409    }
1410 
1411    assert(bufcompsz == 1 || bufcompsz == 2 || bufcompsz == 4);
1412    assert(nbufcomps <= 4 && nimgcomps <= 4);
1413    texel = nir_u2uN(&b, texel, bufcompsz * 8);
1414 
1415    nir_store_global(&b, bufptr, bufcompsz, texel, (1 << nbufcomps) - 1);
1416    nir_pop_if(&b, NULL);
1417 
1418    struct panfrost_compile_inputs inputs = {
1419       .gpu_id = dev->physical_device->kmod.props.gpu_prod_id,
1420       .is_blit = true,
1421       .no_ubo_to_push = true,
1422    };
1423 
1424    struct util_dynarray binary;
1425 
1426    util_dynarray_init(&binary, NULL);
1427    pan_shader_preprocess(b.shader, inputs.gpu_id);
1428    GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info);
1429 
1430    shader_info->push.count =
1431       DIV_ROUND_UP(sizeof(struct panvk_meta_copy_img2buf_info), 4);
1432 
1433    mali_ptr shader =
1434       pan_pool_upload_aligned(bin_pool, binary.data, binary.size, 128);
1435 
1436    util_dynarray_fini(&binary);
1437    ralloc_free(b.shader);
1438 
1439    return shader;
1440 }
1441 
1442 static unsigned
panvk_meta_copy_img2buf_format_idx(struct panvk_meta_copy_format_info key)1443 panvk_meta_copy_img2buf_format_idx(struct panvk_meta_copy_format_info key)
1444 {
1445    for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_img2buf_fmts); i++) {
1446       if (!memcmp(&key, &panvk_meta_copy_img2buf_fmts[i], sizeof(key)))
1447          return i;
1448    }
1449 
1450    unreachable("Invalid texel size\n");
1451 }
1452 
/* Records one img->buf copy region: dispatches the pre-compiled img->buf
 * compute shader over 16x16 tiles (16x1 for 1D) covering the region, with
 * the source image bound as a texture. Closes the current batch and runs
 * the copy in a fresh one.
 */
static void
panvk_meta_copy_img2buf(struct panvk_cmd_buffer *cmdbuf,
                        const struct panvk_buffer *buf,
                        const struct panvk_image *img,
                        const VkBufferImageCopy2 *region)
{
   struct panvk_meta_copy_format_info key = {
      .imgfmt = panvk_meta_copy_img2buf_format(img->pimage.layout.format),
      .mask = panvk_meta_copy_img_mask(img->pimage.layout.format,
                                       region->imageSubresource.aspectMask),
   };
   unsigned buftexelsz = panvk_meta_copy_buf_texelsize(key.imgfmt, key.mask);
   unsigned texdimidx = panvk_meta_copy_tex_type(
      img->pimage.layout.dim, img->pimage.layout.array_size > 1);
   unsigned fmtidx = panvk_meta_copy_img2buf_format_idx(key);

   mali_ptr rsd =
      cmdbuf->device->meta.copy.img2buf[texdimidx][fmtidx].rsd;

   /* Dispatch origin is aligned down to the 16-texel tile grid; the shader
    * drops invocations outside [minx,maxx]x[miny,maxy].
    */
   struct panvk_meta_copy_img2buf_info info = {
      .buf.ptr = panvk_buffer_gpu_ptr(buf, region->bufferOffset),
      .buf.stride.line =
         (region->bufferRowLength ?: region->imageExtent.width) * buftexelsz,
      .img.offset.x = MAX2(region->imageOffset.x & ~15, 0),
      .img.extent.minx = MAX2(region->imageOffset.x, 0),
      .img.extent.maxx =
         MAX2(region->imageOffset.x + region->imageExtent.width - 1, 0),
   };

   if (img->pimage.layout.dim == MALI_TEXTURE_DIMENSION_1D) {
      /* For 1D arrays the y dimension of the dispatch walks the layers. */
      info.img.extent.maxy = region->imageSubresource.layerCount - 1;
   } else {
      info.img.offset.y = MAX2(region->imageOffset.y & ~15, 0);
      info.img.offset.z = MAX2(region->imageOffset.z, 0);
      info.img.extent.miny = MAX2(region->imageOffset.y, 0);
      info.img.extent.maxy =
         MAX2(region->imageOffset.y + region->imageExtent.height - 1, 0);
   }

   info.buf.stride.surf =
      (region->bufferImageHeight ?: region->imageExtent.height) *
      info.buf.stride.line;

   mali_ptr pushconsts =
      pan_pool_upload_aligned(&cmdbuf->desc_pool.base, &info, sizeof(info), 16);

   /* Source image viewed in the canonical copy format; cube maps are
    * sampled as 2D arrays.
    */
   struct pan_image_view view = {
      .format = key.imgfmt,
      .dim = img->pimage.layout.dim == MALI_TEXTURE_DIMENSION_CUBE
                ? MALI_TEXTURE_DIMENSION_2D
                : img->pimage.layout.dim,
      .planes[0] = &img->pimage,
      .nr_samples = img->pimage.layout.nr_samples,
      .first_level = region->imageSubresource.mipLevel,
      .last_level = region->imageSubresource.mipLevel,
      .first_layer = region->imageSubresource.baseArrayLayer,
      .last_layer = region->imageSubresource.baseArrayLayer +
                    region->imageSubresource.layerCount - 1,
      .swizzle = {PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z,
                  PIPE_SWIZZLE_W},
   };

   mali_ptr texture =
      panvk_meta_copy_img_emit_texture(&cmdbuf->desc_pool.base, &view);
   mali_ptr sampler = panvk_meta_copy_img_emit_sampler(&cmdbuf->desc_pool.base);

   panvk_per_arch(cmd_close_batch)(cmdbuf);

   struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf);

   struct pan_tls_info tlsinfo = {0};

   batch->blit.src = img->bo;
   batch->blit.dst = buf->bo;
   batch->tls = pan_pool_alloc_desc(&cmdbuf->desc_pool.base, LOCAL_STORAGE);
   GENX(pan_emit_tls)(&tlsinfo, batch->tls.cpu);

   mali_ptr tsd = batch->tls.gpu;

   /* 16x16 workgroups (16x1 for 1D images). */
   struct pan_compute_dim wg_sz = {
      16,
      img->pimage.layout.dim == MALI_TEXTURE_DIMENSION_1D ? 1 : 16,
      1,
   };

   /* Enough workgroups to cover the region from the tile-aligned origin;
    * the third dimension covers layers or 3D depth.
    */
   struct pan_compute_dim num_wg = {
      (ALIGN_POT(info.img.extent.maxx + 1, 16) - info.img.offset.x) / 16,
      img->pimage.layout.dim == MALI_TEXTURE_DIMENSION_1D
         ? region->imageSubresource.layerCount
         : (ALIGN_POT(info.img.extent.maxy + 1, 16) - info.img.offset.y) / 16,
      img->pimage.layout.dim != MALI_TEXTURE_DIMENSION_1D
         ? MAX2(region->imageSubresource.layerCount, region->imageExtent.depth)
         : 1,
   };

   struct panfrost_ptr job = panvk_meta_copy_emit_compute_job(
      &cmdbuf->desc_pool.base, &batch->jc, &num_wg, &wg_sz, texture, sampler,
      pushconsts, rsd, tsd);

   util_dynarray_append(&batch->jobs, void *, job.cpu);

   panvk_per_arch(cmd_close_batch)(cmdbuf);
}
1556 
1557 static void
panvk_meta_copy_img2buf_init(struct panvk_device * dev)1558 panvk_meta_copy_img2buf_init(struct panvk_device *dev)
1559 {
1560    STATIC_ASSERT(ARRAY_SIZE(panvk_meta_copy_img2buf_fmts) ==
1561                  PANVK_META_COPY_IMG2BUF_NUM_FORMATS);
1562 
1563    for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_img2buf_fmts); i++) {
1564       for (unsigned texdim = 1; texdim <= 3; texdim++) {
1565          unsigned texdimidx = panvk_meta_copy_tex_type(texdim, false);
1566          assert(texdimidx < ARRAY_SIZE(dev->meta.copy.img2buf));
1567 
1568          struct pan_shader_info shader_info;
1569          mali_ptr shader = panvk_meta_copy_img2buf_shader(
1570             dev, panvk_meta_copy_img2buf_fmts[i], texdim, false, &shader_info);
1571          dev->meta.copy.img2buf[texdimidx][i].rsd =
1572             panvk_meta_copy_to_buf_emit_rsd(&dev->meta.desc_pool.base, shader,
1573                                             &shader_info, true);
1574 
1575          if (texdim == 3)
1576             continue;
1577 
1578          memset(&shader_info, 0, sizeof(shader_info));
1579          texdimidx = panvk_meta_copy_tex_type(texdim, true);
1580          assert(texdimidx < ARRAY_SIZE(dev->meta.copy.img2buf));
1581          shader = panvk_meta_copy_img2buf_shader(
1582             dev, panvk_meta_copy_img2buf_fmts[i], texdim, true, &shader_info);
1583          dev->meta.copy.img2buf[texdimidx][i].rsd =
1584             panvk_meta_copy_to_buf_emit_rsd(&dev->meta.desc_pool.base, shader,
1585                                             &shader_info, true);
1586       }
1587    }
1588 }
1589 
1590 void
panvk_per_arch(CmdCopyImageToBuffer2)1591 panvk_per_arch(CmdCopyImageToBuffer2)(
1592    VkCommandBuffer commandBuffer,
1593    const VkCopyImageToBufferInfo2 *pCopyImageToBufferInfo)
1594 {
1595    VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
1596    VK_FROM_HANDLE(panvk_buffer, buf, pCopyImageToBufferInfo->dstBuffer);
1597    VK_FROM_HANDLE(panvk_image, img, pCopyImageToBufferInfo->srcImage);
1598 
1599    for (unsigned i = 0; i < pCopyImageToBufferInfo->regionCount; i++) {
1600       panvk_meta_copy_img2buf(cmdbuf, buf, img,
1601                               &pCopyImageToBufferInfo->pRegions[i]);
1602    }
1603 }
1604 
/* Push-constant layout shared between the buffer-to-buffer copy shader
 * (panvk_meta_copy_buf2buf_shader) and the CPU side that uploads it
 * (panvk_meta_copy_buf2buf / panvk_meta_update_buf). */
struct panvk_meta_copy_buf2buf_info {
   mali_ptr src; /* GPU address of the first source byte */
   mali_ptr dst; /* GPU address of the first destination byte */
} PACKED;

/* Load one field of panvk_meta_copy_buf2buf_info from push constants inside
 * the NIR shader; the load's bit size is derived from the field's C type. */
#define panvk_meta_copy_buf2buf_get_info_field(b, field)                       \
   nir_load_push_constant(                                                     \
      (b), 1, sizeof(((struct panvk_meta_copy_buf2buf_info *)0)->field) * 8,   \
      nir_imm_int(b, 0),                                                       \
      .base = offsetof(struct panvk_meta_copy_buf2buf_info, field),            \
      .range = ~0)
1616 
/*
 * Build the compute shader used for buffer-to-buffer copies: each invocation
 * copies one block of `blksz` bytes (blksz is a power of two, 1..16) from
 * info.src to info.dst, both read from push constants.
 *
 * Returns the GPU address of the uploaded binary; *shader_info receives the
 * compiler's metadata (and a manually-set push-constant count).
 */
static mali_ptr
panvk_meta_copy_buf2buf_shader(struct panvk_device *dev,
                               unsigned blksz,
                               struct pan_shader_info *shader_info)
{
   struct pan_pool *bin_pool = &dev->meta.bin_pool.base;

   /* FIXME: Won't work on compute queues, but we can't do that with
    * a compute shader if the destination is an AFBC surface.
    *
    * NOTE(review): this FIXME appears copy-pasted from the image-copy path —
    * a buffer copy has no AFBC destination; confirm whether it still applies.
    */
   nir_builder b = nir_builder_init_simple_shader(
      MESA_SHADER_COMPUTE, GENX(pan_shader_get_compiler_options)(),
      "panvk_meta_copy_buf2buf(blksz=%d)", blksz);

   nir_def *coord = nir_load_global_invocation_id(&b, 32);

   /* Byte offset of this invocation's block = x-invocation-id * blksz,
    * widened to 64 bits before being added to the 64-bit base pointers. */
   nir_def *offset = nir_u2u64(
      &b, nir_imul(&b, nir_channel(&b, coord, 0), nir_imm_int(&b, blksz)));
   nir_def *srcptr =
      nir_iadd(&b, panvk_meta_copy_buf2buf_get_info_field(&b, src), offset);
   nir_def *dstptr =
      nir_iadd(&b, panvk_meta_copy_buf2buf_get_info_field(&b, dst), offset);

   /* Split the block into at most 4 components of up to 32 bits each
    * (e.g. blksz=16 -> 4x32-bit, blksz=2 -> 1x16-bit). */
   unsigned compsz = blksz < 4 ? blksz : 4;
   unsigned ncomps = blksz / compsz;
   nir_store_global(&b, dstptr, blksz,
                    nir_load_global(&b, srcptr, blksz, ncomps, compsz * 8),
                    (1 << ncomps) - 1);

   struct panfrost_compile_inputs inputs = {
      .gpu_id = dev->physical_device->kmod.props.gpu_prod_id,
      .is_blit = true,
      /* Push constants are laid out manually via the info struct. */
      .no_ubo_to_push = true,
   };

   struct util_dynarray binary;

   util_dynarray_init(&binary, NULL);
   pan_shader_preprocess(b.shader, inputs.gpu_id);
   GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info);

   /* Advertise the push-constant range consumed by the info struct,
    * in 32-bit words. */
   shader_info->push.count =
      DIV_ROUND_UP(sizeof(struct panvk_meta_copy_buf2buf_info), 4);

   mali_ptr shader =
      pan_pool_upload_aligned(bin_pool, binary.data, binary.size, 128);

   util_dynarray_fini(&binary);
   ralloc_free(b.shader);

   return shader;
}
1669 
1670 static void
panvk_meta_copy_buf2buf_init(struct panvk_device * dev)1671 panvk_meta_copy_buf2buf_init(struct panvk_device *dev)
1672 {
1673    for (unsigned i = 0; i < ARRAY_SIZE(dev->meta.copy.buf2buf); i++) {
1674       struct pan_shader_info shader_info;
1675       mali_ptr shader =
1676          panvk_meta_copy_buf2buf_shader(dev, 1 << i, &shader_info);
1677       dev->meta.copy.buf2buf[i].rsd = panvk_meta_copy_to_buf_emit_rsd(
1678          &dev->meta.desc_pool.base, shader, &shader_info, false);
1679    }
1680 }
1681 
1682 static void
panvk_meta_copy_buf2buf(struct panvk_cmd_buffer * cmdbuf,const struct panvk_buffer * src,const struct panvk_buffer * dst,const VkBufferCopy2 * region)1683 panvk_meta_copy_buf2buf(struct panvk_cmd_buffer *cmdbuf,
1684                         const struct panvk_buffer *src,
1685                         const struct panvk_buffer *dst,
1686                         const VkBufferCopy2 *region)
1687 {
1688    struct panvk_meta_copy_buf2buf_info info = {
1689       .src = panvk_buffer_gpu_ptr(src, region->srcOffset),
1690       .dst = panvk_buffer_gpu_ptr(dst, region->dstOffset),
1691    };
1692 
1693    unsigned alignment = ffs((info.src | info.dst | region->size) & 15);
1694    unsigned log2blksz = alignment ? alignment - 1 : 4;
1695 
1696    assert(log2blksz <
1697           ARRAY_SIZE(cmdbuf->device->meta.copy.buf2buf));
1698    mali_ptr rsd =
1699       cmdbuf->device->meta.copy.buf2buf[log2blksz].rsd;
1700 
1701    mali_ptr pushconsts =
1702       pan_pool_upload_aligned(&cmdbuf->desc_pool.base, &info, sizeof(info), 16);
1703 
1704    panvk_per_arch(cmd_close_batch)(cmdbuf);
1705 
1706    struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf);
1707 
1708    panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false);
1709 
1710    mali_ptr tsd = batch->tls.gpu;
1711 
1712    unsigned nblocks = region->size >> log2blksz;
1713    struct pan_compute_dim num_wg = {nblocks, 1, 1};
1714    struct pan_compute_dim wg_sz = {1, 1, 1};
1715    struct panfrost_ptr job = panvk_meta_copy_emit_compute_job(
1716       &cmdbuf->desc_pool.base, &batch->jc, &num_wg, &wg_sz, 0, 0, pushconsts,
1717       rsd, tsd);
1718 
1719    util_dynarray_append(&batch->jobs, void *, job.cpu);
1720 
1721    batch->blit.src = src->bo;
1722    batch->blit.dst = dst->bo;
1723    panvk_per_arch(cmd_close_batch)(cmdbuf);
1724 }
1725 
1726 void
panvk_per_arch(CmdCopyBuffer2)1727 panvk_per_arch(CmdCopyBuffer2)(VkCommandBuffer commandBuffer,
1728                                const VkCopyBufferInfo2 *pCopyBufferInfo)
1729 {
1730    VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
1731    VK_FROM_HANDLE(panvk_buffer, src, pCopyBufferInfo->srcBuffer);
1732    VK_FROM_HANDLE(panvk_buffer, dst, pCopyBufferInfo->dstBuffer);
1733 
1734    for (unsigned i = 0; i < pCopyBufferInfo->regionCount; i++) {
1735       panvk_meta_copy_buf2buf(cmdbuf, src, dst, &pCopyBufferInfo->pRegions[i]);
1736    }
1737 }
1738 
/* Push-constant layout shared between the fill shader
 * (panvk_meta_fill_buf_shader) and the CPU side (panvk_meta_fill_buf). */
struct panvk_meta_fill_buf_info {
   mali_ptr start; /* GPU address of the first word to fill */
   uint32_t val;   /* 32-bit fill pattern */
} PACKED;

/* Load one field of panvk_meta_fill_buf_info from push constants inside the
 * NIR shader; the load's bit size is derived from the field's C type. */
#define panvk_meta_fill_buf_get_info_field(b, field)                           \
   nir_load_push_constant(                                                     \
      (b), 1, sizeof(((struct panvk_meta_fill_buf_info *)0)->field) * 8,       \
      nir_imm_int(b, 0),                                                       \
      .base = offsetof(struct panvk_meta_fill_buf_info, field), .range = ~0)
1749 
/*
 * Build the compute shader used by vkCmdFillBuffer: each invocation stores
 * the 32-bit fill value to one word of the destination; the base pointer and
 * the value come from push constants (struct panvk_meta_fill_buf_info).
 *
 * Returns the GPU address of the uploaded binary; *shader_info receives the
 * compiler's metadata (and a manually-set push-constant count).
 */
static mali_ptr
panvk_meta_fill_buf_shader(struct panvk_device *dev,
                           struct pan_shader_info *shader_info)
{
   struct pan_pool *bin_pool = &dev->meta.bin_pool.base;

   /* FIXME: Won't work on compute queues, but we can't do that with
    * a compute shader if the destination is an AFBC surface.
    *
    * NOTE(review): this FIXME appears copy-pasted from the image-copy path —
    * a buffer fill has no AFBC destination; confirm whether it still applies.
    */
   nir_builder b = nir_builder_init_simple_shader(
      MESA_SHADER_COMPUTE, GENX(pan_shader_get_compiler_options)(),
      "panvk_meta_fill_buf()");

   nir_def *coord = nir_load_global_invocation_id(&b, 32);

   /* Byte offset of this invocation's word = x-invocation-id * 4, widened to
    * 64 bits before being added to the 64-bit start pointer. */
   nir_def *offset = nir_u2u64(&b, nir_imul(&b, nir_channel(&b, coord, 0),
                                            nir_imm_int(&b, sizeof(uint32_t))));
   nir_def *ptr =
      nir_iadd(&b, panvk_meta_fill_buf_get_info_field(&b, start), offset);
   nir_def *val = panvk_meta_fill_buf_get_info_field(&b, val);

   nir_store_global(&b, ptr, sizeof(uint32_t), val, 1);

   struct panfrost_compile_inputs inputs = {
      .gpu_id = dev->physical_device->kmod.props.gpu_prod_id,
      .is_blit = true,
      /* Push constants are laid out manually via the info struct. */
      .no_ubo_to_push = true,
   };

   struct util_dynarray binary;

   util_dynarray_init(&binary, NULL);
   pan_shader_preprocess(b.shader, inputs.gpu_id);
   GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info);

   /* Advertise the push-constant range consumed by the info struct,
    * in 32-bit words. */
   shader_info->push.count =
      DIV_ROUND_UP(sizeof(struct panvk_meta_fill_buf_info), 4);

   mali_ptr shader =
      pan_pool_upload_aligned(bin_pool, binary.data, binary.size, 128);

   util_dynarray_fini(&binary);
   ralloc_free(b.shader);

   return shader;
}
1796 
1797 static mali_ptr
panvk_meta_fill_buf_emit_rsd(struct panvk_device * dev)1798 panvk_meta_fill_buf_emit_rsd(struct panvk_device *dev)
1799 {
1800    struct pan_pool *desc_pool = &dev->meta.desc_pool.base;
1801    struct pan_shader_info shader_info;
1802 
1803    mali_ptr shader = panvk_meta_fill_buf_shader(dev, &shader_info);
1804 
1805    struct panfrost_ptr rsd_ptr =
1806       pan_pool_alloc_desc_aggregate(desc_pool, PAN_DESC(RENDERER_STATE));
1807 
1808    pan_pack(rsd_ptr.cpu, RENDERER_STATE, cfg) {
1809       pan_shader_prepare_rsd(&shader_info, shader, &cfg);
1810    }
1811 
1812    return rsd_ptr.gpu;
1813 }
1814 
/* Pre-emit the fill-buffer renderer state.  A single variant suffices
 * because the fill shader always writes whole 32-bit words. */
static void
panvk_meta_fill_buf_init(struct panvk_device *dev)
{
   dev->meta.copy.fillbuf.rsd = panvk_meta_fill_buf_emit_rsd(dev);
}
1820 
1821 static void
panvk_meta_fill_buf(struct panvk_cmd_buffer * cmdbuf,const struct panvk_buffer * dst,VkDeviceSize size,VkDeviceSize offset,uint32_t val)1822 panvk_meta_fill_buf(struct panvk_cmd_buffer *cmdbuf,
1823                     const struct panvk_buffer *dst, VkDeviceSize size,
1824                     VkDeviceSize offset, uint32_t val)
1825 {
1826    struct panvk_meta_fill_buf_info info = {
1827       .start = panvk_buffer_gpu_ptr(dst, offset),
1828       .val = val,
1829    };
1830    size = panvk_buffer_range(dst, offset, size);
1831 
1832    /* From the Vulkan spec:
1833     *
1834     *    "size is the number of bytes to fill, and must be either a multiple
1835     *    of 4, or VK_WHOLE_SIZE to fill the range from offset to the end of
1836     *    the buffer. If VK_WHOLE_SIZE is used and the remaining size of the
1837     *    buffer is not a multiple of 4, then the nearest smaller multiple is
1838     *    used."
1839     */
1840    size &= ~3ull;
1841 
1842    assert(!(offset & 3) && !(size & 3));
1843 
1844    unsigned nwords = size / sizeof(uint32_t);
1845    mali_ptr rsd = cmdbuf->device->meta.copy.fillbuf.rsd;
1846 
1847    mali_ptr pushconsts =
1848       pan_pool_upload_aligned(&cmdbuf->desc_pool.base, &info, sizeof(info), 16);
1849 
1850    panvk_per_arch(cmd_close_batch)(cmdbuf);
1851 
1852    struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf);
1853 
1854    panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false);
1855 
1856    mali_ptr tsd = batch->tls.gpu;
1857 
1858    struct pan_compute_dim num_wg = {nwords, 1, 1};
1859    struct pan_compute_dim wg_sz = {1, 1, 1};
1860    struct panfrost_ptr job = panvk_meta_copy_emit_compute_job(
1861       &cmdbuf->desc_pool.base, &batch->jc, &num_wg, &wg_sz, 0, 0, pushconsts,
1862       rsd, tsd);
1863 
1864    util_dynarray_append(&batch->jobs, void *, job.cpu);
1865 
1866    batch->blit.dst = dst->bo;
1867    panvk_per_arch(cmd_close_batch)(cmdbuf);
1868 }
1869 
1870 void
panvk_per_arch(CmdFillBuffer)1871 panvk_per_arch(CmdFillBuffer)(VkCommandBuffer commandBuffer, VkBuffer dstBuffer,
1872                               VkDeviceSize dstOffset, VkDeviceSize fillSize,
1873                               uint32_t data)
1874 {
1875    VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
1876    VK_FROM_HANDLE(panvk_buffer, dst, dstBuffer);
1877 
1878    panvk_meta_fill_buf(cmdbuf, dst, fillSize, dstOffset, data);
1879 }
1880 
1881 static void
panvk_meta_update_buf(struct panvk_cmd_buffer * cmdbuf,const struct panvk_buffer * dst,VkDeviceSize offset,VkDeviceSize size,const void * data)1882 panvk_meta_update_buf(struct panvk_cmd_buffer *cmdbuf,
1883                       const struct panvk_buffer *dst, VkDeviceSize offset,
1884                       VkDeviceSize size, const void *data)
1885 {
1886    struct panvk_meta_copy_buf2buf_info info = {
1887       .src = pan_pool_upload_aligned(&cmdbuf->desc_pool.base, data, size, 4),
1888       .dst = panvk_buffer_gpu_ptr(dst, offset),
1889    };
1890 
1891    unsigned log2blksz = ffs(sizeof(uint32_t)) - 1;
1892 
1893    mali_ptr rsd =
1894       cmdbuf->device->meta.copy.buf2buf[log2blksz].rsd;
1895 
1896    mali_ptr pushconsts =
1897       pan_pool_upload_aligned(&cmdbuf->desc_pool.base, &info, sizeof(info), 16);
1898 
1899    panvk_per_arch(cmd_close_batch)(cmdbuf);
1900 
1901    struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf);
1902 
1903    panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false);
1904 
1905    mali_ptr tsd = batch->tls.gpu;
1906 
1907    unsigned nblocks = size >> log2blksz;
1908    struct pan_compute_dim num_wg = {nblocks, 1, 1};
1909    struct pan_compute_dim wg_sz = {1, 1, 1};
1910    struct panfrost_ptr job = panvk_meta_copy_emit_compute_job(
1911       &cmdbuf->desc_pool.base, &batch->jc, &num_wg, &wg_sz, 0, 0, pushconsts,
1912       rsd, tsd);
1913 
1914    util_dynarray_append(&batch->jobs, void *, job.cpu);
1915 
1916    batch->blit.dst = dst->bo;
1917    panvk_per_arch(cmd_close_batch)(cmdbuf);
1918 }
1919 
1920 void
panvk_per_arch(CmdUpdateBuffer)1921 panvk_per_arch(CmdUpdateBuffer)(VkCommandBuffer commandBuffer,
1922                                 VkBuffer dstBuffer, VkDeviceSize dstOffset,
1923                                 VkDeviceSize dataSize, const void *pData)
1924 {
1925    VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
1926    VK_FROM_HANDLE(panvk_buffer, dst, dstBuffer);
1927 
1928    panvk_meta_update_buf(cmdbuf, dst, dstOffset, dataSize, pData);
1929 }
1930 
/*
 * Device-creation hook: pre-compile every meta-copy pipeline (image<->image,
 * buffer<->image, buffer<->buffer, buffer fill) so the command-buffer paths
 * only look up cached renderer state descriptors.
 */
void
panvk_per_arch(meta_copy_init)(struct panvk_device *dev)
{
   /* img2img is initialized twice with a bool flag; its meaning is defined
    * elsewhere in this file (presumably a second shader variant, e.g.
    * depth/stencil) — TODO(review): confirm against the init function. */
   panvk_meta_copy_img2img_init(dev, false);
   panvk_meta_copy_img2img_init(dev, true);
   panvk_meta_copy_buf2img_init(dev);
   panvk_meta_copy_img2buf_init(dev);
   panvk_meta_copy_buf2buf_init(dev);
   panvk_meta_fill_buf_init(dev);
}
1941