• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2021 Collabora Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 #include "gen_macros.h"
25 
26 #include "nir/nir_builder.h"
27 #include "pan_encoder.h"
28 #include "pan_shader.h"
29 
30 #include "panvk_private.h"
31 
32 static mali_ptr
panvk_meta_copy_img_emit_texture(struct panfrost_device * pdev,struct pan_pool * desc_pool,const struct pan_image_view * view)33 panvk_meta_copy_img_emit_texture(struct panfrost_device *pdev,
34                                  struct pan_pool *desc_pool,
35                                  const struct pan_image_view *view)
36 {
37    struct panfrost_ptr texture =
38       pan_pool_alloc_desc(desc_pool, TEXTURE);
39    size_t payload_size =
40       GENX(panfrost_estimate_texture_payload_size)(view);
41    struct panfrost_ptr surfaces =
42       pan_pool_alloc_aligned(desc_pool, payload_size,
43                              pan_alignment(SURFACE_WITH_STRIDE));
44 
45    GENX(panfrost_new_texture)(pdev, view, texture.cpu, &surfaces);
46 
47    return texture.gpu;
48 }
49 
50 static mali_ptr
panvk_meta_copy_img_emit_sampler(struct panfrost_device * pdev,struct pan_pool * desc_pool)51 panvk_meta_copy_img_emit_sampler(struct panfrost_device *pdev,
52                                  struct pan_pool *desc_pool)
53 {
54    struct panfrost_ptr sampler =
55       pan_pool_alloc_desc(desc_pool, SAMPLER);
56 
57    pan_pack(sampler.cpu, SAMPLER, cfg) {
58       cfg.seamless_cube_map = false;
59       cfg.normalized_coordinates = false;
60       cfg.minify_nearest = true;
61       cfg.magnify_nearest = true;
62    }
63 
64    return sampler.gpu;
65 }
66 
67 static void
panvk_meta_copy_emit_varying(struct pan_pool * pool,mali_ptr coordinates,mali_ptr * varying_bufs,mali_ptr * varyings)68 panvk_meta_copy_emit_varying(struct pan_pool *pool,
69                              mali_ptr coordinates,
70                              mali_ptr *varying_bufs,
71                              mali_ptr *varyings)
72 {
73    struct panfrost_ptr varying =
74       pan_pool_alloc_desc(pool, ATTRIBUTE);
75    struct panfrost_ptr varying_buffer =
76       pan_pool_alloc_desc_array(pool, 2, ATTRIBUTE_BUFFER);
77 
78    pan_pack(varying_buffer.cpu, ATTRIBUTE_BUFFER, cfg) {
79       cfg.pointer = coordinates;
80       cfg.stride = 4 * sizeof(uint32_t);
81       cfg.size = cfg.stride * 4;
82    }
83 
84    /* Bifrost needs an empty desc to mark end of prefetching */
85    pan_pack(varying_buffer.cpu + pan_size(ATTRIBUTE_BUFFER),
86             ATTRIBUTE_BUFFER, cfg);
87 
88    pan_pack(varying.cpu, ATTRIBUTE, cfg) {
89       cfg.buffer_index = 0;
90       cfg.format = pool->dev->formats[PIPE_FORMAT_R32G32B32_FLOAT].hw;
91    }
92 
93    *varyings = varying.gpu;
94    *varying_bufs = varying_buffer.gpu;
95 }
96 
97 static void
panvk_meta_copy_emit_dcd(struct pan_pool * pool,mali_ptr src_coords,mali_ptr dst_coords,mali_ptr texture,mali_ptr sampler,mali_ptr vpd,mali_ptr tsd,mali_ptr rsd,mali_ptr push_constants,void * out)98 panvk_meta_copy_emit_dcd(struct pan_pool *pool,
99                          mali_ptr src_coords, mali_ptr dst_coords,
100                          mali_ptr texture, mali_ptr sampler,
101                          mali_ptr vpd, mali_ptr tsd, mali_ptr rsd,
102                          mali_ptr push_constants, void *out)
103 {
104    pan_pack(out, DRAW, cfg) {
105       cfg.thread_storage = tsd;
106       cfg.state = rsd;
107       cfg.push_uniforms = push_constants;
108       cfg.position = dst_coords;
109       if (src_coords) {
110               panvk_meta_copy_emit_varying(pool, src_coords,
111                                            &cfg.varying_buffers,
112                                            &cfg.varyings);
113       }
114       cfg.viewport = vpd;
115       cfg.textures = texture;
116       cfg.samplers = sampler;
117    }
118 }
119 
/*
 * Build and queue a tiler job that draws a 4-vertex triangle strip covering
 * the destination rectangle, running the copy fragment shader with the
 * given texture/sampler/RSD state.  The job is added to @scoreboard and its
 * descriptor pointer is returned so the caller can track it.
 */
static struct panfrost_ptr
panvk_meta_copy_emit_tiler_job(struct pan_pool *desc_pool,
                               struct pan_scoreboard *scoreboard,
                               mali_ptr src_coords, mali_ptr dst_coords,
                               mali_ptr texture, mali_ptr sampler,
                               mali_ptr push_constants,
                               mali_ptr vpd, mali_ptr rsd,
                               mali_ptr tsd, mali_ptr tiler)
{
   struct panfrost_ptr job =
      pan_pool_alloc_desc(desc_pool, TILER_JOB);

   panvk_meta_copy_emit_dcd(desc_pool, src_coords, dst_coords,
                            texture, sampler, vpd, tsd, rsd, push_constants,
                            pan_section_ptr(job.cpu, TILER_JOB, DRAW));

   /* One quad as a 4-index triangle strip. */
   pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE, cfg) {
      cfg.draw_mode = MALI_DRAW_MODE_TRIANGLE_STRIP;
      cfg.index_count = 4;
      cfg.job_task_split = 6;
   }

   pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE_SIZE, cfg) {
      cfg.constant = 1.0f;
   }

   /* 1 draw, 4 vertices. */
   void *invoc = pan_section_ptr(job.cpu,
                                 TILER_JOB,
                                 INVOCATION);
   panfrost_pack_work_groups_compute(invoc, 1, 4,
                                     1, 1, 1, 1, true, false);

   pan_section_pack(job.cpu, TILER_JOB, PADDING, cfg);
   pan_section_pack(job.cpu, TILER_JOB, TILER, cfg) {
      cfg.address = tiler;
   }

   panfrost_add_job(desc_pool, scoreboard, MALI_JOB_TYPE_TILER,
                    false, false, 0, 0, &job, false);
   return job;
}
161 
/*
 * Build and queue a compute job running the copy shader over @num_wg
 * workgroups of size @wg_sz.  The job is added to @scoreboard and its
 * descriptor pointer is returned so the caller can track it.
 */
static struct panfrost_ptr
panvk_meta_copy_emit_compute_job(struct pan_pool *desc_pool,
                                 struct pan_scoreboard *scoreboard,
                                 const struct pan_compute_dim *num_wg,
                                 const struct pan_compute_dim *wg_sz,
                                 mali_ptr texture, mali_ptr sampler,
                                 mali_ptr push_constants,
                                 mali_ptr rsd, mali_ptr tsd)
{
   struct panfrost_ptr job =
      pan_pool_alloc_desc(desc_pool, COMPUTE_JOB);

   void *invoc = pan_section_ptr(job.cpu,
                                 COMPUTE_JOB,
                                 INVOCATION);
   panfrost_pack_work_groups_compute(invoc, num_wg->x, num_wg->y, num_wg->z,
                                     wg_sz->x, wg_sz->y, wg_sz->z,
                                     false, false);

   pan_section_pack(job.cpu, COMPUTE_JOB, PARAMETERS, cfg) {
      cfg.job_task_split = 8;
   }

   /* No varyings/viewport for compute: src_coords, dst_coords and vpd are
    * passed as 0, so only texture/sampler/RSD/TLS/push constants are set. */
   panvk_meta_copy_emit_dcd(desc_pool, 0, 0, texture, sampler,
                            0, tsd, rsd, push_constants,
                            pan_section_ptr(job.cpu, COMPUTE_JOB, DRAW));

   panfrost_add_job(desc_pool, scoreboard, MALI_JOB_TYPE_COMPUTE,
                    false, false, 0, 0, &job, false);
   return job;
}
193 
194 
195 static uint32_t
panvk_meta_copy_img_bifrost_raw_format(unsigned texelsize)196 panvk_meta_copy_img_bifrost_raw_format(unsigned texelsize)
197 {
198    switch (texelsize) {
199    case 6: return MALI_RGB16UI << 12;
200    case 8: return MALI_RG32UI << 12;
201    case 12: return MALI_RGB32UI << 12;
202    case 16: return MALI_RGBA32UI << 12;
203    default: unreachable("Invalid texel size\n");
204    }
205 }
206 
/*
 * Emit a RENDERER_STATE descriptor plus one BLEND descriptor for a
 * copy-to-image job.
 *
 * @shader/@shader_info describe the copy fragment shader; @fmt/@wrmask give
 * the destination render-target format and the set of components actually
 * written.  @from_img is true when the source is an image, in which case the
 * shader consumes one varying and one texture/sampler pair.
 *
 * Returns the GPU address of the renderer state descriptor.
 */
static mali_ptr
panvk_meta_copy_to_img_emit_rsd(struct panfrost_device *pdev,
                                struct pan_pool *desc_pool,
                                mali_ptr shader,
                                const struct pan_shader_info *shader_info,
                                enum pipe_format fmt, unsigned wrmask,
                                bool from_img)
{
   struct panfrost_ptr rsd_ptr =
      pan_pool_alloc_desc_aggregate(desc_pool,
                                    PAN_DESC(RENDERER_STATE),
                                    PAN_DESC_ARRAY(1, BLEND));

   /* Texel sizes above 4 bytes are handled as raw integer render targets
    * rather than blendable formats. */
   bool raw = util_format_get_blocksize(fmt) > 4;
   unsigned fullmask = (1 << util_format_get_nr_components(fmt)) - 1;
   /* Partial component writes: blendable formats go through the
    * fixed-function color mask (partialwrite), raw formats need the
    * destination read back (readstb). */
   bool partialwrite = fullmask != wrmask && !raw;
   bool readstb = fullmask != wrmask && raw;

   pan_pack(rsd_ptr.cpu, RENDERER_STATE, cfg) {
      pan_shader_prepare_rsd(shader_info, shader, &cfg);
      if (from_img) {
         cfg.shader.varying_count = 1;
         cfg.shader.texture_count = 1;
         cfg.shader.sampler_count = 1;
      }
      cfg.properties.depth_source = MALI_DEPTH_SOURCE_FIXED_FUNCTION;
      cfg.multisample_misc.sample_mask = UINT16_MAX;
      cfg.multisample_misc.depth_function = MALI_FUNC_ALWAYS;
      cfg.stencil_mask_misc.stencil_mask_front = 0xFF;
      cfg.stencil_mask_misc.stencil_mask_back = 0xFF;
      cfg.stencil_front.compare_function = MALI_FUNC_ALWAYS;
      cfg.stencil_front.stencil_fail = MALI_STENCIL_OP_REPLACE;
      cfg.stencil_front.depth_fail = MALI_STENCIL_OP_REPLACE;
      cfg.stencil_front.depth_pass = MALI_STENCIL_OP_REPLACE;
      cfg.stencil_front.mask = 0xFF;
      cfg.stencil_back = cfg.stencil_front;

      /* Forward pixel kill is only allowed when the destination is fully
       * overwritten (no partial write, no readback). */
      cfg.properties.allow_forward_pixel_to_be_killed = true;
      cfg.properties.allow_forward_pixel_to_kill =
         !partialwrite && !readstb;
      cfg.properties.zs_update_operation =
         MALI_PIXEL_KILL_STRONG_EARLY;
      cfg.properties.pixel_kill_operation =
         MALI_PIXEL_KILL_FORCE_EARLY;
   }

   pan_pack(rsd_ptr.cpu + pan_size(RENDERER_STATE), BLEND, cfg) {
      cfg.round_to_fb_precision = true;
      cfg.load_destination = partialwrite;
      /* Straight source copy: result = src * 1 + 0 for RGB and alpha. */
      cfg.equation.rgb.a = MALI_BLEND_OPERAND_A_SRC;
      cfg.equation.rgb.b = MALI_BLEND_OPERAND_B_SRC;
      cfg.equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO;
      cfg.equation.alpha.a = MALI_BLEND_OPERAND_A_SRC;
      cfg.equation.alpha.b = MALI_BLEND_OPERAND_B_SRC;
      cfg.equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO;
      cfg.internal.mode =
         partialwrite ?
         MALI_BLEND_MODE_FIXED_FUNCTION :
         MALI_BLEND_MODE_OPAQUE;
      cfg.equation.color_mask = partialwrite ? wrmask : 0xf;
      cfg.internal.fixed_function.num_comps = 4;
      if (!raw) {
         cfg.internal.fixed_function.conversion.memory_format =
            panfrost_format_to_bifrost_blend(pdev, fmt, false);
         cfg.internal.fixed_function.conversion.register_format =
            MALI_REGISTER_FILE_FORMAT_F32;
      } else {
         unsigned imgtexelsz = util_format_get_blocksize(fmt);

         cfg.internal.fixed_function.conversion.memory_format =
            panvk_meta_copy_img_bifrost_raw_format(imgtexelsz);
         /* Texel sizes with bit 1 set (i.e. 6 bytes) are written as u16
          * components; 8/12/16-byte texels as u32. */
         cfg.internal.fixed_function.conversion.register_format =
            (imgtexelsz & 2) ?
            MALI_REGISTER_FILE_FORMAT_U16 :
            MALI_REGISTER_FILE_FORMAT_U32;
      }
   }

   return rsd_ptr.gpu;
}
287 
288 static mali_ptr
panvk_meta_copy_to_buf_emit_rsd(struct panfrost_device * pdev,struct pan_pool * desc_pool,mali_ptr shader,const struct pan_shader_info * shader_info,bool from_img)289 panvk_meta_copy_to_buf_emit_rsd(struct panfrost_device *pdev,
290                                 struct pan_pool *desc_pool,
291                                 mali_ptr shader,
292                                 const struct pan_shader_info *shader_info,
293                                 bool from_img)
294 {
295    struct panfrost_ptr rsd_ptr =
296       pan_pool_alloc_desc_aggregate(desc_pool,
297                                     PAN_DESC(RENDERER_STATE));
298 
299    pan_pack(rsd_ptr.cpu, RENDERER_STATE, cfg) {
300       pan_shader_prepare_rsd(shader_info, shader, &cfg);
301       if (from_img) {
302          cfg.shader.texture_count = 1;
303          cfg.shader.sampler_count = 1;
304       }
305    }
306 
307    return rsd_ptr.gpu;
308 }
309 
/*
 * Compile the fragment shader used for image-to-image copies.
 *
 * The shader fetches a texel with txf (txf_ms when @is_ms) at the integer
 * coordinates carried by VARYING_SLOT_VAR0 and stores it to
 * FRAG_RESULT_DATA0.  For the R5G6B5 <-> R8G8 cross-format cases the texel
 * bits are repacked manually so the raw memory contents are preserved; all
 * other cases require srcfmt == dstfmt.  When only a subset of the
 * destination components is written (@dstmask) and components are wider
 * than 8 bits, the shader blends in the old render-target value per
 * component.
 *
 * Returns the GPU address of the uploaded binary; *shader_info receives
 * the compiled shader description.
 */
static mali_ptr
panvk_meta_copy_img2img_shader(struct panfrost_device *pdev,
                               struct pan_pool *bin_pool,
                               enum pipe_format srcfmt,
                               enum pipe_format dstfmt, unsigned dstmask,
                               unsigned texdim, bool texisarray, bool is_ms,
                               struct pan_shader_info *shader_info)
{
   nir_builder b =
      nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
                                     GENX(pan_shader_get_compiler_options)(),
                                     "panvk_meta_copy_img2img(srcfmt=%s,dstfmt=%s,%dD%s%s)",
                                     util_format_name(srcfmt), util_format_name(dstfmt),
                                     texdim, texisarray ? "[]" : "", is_ms ? ",ms" : "");

   /* The varying delivers unnormalized coords as floats; truncate to
    * integers for the texel fetch. */
   nir_variable *coord_var =
      nir_variable_create(b.shader, nir_var_shader_in,
                          glsl_vector_type(GLSL_TYPE_FLOAT, texdim + texisarray),
                          "coord");
   coord_var->data.location = VARYING_SLOT_VAR0;
   nir_ssa_def *coord = nir_f2u32(&b, nir_load_var(&b, coord_var));

   /* txf needs one source (coords); txf_ms adds the sample index. */
   nir_tex_instr *tex = nir_tex_instr_create(b.shader, is_ms ? 2 : 1);
   tex->op = is_ms ? nir_texop_txf_ms : nir_texop_txf;
   tex->texture_index = 0;
   tex->is_array = texisarray;
   tex->dest_type = util_format_is_unorm(srcfmt) ?
                    nir_type_float32 : nir_type_uint32;

   switch (texdim) {
   case 1: tex->sampler_dim = GLSL_SAMPLER_DIM_1D; break;
   case 2: tex->sampler_dim = GLSL_SAMPLER_DIM_2D; break;
   case 3: tex->sampler_dim = GLSL_SAMPLER_DIM_3D; break;
   default: unreachable("Invalid texture dimension");
   }

   tex->src[0].src_type = nir_tex_src_coord;
   tex->src[0].src = nir_src_for_ssa(coord);
   tex->coord_components = texdim + texisarray;

   if (is_ms) {
      tex->src[1].src_type = nir_tex_src_ms_index;
      tex->src[1].src = nir_src_for_ssa(nir_load_sample_id(&b));
   }

   nir_ssa_dest_init(&tex->instr, &tex->dest, 4,
                     nir_alu_type_get_type_size(tex->dest_type), NULL);
   nir_builder_instr_insert(&b, &tex->instr);

   nir_ssa_def *texel = &tex->dest.ssa;

   /* Component size (in bits) of the first RGB component of dstfmt. */
   unsigned dstcompsz =
      util_format_get_component_bits(dstfmt, UTIL_FORMAT_COLORSPACE_RGB, 0);
   unsigned ndstcomps = util_format_get_nr_components(dstfmt);
   const struct glsl_type *outtype = NULL;

   if (srcfmt == PIPE_FORMAT_R5G6B5_UNORM && dstfmt == PIPE_FORMAT_R8G8_UNORM) {
      /* Repack a 5:6:5 unorm texel into two 8-bit unorm components that
       * carry the same 16 raw bits. */
      nir_ssa_def *rgb =
         nir_f2u32(&b, nir_fmul(&b, texel,
                                nir_vec3(&b,
                                         nir_imm_float(&b, 31),
                                         nir_imm_float(&b, 63),
                                         nir_imm_float(&b, 31))));
      nir_ssa_def *rg =
         nir_vec2(&b,
                  nir_ior(&b, nir_channel(&b, rgb, 0),
                          nir_ishl(&b, nir_channel(&b, rgb, 1),
                                   nir_imm_int(&b, 5))),
                  nir_ior(&b,
                          nir_ushr_imm(&b, nir_channel(&b, rgb, 1), 3),
                          nir_ishl(&b, nir_channel(&b, rgb, 2),
                                   nir_imm_int(&b, 3))));
      rg = nir_iand_imm(&b, rg, 255);
      texel = nir_fmul_imm(&b, nir_u2f32(&b, rg), 1.0 / 255);
      outtype = glsl_vector_type(GLSL_TYPE_FLOAT, 2);
   } else if (srcfmt == PIPE_FORMAT_R8G8_UNORM && dstfmt == PIPE_FORMAT_R5G6B5_UNORM) {
      /* Inverse repack: two 8-bit unorm components into 5:6:5. */
      nir_ssa_def *rg = nir_f2u32(&b, nir_fmul_imm(&b, texel, 255));
      nir_ssa_def *rgb =
         nir_vec3(&b,
                  nir_channel(&b, rg, 0),
                  nir_ior(&b,
                          nir_ushr_imm(&b, nir_channel(&b, rg, 0), 5),
                          nir_ishl(&b, nir_channel(&b, rg, 1),
                                   nir_imm_int(&b, 3))),
                  nir_ushr_imm(&b, nir_channel(&b, rg, 1), 3));
      rgb = nir_iand(&b, rgb,
                     nir_vec3(&b,
                              nir_imm_int(&b, 31),
                              nir_imm_int(&b, 63),
                              nir_imm_int(&b, 31)));
      texel = nir_fmul(&b, nir_u2f32(&b, rgb),
                       nir_vec3(&b,
                                nir_imm_float(&b, 1.0 / 31),
                                nir_imm_float(&b, 1.0 / 63),
                                nir_imm_float(&b, 1.0 / 31)));
      outtype = glsl_vector_type(GLSL_TYPE_FLOAT, 3);
   } else {
      /* Same-format copy: just narrow/mask the fetched channels. */
      assert(srcfmt == dstfmt);
      enum glsl_base_type basetype;
      if (util_format_is_unorm(dstfmt)) {
         basetype = GLSL_TYPE_FLOAT;
      } else if (dstcompsz == 16) {
         basetype = GLSL_TYPE_UINT16;
      } else {
         assert(dstcompsz == 32);
         basetype = GLSL_TYPE_UINT;
      }

      if (dstcompsz == 16)
         texel = nir_u2u16(&b, texel);

      texel = nir_channels(&b, texel, (1 << ndstcomps) - 1);
      outtype = glsl_vector_type(basetype, ndstcomps);
   }

   nir_variable *out =
      nir_variable_create(b.shader, nir_var_shader_out, outtype, "out");
   out->data.location = FRAG_RESULT_DATA0;

   /* Partial write with >8-bit components: merge kept channels from the
    * existing render-target value. */
   unsigned fullmask = (1 << ndstcomps) - 1;
   if (dstcompsz > 8 && dstmask != fullmask) {
      nir_ssa_def *oldtexel = nir_load_var(&b, out);
      nir_ssa_def *dstcomps[4];

      for (unsigned i = 0; i < ndstcomps; i++) {
         if (dstmask & BITFIELD_BIT(i))
            dstcomps[i] = nir_channel(&b, texel, i);
         else
            dstcomps[i] = nir_channel(&b, oldtexel, i);
      }

      texel = nir_vec(&b, dstcomps, ndstcomps);
   }

   nir_store_var(&b, out, texel, 0xff);

   struct panfrost_compile_inputs inputs = {
      .gpu_id = pdev->gpu_id,
      .is_blit = true,
      .no_ubo_to_push = true,
   };

   /* NOTE(review): dstcompsz holds a component size in bits (8/16/32
    * above), so "dstcompsz == 2" can never be true here and the
    * RG32UI/U32 branch is always taken -- presumably "== 16" was
    * intended; confirm against the blend-descriptor requirements. */
   pan_pack(&inputs.bifrost.rt_conv[0], INTERNAL_CONVERSION, cfg) {
      cfg.memory_format = (dstcompsz == 2 ? MALI_RG16UI : MALI_RG32UI) << 12;
      cfg.register_format = dstcompsz == 2 ?
                            MALI_REGISTER_FILE_FORMAT_U16 :
                            MALI_REGISTER_FILE_FORMAT_U32;
   }
   inputs.bifrost.static_rt_conv = true;

   struct util_dynarray binary;

   util_dynarray_init(&binary, NULL);
   GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info);

   shader_info->fs.sample_shading = is_ms;

   mali_ptr shader =
      pan_pool_upload_aligned(bin_pool, binary.data, binary.size, 128);

   util_dynarray_fini(&binary);
   ralloc_free(b.shader);

   return shader;
}
475 
476 static enum pipe_format
panvk_meta_copy_img_format(enum pipe_format fmt)477 panvk_meta_copy_img_format(enum pipe_format fmt)
478 {
479    /* We can't use a non-compressed format when handling a tiled/AFBC
480     * compressed format because the tile size differ (4x4 blocks for
481     * compressed formats and 16x16 texels for non-compressed ones).
482     */
483    assert(!util_format_is_compressed(fmt));
484 
485    /* Pick blendable formats when we can, otherwise pick the UINT variant
486     * matching the texel size.
487     */
488    switch (util_format_get_blocksize(fmt)) {
489    case 16: return PIPE_FORMAT_R32G32B32A32_UINT;
490    case 12: return PIPE_FORMAT_R32G32B32_UINT;
491    case 8: return PIPE_FORMAT_R32G32_UINT;
492    case 6: return PIPE_FORMAT_R16G16B16_UINT;
493    case 4: return PIPE_FORMAT_R8G8B8A8_UNORM;
494    case 2: return (fmt == PIPE_FORMAT_R5G6B5_UNORM ||
495                    fmt == PIPE_FORMAT_B5G6R5_UNORM) ?
496                   PIPE_FORMAT_R5G6B5_UNORM : PIPE_FORMAT_R8G8_UNORM;
497    case 1: return PIPE_FORMAT_R8_UNORM;
498    default: unreachable("Unsupported format\n");
499    }
500 }
501 
/*
 * Key identifying one precompiled img2img copy variant: canonical
 * source/destination formats (see panvk_meta_copy_img_format()) plus the
 * mask of destination components written.  PACKED so the memcmp()-based
 * lookup in panvk_meta_copy_img2img_format_idx() never compares padding.
 */
struct panvk_meta_copy_img2img_format_info {
   enum pipe_format srcfmt;
   enum pipe_format dstfmt;
   unsigned dstmask;
} PACKED;
507 
/* Every src/dst format+writemask combination the img2img path supports;
 * indexed through panvk_meta_copy_img2img_format_idx().  Must stay in sync
 * with PANVK_META_COPY_IMG2IMG_NUM_FORMATS. */
static const struct panvk_meta_copy_img2img_format_info panvk_meta_copy_img2img_fmts[] = {
   { PIPE_FORMAT_R8_UNORM, PIPE_FORMAT_R8_UNORM, 0x1},
   { PIPE_FORMAT_R5G6B5_UNORM, PIPE_FORMAT_R5G6B5_UNORM, 0x7},
   { PIPE_FORMAT_R5G6B5_UNORM, PIPE_FORMAT_R8G8_UNORM, 0x3},
   { PIPE_FORMAT_R8G8_UNORM, PIPE_FORMAT_R5G6B5_UNORM, 0x7},
   { PIPE_FORMAT_R8G8_UNORM, PIPE_FORMAT_R8G8_UNORM, 0x3},
   /* Z24S8(depth) */
   { PIPE_FORMAT_R8G8B8A8_UNORM, PIPE_FORMAT_R8G8B8A8_UNORM, 0x7 },
   /* Z24S8(stencil) */
   { PIPE_FORMAT_R8G8B8A8_UNORM, PIPE_FORMAT_R8G8B8A8_UNORM, 0x8 },
   { PIPE_FORMAT_R8G8B8A8_UNORM, PIPE_FORMAT_R8G8B8A8_UNORM, 0xf },
   { PIPE_FORMAT_R16G16B16_UINT, PIPE_FORMAT_R16G16B16_UINT, 0x7 },
   { PIPE_FORMAT_R32G32_UINT, PIPE_FORMAT_R32G32_UINT, 0x3 },
   /* Z32S8X24(depth) */
   { PIPE_FORMAT_R32G32_UINT, PIPE_FORMAT_R32G32_UINT, 0x1 },
   /* Z32S8X24(stencil) */
   { PIPE_FORMAT_R32G32_UINT, PIPE_FORMAT_R32G32_UINT, 0x2 },
   { PIPE_FORMAT_R32G32B32_UINT, PIPE_FORMAT_R32G32B32_UINT, 0x7 },
   { PIPE_FORMAT_R32G32B32A32_UINT, PIPE_FORMAT_R32G32B32A32_UINT, 0xf },
};
528 
529 static unsigned
panvk_meta_copy_img2img_format_idx(struct panvk_meta_copy_img2img_format_info key)530 panvk_meta_copy_img2img_format_idx(struct panvk_meta_copy_img2img_format_info key)
531 {
532    STATIC_ASSERT(ARRAY_SIZE(panvk_meta_copy_img2img_fmts) == PANVK_META_COPY_IMG2IMG_NUM_FORMATS);
533 
534    for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_img2img_fmts); i++) {
535       if (!memcmp(&key, &panvk_meta_copy_img2img_fmts[i], sizeof(key)))
536          return i;
537    }
538 
539    unreachable("Invalid image format\n");
540 }
541 
542 static unsigned
panvk_meta_copy_img_mask(enum pipe_format imgfmt,VkImageAspectFlags aspectMask)543 panvk_meta_copy_img_mask(enum pipe_format imgfmt, VkImageAspectFlags aspectMask)
544 {
545    if (aspectMask != VK_IMAGE_ASPECT_DEPTH_BIT &&
546        aspectMask != VK_IMAGE_ASPECT_STENCIL_BIT) {
547       enum pipe_format outfmt = panvk_meta_copy_img_format(imgfmt);
548 
549       return (1 << util_format_get_nr_components(outfmt)) - 1;
550    }
551 
552    switch (imgfmt) {
553    case PIPE_FORMAT_S8_UINT:
554       return 1;
555    case PIPE_FORMAT_Z16_UNORM:
556       return 3;
557    case PIPE_FORMAT_Z16_UNORM_S8_UINT:
558       return aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT ? 3 : 8;
559    case PIPE_FORMAT_Z24_UNORM_S8_UINT:
560       return aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT ? 7 : 8;
561    case PIPE_FORMAT_Z24X8_UNORM:
562       assert(aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT);
563       return 7;
564    case PIPE_FORMAT_Z32_FLOAT:
565       return 0xf;
566    case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
567       return aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT ? 1 : 2;
568    default:
569       unreachable("Invalid depth format\n");
570    }
571 }
572 
/*
 * Record an image-to-image copy for one VkImageCopy2 region.
 *
 * The destination level is bound as the sole render target and the source
 * is sampled as a texture; one tiler job drawing a quad is submitted per
 * copied layer, each in its own batch (the current batch is closed first).
 */
static void
panvk_meta_copy_img2img(struct panvk_cmd_buffer *cmdbuf,
                        const struct panvk_image *src,
                        const struct panvk_image *dst,
                        const VkImageCopy2 *region)
{
   struct panfrost_device *pdev = &cmdbuf->device->physical_device->pdev;
   struct pan_fb_info *fbinfo = &cmdbuf->state.fb.info;
   /* Canonical copy formats + destination writemask select the
    * precompiled pipeline variant. */
   struct panvk_meta_copy_img2img_format_info key = {
      .srcfmt = panvk_meta_copy_img_format(src->pimage.layout.format),
      .dstfmt = panvk_meta_copy_img_format(dst->pimage.layout.format),
      .dstmask = panvk_meta_copy_img_mask(dst->pimage.layout.format,
                                          region->dstSubresource.aspectMask),
   };

   assert(src->pimage.layout.nr_samples == dst->pimage.layout.nr_samples);

   unsigned texdimidx =
      panvk_meta_copy_tex_type(src->pimage.layout.dim,
                               src->pimage.layout.array_size > 1);
   unsigned fmtidx =
      panvk_meta_copy_img2img_format_idx(key);
   unsigned ms = dst->pimage.layout.nr_samples > 1 ? 1 : 0;

   mali_ptr rsd =
      cmdbuf->device->physical_device->meta.copy.img2img[ms][texdimidx][fmtidx].rsd;

   /* Cube sources are sampled as 2D; layers are selected via the
    * first/last_layer range. */
   struct pan_image_view srcview = {
      .format = key.srcfmt,
      .dim = src->pimage.layout.dim == MALI_TEXTURE_DIMENSION_CUBE ?
             MALI_TEXTURE_DIMENSION_2D : src->pimage.layout.dim,
      .image = &src->pimage,
      .nr_samples = src->pimage.layout.nr_samples,
      .first_level = region->srcSubresource.mipLevel,
      .last_level = region->srcSubresource.mipLevel,
      .first_layer = region->srcSubresource.baseArrayLayer,
      .last_layer = region->srcSubresource.baseArrayLayer + region->srcSubresource.layerCount - 1,
      .swizzle = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W },
   };

   /* Render-target view of the destination; first/last_layer is patched
    * per iteration of the layer loop below. */
   struct pan_image_view dstview = {
      .format = key.dstfmt,
      .dim = MALI_TEXTURE_DIMENSION_2D,
      .image = &dst->pimage,
      .nr_samples = dst->pimage.layout.nr_samples,
      .first_level = region->dstSubresource.mipLevel,
      .last_level = region->dstSubresource.mipLevel,
      .swizzle = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W },
   };

   unsigned minx = MAX2(region->dstOffset.x, 0);
   unsigned miny = MAX2(region->dstOffset.y, 0);
   unsigned maxx = MAX2(region->dstOffset.x + region->extent.width - 1, 0);
   unsigned maxy = MAX2(region->dstOffset.y + region->extent.height - 1, 0);

   mali_ptr vpd =
      panvk_per_arch(meta_emit_viewport)(&cmdbuf->desc_pool.base,
                                         minx, miny, maxx, maxy);

   /* Quad corners covering the destination rectangle (x, y, z, w). */
   float dst_rect[] = {
      minx, miny, 0.0, 1.0,
      maxx + 1, miny, 0.0, 1.0,
      minx, maxy + 1, 0.0, 1.0,
      maxx + 1, maxy + 1, 0.0, 1.0,
   };

   mali_ptr dst_coords =
      pan_pool_upload_aligned(&cmdbuf->desc_pool.base, dst_rect,
                              sizeof(dst_rect), 64);

   /* TODO: don't force preloads of dst resources if unneeded */

   unsigned width = u_minify(dst->pimage.layout.width, region->dstSubresource.mipLevel);
   unsigned height = u_minify(dst->pimage.layout.height, region->dstSubresource.mipLevel);
   cmdbuf->state.fb.crc_valid[0] = false;
   /* Render area is expanded to 32-pixel tile granularity, clamped to the
    * mip level extent. */
   *fbinfo = (struct pan_fb_info){
      .width = width,
      .height = height,
      .extent.minx = minx & ~31,
      .extent.miny = miny & ~31,
      .extent.maxx = MIN2(ALIGN_POT(maxx + 1, 32), width) - 1,
      .extent.maxy = MIN2(ALIGN_POT(maxy + 1, 32), height) - 1,
      .nr_samples = dst->pimage.layout.nr_samples,
      .rt_count = 1,
      .rts[0].view = &dstview,
      .rts[0].preload = true,
      .rts[0].crc_valid = &cmdbuf->state.fb.crc_valid[0],
   };

   mali_ptr texture =
      panvk_meta_copy_img_emit_texture(pdev, &cmdbuf->desc_pool.base, &srcview);
   mali_ptr sampler =
      panvk_meta_copy_img_emit_sampler(pdev, &cmdbuf->desc_pool.base);

   panvk_per_arch(cmd_close_batch)(cmdbuf);

   /* Re-purpose min/max for the source rectangle. */
   minx = MAX2(region->srcOffset.x, 0);
   miny = MAX2(region->srcOffset.y, 0);
   maxx = MAX2(region->srcOffset.x + region->extent.width - 1, 0);
   maxy = MAX2(region->srcOffset.y + region->extent.height - 1, 0);
   assert(region->dstOffset.z >= 0);

   /* 3D copies encode layers in offset.z/extent.depth; array copies use
    * baseArrayLayer/layerCount -- take the max of both conventions. */
   unsigned first_src_layer = MAX2(0, region->srcOffset.z);
   unsigned first_dst_layer = MAX2(region->dstSubresource.baseArrayLayer, region->dstOffset.z);
   unsigned nlayers = MAX2(region->dstSubresource.layerCount, region->extent.depth);
   for (unsigned l = 0; l < nlayers; l++) {
      unsigned src_l = l + first_src_layer;
      /* Source coords carry the source layer in the z component. */
      float src_rect[] = {
         minx, miny, src_l, 1.0,
         maxx + 1, miny, src_l, 1.0,
         minx, maxy + 1, src_l, 1.0,
         maxx + 1, maxy + 1, src_l, 1.0,
      };

      mali_ptr src_coords =
         pan_pool_upload_aligned(&cmdbuf->desc_pool.base, src_rect,
                                 sizeof(src_rect), 64);

      struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf);

      dstview.first_layer = dstview.last_layer = l + first_dst_layer;
      batch->blit.src = src->pimage.data.bo;
      batch->blit.dst = dst->pimage.data.bo;
      panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, true);
      panvk_per_arch(cmd_alloc_fb_desc)(cmdbuf);
      panvk_per_arch(cmd_prepare_tiler_context)(cmdbuf);

      mali_ptr tsd, tiler;

      tsd = batch->tls.gpu;
      tiler = batch->tiler.descs.gpu;

      struct panfrost_ptr job;

      job = panvk_meta_copy_emit_tiler_job(&cmdbuf->desc_pool.base,
                                           &batch->scoreboard,
                                           src_coords, dst_coords,
                                           texture, sampler, 0,
                                           vpd, rsd, tsd, tiler);

      util_dynarray_append(&batch->jobs, void *, job.cpu);
      panvk_per_arch(cmd_close_batch)(cmdbuf);
   }
}
717 
718 static void
panvk_meta_copy_img2img_init(struct panvk_physical_device * dev,bool is_ms)719 panvk_meta_copy_img2img_init(struct panvk_physical_device *dev, bool is_ms)
720 {
721    STATIC_ASSERT(ARRAY_SIZE(panvk_meta_copy_img2img_fmts) == PANVK_META_COPY_IMG2IMG_NUM_FORMATS);
722 
723    for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_img2img_fmts); i++) {
724       for (unsigned texdim = 1; texdim <= 3; texdim++) {
725          unsigned texdimidx = panvk_meta_copy_tex_type(texdim, false);
726          assert(texdimidx < ARRAY_SIZE(dev->meta.copy.img2img[0]));
727 
728          /* No MSAA on 3D textures */
729          if (texdim == 3 && is_ms) continue;
730 
731          struct pan_shader_info shader_info;
732          mali_ptr shader =
733             panvk_meta_copy_img2img_shader(&dev->pdev, &dev->meta.bin_pool.base,
734                                            panvk_meta_copy_img2img_fmts[i].srcfmt,
735                                            panvk_meta_copy_img2img_fmts[i].dstfmt,
736                                            panvk_meta_copy_img2img_fmts[i].dstmask,
737                                            texdim, false, is_ms, &shader_info);
738          dev->meta.copy.img2img[is_ms][texdimidx][i].rsd =
739             panvk_meta_copy_to_img_emit_rsd(&dev->pdev, &dev->meta.desc_pool.base,
740                                             shader, &shader_info,
741                                             panvk_meta_copy_img2img_fmts[i].dstfmt,
742                                             panvk_meta_copy_img2img_fmts[i].dstmask,
743                                             true);
744          if (texdim == 3)
745             continue;
746 
747          memset(&shader_info, 0, sizeof(shader_info));
748          texdimidx = panvk_meta_copy_tex_type(texdim, true);
749          assert(texdimidx < ARRAY_SIZE(dev->meta.copy.img2img[0]));
750          shader =
751             panvk_meta_copy_img2img_shader(&dev->pdev, &dev->meta.bin_pool.base,
752                                            panvk_meta_copy_img2img_fmts[i].srcfmt,
753                                            panvk_meta_copy_img2img_fmts[i].dstfmt,
754                                            panvk_meta_copy_img2img_fmts[i].dstmask,
755                                            texdim, true, is_ms, &shader_info);
756          dev->meta.copy.img2img[is_ms][texdimidx][i].rsd =
757             panvk_meta_copy_to_img_emit_rsd(&dev->pdev, &dev->meta.desc_pool.base,
758                                             shader, &shader_info,
759                                             panvk_meta_copy_img2img_fmts[i].dstfmt,
760                                             panvk_meta_copy_img2img_fmts[i].dstmask,
761                                             true);
762       }
763    }
764 }
765 
766 void
panvk_per_arch(CmdCopyImage2)767 panvk_per_arch(CmdCopyImage2)(VkCommandBuffer commandBuffer,
768                               const VkCopyImageInfo2 *pCopyImageInfo)
769 {
770    VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
771    VK_FROM_HANDLE(panvk_image, dst, pCopyImageInfo->dstImage);
772    VK_FROM_HANDLE(panvk_image, src, pCopyImageInfo->srcImage);
773 
774    for (unsigned i = 0; i < pCopyImageInfo->regionCount; i++) {
775       panvk_meta_copy_img2img(cmdbuf, src, dst, &pCopyImageInfo->pRegions[i]);
776    }
777 }
778 
779 static unsigned
panvk_meta_copy_buf_texelsize(enum pipe_format imgfmt,unsigned mask)780 panvk_meta_copy_buf_texelsize(enum pipe_format imgfmt, unsigned mask)
781 {
782    unsigned imgtexelsz = util_format_get_blocksize(imgfmt);
783    unsigned nbufcomps = util_bitcount(mask);
784 
785    if (nbufcomps == util_format_get_nr_components(imgfmt))
786       return imgtexelsz;
787 
788    /* Special case for Z24 buffers which are not tightly packed */
789    if (mask == 7 && imgtexelsz == 4)
790       return 4;
791 
792    /* Special case for S8 extraction from Z32_S8X24 */
793    if (mask == 2 && imgtexelsz == 8)
794       return 1;
795 
796    unsigned compsz =
797       util_format_get_component_bits(imgfmt, UTIL_FORMAT_COLORSPACE_RGB, 0);
798 
799    assert(!(compsz % 8));
800 
801    return nbufcomps * compsz / 8;
802 }
803 
804 static enum pipe_format
panvk_meta_copy_buf2img_format(enum pipe_format imgfmt)805 panvk_meta_copy_buf2img_format(enum pipe_format imgfmt)
806 {
807    /* Pick blendable formats when we can, and the FLOAT variant matching the
808     * texelsize otherwise.
809     */
810    switch (util_format_get_blocksize(imgfmt)) {
811    case 1: return PIPE_FORMAT_R8_UNORM;
812    /* AFBC stores things differently for RGB565,
813     * we can't simply map to R8G8 in that case */
814    case 2: return (imgfmt == PIPE_FORMAT_R5G6B5_UNORM ||
815                    imgfmt == PIPE_FORMAT_B5G6R5_UNORM) ?
816                   PIPE_FORMAT_R5G6B5_UNORM : PIPE_FORMAT_R8G8_UNORM;
817    case 4: return PIPE_FORMAT_R8G8B8A8_UNORM;
818    case 6: return PIPE_FORMAT_R16G16B16_UINT;
819    case 8: return PIPE_FORMAT_R32G32_UINT;
820    case 12: return PIPE_FORMAT_R32G32B32_UINT;
821    case 16: return PIPE_FORMAT_R32G32B32A32_UINT;
822    default: unreachable("Invalid format\n");
823    }
824 }
825 
/* Lookup key for a meta-copy pipeline: the format used on the copy path and
 * the mask of components actually transferred. PACKED so instances can be
 * compared with memcmp() against the format tables below.
 */
struct panvk_meta_copy_format_info {
   enum pipe_format imgfmt;
   unsigned mask;
} PACKED;
830 
/* All (format, component-mask) combinations supported by the buffer-to-image
 * copy path; indexed by panvk_meta_copy_buf2img_format_idx(). The partial-mask
 * entries at the end handle depth/stencil aspect copies into packed
 * depth-stencil images.
 */
static const struct panvk_meta_copy_format_info panvk_meta_copy_buf2img_fmts[] = {
   { PIPE_FORMAT_R8_UNORM, 0x1 },
   { PIPE_FORMAT_R8G8_UNORM, 0x3 },
   { PIPE_FORMAT_R5G6B5_UNORM, 0x7 },
   { PIPE_FORMAT_R8G8B8A8_UNORM, 0xf },
   { PIPE_FORMAT_R16G16B16_UINT, 0x7 },
   { PIPE_FORMAT_R32G32_UINT, 0x3 },
   { PIPE_FORMAT_R32G32B32_UINT, 0x7 },
   { PIPE_FORMAT_R32G32B32A32_UINT, 0xf },
   /* S8 -> Z24S8 */
   { PIPE_FORMAT_R8G8B8A8_UNORM, 0x8 },
   /* S8 -> Z32_S8X24 */
   { PIPE_FORMAT_R32G32_UINT, 0x2 },
   /* Z24X8 -> Z24S8 */
   { PIPE_FORMAT_R8G8B8A8_UNORM, 0x7 },
   /* Z32 -> Z32_S8X24 */
   { PIPE_FORMAT_R32G32_UINT, 0x1 },
};
849 
/* Push-constant payload for the buffer-to-image copy shader: GPU address of
 * the source buffer plus its per-row and per-2D-slice byte strides. PACKED
 * because the shader addresses fields by byte offset (see the macro below).
 */
struct panvk_meta_copy_buf2img_info {
   struct {
      mali_ptr ptr;
      struct {
         unsigned line;
         unsigned surf;
      } stride;
   } buf;
} PACKED;
859 
/* Emit a NIR push-constant load of one panvk_meta_copy_buf2img_info field,
 * sized to the field's bit width and addressed by its struct offset.
 */
#define panvk_meta_copy_buf2img_get_info_field(b, field) \
        nir_load_push_constant((b), 1, \
                     sizeof(((struct panvk_meta_copy_buf2img_info *)0)->field) * 8, \
                     nir_imm_int(b, 0), \
                     .base = offsetof(struct panvk_meta_copy_buf2img_info, field), \
                     .range = ~0)
866 
/* Build, compile and upload the fragment shader used by the buffer-to-image
 * copy path. The shader reads one texel from the source buffer (address and
 * strides come in via push constants) at the location given by the varying
 * coordinates, converts it to the render-target representation for
 * key.imgfmt, and writes it to RT0. Returns the GPU address of the uploaded
 * binary; *shader_info is filled by the compiler.
 */
static mali_ptr
panvk_meta_copy_buf2img_shader(struct panfrost_device *pdev,
                               struct pan_pool *bin_pool,
                               struct panvk_meta_copy_format_info key,
                               struct pan_shader_info *shader_info)
{
   nir_builder b =
      nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
                                     GENX(pan_shader_get_compiler_options)(),
                                     "panvk_meta_copy_buf2img(imgfmt=%s,mask=%x)",
                                     util_format_name(key.imgfmt),
                                     key.mask);

   /* Interpolated (x, y, layer) coordinate fed by the blit vertex data. */
   nir_variable *coord_var =
      nir_variable_create(b.shader, nir_var_shader_in,
                          glsl_vector_type(GLSL_TYPE_FLOAT, 3),
                          "coord");
   coord_var->data.location = VARYING_SLOT_VAR0;
   nir_ssa_def *coord = nir_load_var(&b, coord_var);

   coord = nir_f2u32(&b, coord);

   /* Source buffer address and strides from push constants. */
   nir_ssa_def *bufptr =
      panvk_meta_copy_buf2img_get_info_field(&b, buf.ptr);
   nir_ssa_def *buflinestride =
      panvk_meta_copy_buf2img_get_info_field(&b, buf.stride.line);
   nir_ssa_def *bufsurfstride =
      panvk_meta_copy_buf2img_get_info_field(&b, buf.stride.surf);

   unsigned imgtexelsz = util_format_get_blocksize(key.imgfmt);
   unsigned buftexelsz = panvk_meta_copy_buf_texelsize(key.imgfmt, key.mask);
   unsigned writemask = key.mask;

   /* Byte offset of the texel: x * texelsz + y * line_stride + z * surf_stride. */
   nir_ssa_def *offset =
      nir_imul(&b, nir_channel(&b, coord, 0), nir_imm_int(&b, buftexelsz));
   offset = nir_iadd(&b, offset,
                     nir_imul(&b, nir_channel(&b, coord, 1), buflinestride));
   offset = nir_iadd(&b, offset,
                     nir_imul(&b, nir_channel(&b, coord, 2), bufsurfstride));
   bufptr = nir_iadd(&b, bufptr, nir_u2u64(&b, offset));

   /* Component size on the image side: 8-bit for blendable formats
    * (except RGB565), otherwise the largest power-of-two <= 4 dividing
    * the texel size.
    */
   unsigned imgcompsz =
      (imgtexelsz <= 4 && key.imgfmt != PIPE_FORMAT_R5G6B5_UNORM) ?
      1 : MIN2(1 << (ffs(imgtexelsz) - 1), 4);

   unsigned nimgcomps = imgtexelsz / imgcompsz;
   unsigned bufcompsz = MIN2(buftexelsz, imgcompsz);
   unsigned nbufcomps = buftexelsz / bufcompsz;

   assert(bufcompsz == 1 || bufcompsz == 2 || bufcompsz == 4);
   assert(nbufcomps <= 4 && nimgcomps <= 4);

   nir_ssa_def *texel =
      nir_load_global(&b, bufptr, bufcompsz, nbufcomps, bufcompsz * 8);

   enum glsl_base_type basetype;
   if (key.imgfmt == PIPE_FORMAT_R5G6B5_UNORM) {
      /* Unpack the 5:6:5 fields and normalize to float for the blend unit. */
      texel = nir_vec3(&b,
                       nir_iand_imm(&b, texel, BITFIELD_MASK(5)),
                       nir_iand_imm(&b, nir_ushr_imm(&b, texel, 5), BITFIELD_MASK(6)),
                       nir_iand_imm(&b, nir_ushr_imm(&b, texel, 11), BITFIELD_MASK(5)));
      texel = nir_fmul(&b,
                       nir_u2f32(&b, texel),
                       nir_vec3(&b,
                                nir_imm_float(&b, 1.0f / 31),
                                nir_imm_float(&b, 1.0f / 63),
                                nir_imm_float(&b, 1.0f / 31)));
      nimgcomps = 3;
      basetype = GLSL_TYPE_FLOAT;
   } else if (imgcompsz == 1) {
      assert(bufcompsz == 1);
      /* Blendable formats are unorm and the fixed-function blend unit
       * takes float values.
       */
      texel = nir_fmul(&b, nir_u2f32(&b, texel),
                       nir_imm_float(&b, 1.0f / 255));
      basetype = GLSL_TYPE_FLOAT;
   } else {
      /* Raw integer copy: just narrow/keep the loaded bits. */
      texel = nir_u2uN(&b, texel, imgcompsz * 8);
      basetype = imgcompsz == 2 ? GLSL_TYPE_UINT16 : GLSL_TYPE_UINT;
   }

   /* We always pass the texel using 32-bit regs for now */
   nir_variable *out =
      nir_variable_create(b.shader, nir_var_shader_out,
                          glsl_vector_type(basetype, nimgcomps),
                          "out");
   out->data.location = FRAG_RESULT_DATA0;

   uint16_t fullmask = (1 << nimgcomps) - 1;

   assert(fullmask >= writemask);

   if (fullmask != writemask) {
      /* Partial-component copy: merge the copied components with the
       * destination's existing ones (read back via the preloaded RT for
       * multi-byte components, zero-filled for 8-bit ones).
       */
      unsigned first_written_comp = ffs(writemask) - 1;
      nir_ssa_def *oldtexel = NULL;
      if (imgcompsz > 1)
         oldtexel = nir_load_var(&b, out);

      nir_ssa_def *texel_comps[4];
      for (unsigned i = 0; i < nimgcomps; i++) {
         if (writemask & BITFIELD_BIT(i))
            texel_comps[i] = nir_channel(&b, texel, i - first_written_comp);
         else if (imgcompsz > 1)
            texel_comps[i] = nir_channel(&b, oldtexel, i);
         else
            texel_comps[i] = nir_imm_intN_t(&b, 0, texel->bit_size);
      }

      texel = nir_vec(&b, texel_comps, nimgcomps);
   }

   nir_store_var(&b, out, texel, 0xff);

   struct panfrost_compile_inputs inputs = {
      .gpu_id = pdev->gpu_id,
      .is_blit = true,
      .no_ubo_to_push = true,
   };

   /* Static render-target conversion descriptor matching the raw
    * integer/16-bit register layout chosen above.
    */
   pan_pack(&inputs.bifrost.rt_conv[0], INTERNAL_CONVERSION, cfg) {
      cfg.memory_format = (imgcompsz == 2 ? MALI_RG16UI : MALI_RG32UI) << 12;
      cfg.register_format = imgcompsz == 2 ?
                            MALI_REGISTER_FILE_FORMAT_U16 :
                            MALI_REGISTER_FILE_FORMAT_U32;
   }
   inputs.bifrost.static_rt_conv = true;

   struct util_dynarray binary;

   util_dynarray_init(&binary, NULL);
   GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info);
   /* Push constants cover the whole info struct (counted in 32-bit words). */
   shader_info->push.count = DIV_ROUND_UP(sizeof(struct panvk_meta_copy_buf2img_info), 4);

   mali_ptr shader =
      pan_pool_upload_aligned(bin_pool, binary.data, binary.size, 128);

   util_dynarray_fini(&binary);
   ralloc_free(b.shader);

   return shader;
}
1009 
1010 static unsigned
panvk_meta_copy_buf2img_format_idx(struct panvk_meta_copy_format_info key)1011 panvk_meta_copy_buf2img_format_idx(struct panvk_meta_copy_format_info key)
1012 {
1013    for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_buf2img_fmts); i++) {
1014       if (!memcmp(&key, &panvk_meta_copy_buf2img_fmts[i], sizeof(key)))
1015          return i;
1016    }
1017 
1018    unreachable("Invalid image format\n");
1019 }
1020 
/* Record one VkBufferImageCopy2 region as a series of meta draws, one per
 * destination layer/slice. Each draw renders a full-region quad whose
 * fragment shader fetches texels from the source buffer via push constants
 * and writes them to the destination image bound as the sole render target.
 */
static void
panvk_meta_copy_buf2img(struct panvk_cmd_buffer *cmdbuf,
                        const struct panvk_buffer *buf,
                        const struct panvk_image *img,
                        const VkBufferImageCopy2 *region)
{
   struct pan_fb_info *fbinfo = &cmdbuf->state.fb.info;
   /* Clamp the destination rectangle to non-negative coordinates. */
   unsigned minx = MAX2(region->imageOffset.x, 0);
   unsigned miny = MAX2(region->imageOffset.y, 0);
   unsigned maxx = MAX2(region->imageOffset.x + region->imageExtent.width - 1, 0);
   unsigned maxy = MAX2(region->imageOffset.y + region->imageExtent.height - 1, 0);

   mali_ptr vpd =
      panvk_per_arch(meta_emit_viewport)(&cmdbuf->desc_pool.base,
                                         minx, miny, maxx, maxy);

   /* Destination quad covering the copy rectangle (x, y, 0, 1 per vertex). */
   float dst_rect[] = {
      minx, miny, 0.0, 1.0,
      maxx + 1, miny, 0.0, 1.0,
      minx, maxy + 1, 0.0, 1.0,
      maxx + 1, maxy + 1, 0.0, 1.0,
   };
   mali_ptr dst_coords =
      pan_pool_upload_aligned(&cmdbuf->desc_pool.base, dst_rect,
                              sizeof(dst_rect), 64);

   struct panvk_meta_copy_format_info key = {
      .imgfmt = panvk_meta_copy_buf2img_format(img->pimage.layout.format),
      .mask = panvk_meta_copy_img_mask(img->pimage.layout.format,
                                       region->imageSubresource.aspectMask),
   };

   unsigned fmtidx = panvk_meta_copy_buf2img_format_idx(key);

   /* Pre-baked renderer state for this format/mask combination. */
   mali_ptr rsd =
      cmdbuf->device->physical_device->meta.copy.buf2img[fmtidx].rsd;

   const struct vk_image_buffer_layout buflayout =
      vk_image_buffer_copy_layout(&img->vk, region);
   struct panvk_meta_copy_buf2img_info info = {
      .buf.ptr = panvk_buffer_gpu_ptr(buf, region->bufferOffset),
      .buf.stride.line = buflayout.row_stride_B,
      .buf.stride.surf = buflayout.image_stride_B,
   };

   mali_ptr pushconsts =
      pan_pool_upload_aligned(&cmdbuf->desc_pool.base, &info, sizeof(info), 16);

   /* Render-target view of the destination mip level; first/last_layer are
    * patched per iteration below.
    */
   struct pan_image_view view = {
      .format = key.imgfmt,
      .dim = MALI_TEXTURE_DIMENSION_2D,
      .image = &img->pimage,
      .nr_samples = img->pimage.layout.nr_samples,
      .first_level = region->imageSubresource.mipLevel,
      .last_level = region->imageSubresource.mipLevel,
      .swizzle = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W },
   };

   /* TODO: don't force preloads of dst resources if unneeded */
   cmdbuf->state.fb.crc_valid[0] = false;
   *fbinfo = (struct pan_fb_info){
      .width = u_minify(img->pimage.layout.width, region->imageSubresource.mipLevel),
      .height = u_minify(img->pimage.layout.height, region->imageSubresource.mipLevel),
      .extent.minx = minx,
      .extent.maxx = maxx,
      .extent.miny = miny,
      .extent.maxy = maxy,
      .nr_samples = 1,
      .rt_count = 1,
      .rts[0].view = &view,
      .rts[0].preload = true,
      .rts[0].crc_valid = &cmdbuf->state.fb.crc_valid[0],
   };

   /* The FB descriptor changed: finish any in-flight batch first. */
   panvk_per_arch(cmd_close_batch)(cmdbuf);

   /* Vulkan guarantees only one of layerCount / depth exceeds 1, so folding
    * them with MAX2 gives the number of 2D slices to render.
    */
   assert(region->imageSubresource.layerCount == 1 ||
          region->imageExtent.depth == 1);
   assert(region->imageOffset.z >= 0);
   unsigned first_layer = MAX2(region->imageSubresource.baseArrayLayer, region->imageOffset.z);
   unsigned nlayers = MAX2(region->imageSubresource.layerCount, region->imageExtent.depth);
   for (unsigned l = 0; l < nlayers; l++) {
      /* Source coordinates: buffer texel space, z selects the slice. */
      float src_rect[] = {
         0, 0, l, 1.0,
         region->imageExtent.width, 0, l, 1.0,
         0, region->imageExtent.height, l, 1.0,
         region->imageExtent.width, region->imageExtent.height, l, 1.0,
      };

      mali_ptr src_coords =
         pan_pool_upload_aligned(&cmdbuf->desc_pool.base, src_rect,
                                 sizeof(src_rect), 64);

      /* One batch per layer: each renders to a different dst layer. */
      struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf);

      view.first_layer = view.last_layer = l + first_layer;
      batch->blit.src = buf->bo;
      batch->blit.dst = img->pimage.data.bo;
      panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, true);
      panvk_per_arch(cmd_alloc_fb_desc)(cmdbuf);
      panvk_per_arch(cmd_prepare_tiler_context)(cmdbuf);

      mali_ptr tsd, tiler;

      tsd = batch->tls.gpu;
      tiler = batch->tiler.descs.gpu;

      struct panfrost_ptr job;

      /* No texture/sampler: the shader reads the buffer directly, so only
       * push constants are passed.
       */
      job = panvk_meta_copy_emit_tiler_job(&cmdbuf->desc_pool.base,
                                           &batch->scoreboard,
                                           src_coords, dst_coords,
                                           0, 0, pushconsts,
                                           vpd, rsd, tsd, tiler);

      util_dynarray_append(&batch->jobs, void *, job.cpu);
      panvk_per_arch(cmd_close_batch)(cmdbuf);
   }
}
1140 
1141 static void
panvk_meta_copy_buf2img_init(struct panvk_physical_device * dev)1142 panvk_meta_copy_buf2img_init(struct panvk_physical_device *dev)
1143 {
1144    STATIC_ASSERT(ARRAY_SIZE(panvk_meta_copy_buf2img_fmts) == PANVK_META_COPY_BUF2IMG_NUM_FORMATS);
1145 
1146    for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_buf2img_fmts); i++) {
1147       struct pan_shader_info shader_info;
1148       mali_ptr shader =
1149          panvk_meta_copy_buf2img_shader(&dev->pdev, &dev->meta.bin_pool.base,
1150                                         panvk_meta_copy_buf2img_fmts[i],
1151                                         &shader_info);
1152       dev->meta.copy.buf2img[i].rsd =
1153          panvk_meta_copy_to_img_emit_rsd(&dev->pdev, &dev->meta.desc_pool.base,
1154                                          shader, &shader_info,
1155                                          panvk_meta_copy_buf2img_fmts[i].imgfmt,
1156                                          panvk_meta_copy_buf2img_fmts[i].mask,
1157                                          false);
1158    }
1159 }
1160 
1161 void
panvk_per_arch(CmdCopyBufferToImage2)1162 panvk_per_arch(CmdCopyBufferToImage2)(VkCommandBuffer commandBuffer,
1163                                       const VkCopyBufferToImageInfo2 *pCopyBufferToImageInfo)
1164 {
1165    VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
1166    VK_FROM_HANDLE(panvk_buffer, buf, pCopyBufferToImageInfo->srcBuffer);
1167    VK_FROM_HANDLE(panvk_image, img, pCopyBufferToImageInfo->dstImage);
1168 
1169    for (unsigned i = 0; i < pCopyBufferToImageInfo->regionCount; i++) {
1170       panvk_meta_copy_buf2img(cmdbuf, buf, img, &pCopyBufferToImageInfo->pRegions[i]);
1171    }
1172 }
1173 
/* All (format, component-mask) combinations supported by the image-to-buffer
 * copy path; indexed by panvk_meta_copy_img2buf_format_idx(). Mirrors the
 * buf2img table but uses UINT texture formats since the compute shader reads
 * raw texels.
 */
static const struct panvk_meta_copy_format_info panvk_meta_copy_img2buf_fmts[] = {
   { PIPE_FORMAT_R8_UINT, 0x1 },
   { PIPE_FORMAT_R8G8_UINT, 0x3 },
   { PIPE_FORMAT_R5G6B5_UNORM, 0x7 },
   { PIPE_FORMAT_R8G8B8A8_UINT, 0xf },
   { PIPE_FORMAT_R16G16B16_UINT, 0x7 },
   { PIPE_FORMAT_R32G32_UINT, 0x3 },
   { PIPE_FORMAT_R32G32B32_UINT, 0x7 },
   { PIPE_FORMAT_R32G32B32A32_UINT, 0xf },
   /* S8 -> Z24S8 */
   { PIPE_FORMAT_R8G8B8A8_UINT, 0x8 },
   /* S8 -> Z32_S8X24 */
   { PIPE_FORMAT_R32G32_UINT, 0x2 },
   /* Z24X8 -> Z24S8 */
   { PIPE_FORMAT_R8G8B8A8_UINT, 0x7 },
   /* Z32 -> Z32_S8X24 */
   { PIPE_FORMAT_R32G32_UINT, 0x1 },
};
1192 
1193 static enum pipe_format
panvk_meta_copy_img2buf_format(enum pipe_format imgfmt)1194 panvk_meta_copy_img2buf_format(enum pipe_format imgfmt)
1195 {
1196    /* Pick blendable formats when we can, and the FLOAT variant matching the
1197     * texelsize otherwise.
1198     */
1199    switch (util_format_get_blocksize(imgfmt)) {
1200    case 1: return PIPE_FORMAT_R8_UINT;
1201    /* AFBC stores things differently for RGB565,
1202     * we can't simply map to R8G8 in that case */
1203    case 2: return (imgfmt == PIPE_FORMAT_R5G6B5_UNORM ||
1204                    imgfmt == PIPE_FORMAT_B5G6R5_UNORM) ?
1205                   PIPE_FORMAT_R5G6B5_UNORM : PIPE_FORMAT_R8G8_UINT;
1206    case 4: return PIPE_FORMAT_R8G8B8A8_UINT;
1207    case 6: return PIPE_FORMAT_R16G16B16_UINT;
1208    case 8: return PIPE_FORMAT_R32G32_UINT;
1209    case 12: return PIPE_FORMAT_R32G32B32_UINT;
1210    case 16: return PIPE_FORMAT_R32G32B32A32_UINT;
1211    default: unreachable("Invalid format\n");
1212    }
1213 }
1214 
/* Push-constant payload for the image-to-buffer copy compute shader:
 * destination buffer address and strides, plus the source image region
 * (texel offset and clamped extent) used for bounds checking. PACKED because
 * the shader addresses fields by byte offset (see the macro below).
 */
struct panvk_meta_copy_img2buf_info {
   struct {
      mali_ptr ptr;
      struct {
         unsigned line;
         unsigned surf;
      } stride;
   } buf;
   struct {
      struct {
         unsigned x, y, z;
      } offset;
      struct {
         unsigned minx, miny, maxx, maxy;
      } extent;
   } img;
} PACKED;
1232 
/* Emit a NIR push-constant load of one panvk_meta_copy_img2buf_info field,
 * sized to the field's bit width and addressed by its struct offset.
 */
#define panvk_meta_copy_img2buf_get_info_field(b, field) \
        nir_load_push_constant((b), 1, \
                     sizeof(((struct panvk_meta_copy_img2buf_info *)0)->field) * 8, \
                     nir_imm_int(b, 0), \
                     .base = offsetof(struct panvk_meta_copy_img2buf_info, field), \
                     .range = ~0)
1239 
1240 static mali_ptr
panvk_meta_copy_img2buf_shader(struct panfrost_device * pdev,struct pan_pool * bin_pool,struct panvk_meta_copy_format_info key,unsigned texdim,unsigned texisarray,struct pan_shader_info * shader_info)1241 panvk_meta_copy_img2buf_shader(struct panfrost_device *pdev,
1242                                struct pan_pool *bin_pool,
1243                                struct panvk_meta_copy_format_info key,
1244                                unsigned texdim, unsigned texisarray,
1245                                struct pan_shader_info *shader_info)
1246 {
1247    unsigned imgtexelsz = util_format_get_blocksize(key.imgfmt);
1248    unsigned buftexelsz = panvk_meta_copy_buf_texelsize(key.imgfmt, key.mask);
1249 
1250    /* FIXME: Won't work on compute queues, but we can't do that with
1251     * a compute shader if the destination is an AFBC surface.
1252     */
1253    nir_builder b =
1254       nir_builder_init_simple_shader(MESA_SHADER_COMPUTE,
1255                                      GENX(pan_shader_get_compiler_options)(),
1256                                      "panvk_meta_copy_img2buf(dim=%dD%s,imgfmt=%s,mask=%x)",
1257                                      texdim, texisarray ? "[]" : "",
1258                                      util_format_name(key.imgfmt),
1259                                      key.mask);
1260 
1261    nir_ssa_def *coord = nir_load_global_invocation_id(&b, 32);
1262    nir_ssa_def *bufptr =
1263       panvk_meta_copy_img2buf_get_info_field(&b, buf.ptr);
1264    nir_ssa_def *buflinestride =
1265       panvk_meta_copy_img2buf_get_info_field(&b, buf.stride.line);
1266    nir_ssa_def *bufsurfstride =
1267       panvk_meta_copy_img2buf_get_info_field(&b, buf.stride.surf);
1268 
1269    nir_ssa_def *imgminx =
1270       panvk_meta_copy_img2buf_get_info_field(&b, img.extent.minx);
1271    nir_ssa_def *imgminy =
1272       panvk_meta_copy_img2buf_get_info_field(&b, img.extent.miny);
1273    nir_ssa_def *imgmaxx =
1274       panvk_meta_copy_img2buf_get_info_field(&b, img.extent.maxx);
1275    nir_ssa_def *imgmaxy =
1276       panvk_meta_copy_img2buf_get_info_field(&b, img.extent.maxy);
1277 
1278    nir_ssa_def *imgcoords, *inbounds;
1279 
1280    switch (texdim + texisarray) {
1281    case 1:
1282       imgcoords =
1283          nir_iadd(&b,
1284                   nir_channel(&b, coord, 0),
1285                   panvk_meta_copy_img2buf_get_info_field(&b, img.offset.x));
1286       inbounds =
1287          nir_iand(&b,
1288                   nir_uge(&b, imgmaxx, nir_channel(&b, imgcoords, 0)),
1289                   nir_uge(&b, nir_channel(&b, imgcoords, 0), imgminx));
1290       break;
1291    case 2:
1292       imgcoords =
1293          nir_vec2(&b,
1294                   nir_iadd(&b,
1295                            nir_channel(&b, coord, 0),
1296                            panvk_meta_copy_img2buf_get_info_field(&b, img.offset.x)),
1297                   nir_iadd(&b,
1298                            nir_channel(&b, coord, 1),
1299                            panvk_meta_copy_img2buf_get_info_field(&b, img.offset.y)));
1300       inbounds =
1301          nir_iand(&b,
1302                   nir_iand(&b,
1303                            nir_uge(&b, imgmaxx, nir_channel(&b, imgcoords, 0)),
1304                            nir_uge(&b, imgmaxy, nir_channel(&b, imgcoords, 1))),
1305                   nir_iand(&b,
1306                            nir_uge(&b, nir_channel(&b, imgcoords, 0), imgminx),
1307                            nir_uge(&b, nir_channel(&b, imgcoords, 1), imgminy)));
1308       break;
1309    case 3:
1310       imgcoords =
1311          nir_vec3(&b,
1312                   nir_iadd(&b,
1313                            nir_channel(&b, coord, 0),
1314                            panvk_meta_copy_img2buf_get_info_field(&b, img.offset.x)),
1315                   nir_iadd(&b,
1316                            nir_channel(&b, coord, 1),
1317                            panvk_meta_copy_img2buf_get_info_field(&b, img.offset.y)),
1318                   nir_iadd(&b,
1319                            nir_channel(&b, coord, 2),
1320                            panvk_meta_copy_img2buf_get_info_field(&b, img.offset.y)));
1321       inbounds =
1322          nir_iand(&b,
1323                   nir_iand(&b,
1324                            nir_uge(&b, imgmaxx, nir_channel(&b, imgcoords, 0)),
1325                            nir_uge(&b, imgmaxy, nir_channel(&b, imgcoords, 1))),
1326                   nir_iand(&b,
1327                            nir_uge(&b, nir_channel(&b, imgcoords, 0), imgminx),
1328                            nir_uge(&b, nir_channel(&b, imgcoords, 1), imgminy)));
1329       break;
1330    default:
1331       unreachable("Invalid texture dimension\n");
1332    }
1333 
1334    nir_push_if(&b, inbounds);
1335 
1336    /* FIXME: doesn't work for tiled+compressed formats since blocks are 4x4
1337     * blocks instead of 16x16 texels in that case, and there's nothing we can
1338     * do to force the tile size to 4x4 in the render path.
1339     * This being said, compressed textures are not compatible with AFBC, so we
1340     * could use a compute shader arranging the blocks properly.
1341     */
1342    nir_ssa_def *offset =
1343       nir_imul(&b, nir_channel(&b, coord, 0), nir_imm_int(&b, buftexelsz));
1344    offset = nir_iadd(&b, offset,
1345                      nir_imul(&b, nir_channel(&b, coord, 1), buflinestride));
1346    offset = nir_iadd(&b, offset,
1347                      nir_imul(&b, nir_channel(&b, coord, 2), bufsurfstride));
1348    bufptr = nir_iadd(&b, bufptr, nir_u2u64(&b, offset));
1349 
1350    unsigned imgcompsz = imgtexelsz <= 4 ?
1351                         1 : MIN2(1 << (ffs(imgtexelsz) - 1), 4);
1352    unsigned nimgcomps = imgtexelsz / imgcompsz;
1353    assert(nimgcomps <= 4);
1354 
1355    nir_tex_instr *tex = nir_tex_instr_create(b.shader, 1);
1356    tex->op = nir_texop_txf;
1357    tex->texture_index = 0;
1358    tex->is_array = texisarray;
1359    tex->dest_type = util_format_is_unorm(key.imgfmt) ?
1360                     nir_type_float32 : nir_type_uint32;
1361 
1362    switch (texdim) {
1363    case 1: tex->sampler_dim = GLSL_SAMPLER_DIM_1D; break;
1364    case 2: tex->sampler_dim = GLSL_SAMPLER_DIM_2D; break;
1365    case 3: tex->sampler_dim = GLSL_SAMPLER_DIM_3D; break;
1366    default: unreachable("Invalid texture dimension");
1367    }
1368 
1369    tex->src[0].src_type = nir_tex_src_coord;
1370    tex->src[0].src = nir_src_for_ssa(imgcoords);
1371    tex->coord_components = texdim + texisarray;
1372    nir_ssa_dest_init(&tex->instr, &tex->dest, 4,
1373                      nir_alu_type_get_type_size(tex->dest_type), NULL);
1374    nir_builder_instr_insert(&b, &tex->instr);
1375 
1376    nir_ssa_def *texel = &tex->dest.ssa;
1377 
1378    unsigned fullmask = (1 << util_format_get_nr_components(key.imgfmt)) - 1;
1379    unsigned nbufcomps = util_bitcount(fullmask);
1380    if (key.mask != fullmask) {
1381       nir_ssa_def *bufcomps[4];
1382       nbufcomps = 0;
1383       for (unsigned i = 0; i < nimgcomps; i++) {
1384          if (key.mask & BITFIELD_BIT(i))
1385             bufcomps[nbufcomps++] = nir_channel(&b, texel, i);
1386       }
1387 
1388       texel = nir_vec(&b, bufcomps, nbufcomps);
1389    }
1390 
1391    unsigned bufcompsz = buftexelsz / nbufcomps;
1392 
1393    if (key.imgfmt == PIPE_FORMAT_R5G6B5_UNORM) {
1394       texel = nir_fmul(&b, texel,
1395                        nir_vec3(&b,
1396                                 nir_imm_float(&b, 31),
1397                                 nir_imm_float(&b, 63),
1398                                 nir_imm_float(&b, 31)));
1399       texel = nir_f2u16(&b, texel);
1400       texel = nir_ior(&b, nir_channel(&b, texel, 0),
1401                       nir_ior(&b,
1402                               nir_ishl(&b, nir_channel(&b, texel, 1), nir_imm_int(&b, 5)),
1403                               nir_ishl(&b, nir_channel(&b, texel, 2), nir_imm_int(&b, 11))));
1404       imgcompsz = 2;
1405       bufcompsz = 2;
1406       nbufcomps = 1;
1407       nimgcomps = 1;
1408    } else if (imgcompsz == 1) {
1409       nir_ssa_def *packed = nir_channel(&b, texel, 0);
1410       for (unsigned i = 1; i < nbufcomps; i++) {
1411          packed = nir_ior(&b, packed,
1412                           nir_ishl(&b, nir_iand_imm(&b, nir_channel(&b, texel, i), 0xff),
1413                                    nir_imm_int(&b, i * 8)));
1414       }
1415       texel = packed;
1416 
1417       bufcompsz = nbufcomps == 3 ? 4 : nbufcomps;
1418       nbufcomps = 1;
1419    }
1420 
1421    assert(bufcompsz == 1 || bufcompsz == 2 || bufcompsz == 4);
1422    assert(nbufcomps <= 4 && nimgcomps <= 4);
1423    texel = nir_u2uN(&b, texel, bufcompsz * 8);
1424 
1425    nir_store_global(&b, bufptr, bufcompsz, texel, (1 << nbufcomps) - 1);
1426    nir_pop_if(&b, NULL);
1427 
1428    struct panfrost_compile_inputs inputs = {
1429       .gpu_id = pdev->gpu_id,
1430       .is_blit = true,
1431       .no_ubo_to_push = true,
1432    };
1433 
1434    struct util_dynarray binary;
1435 
1436    util_dynarray_init(&binary, NULL);
1437    GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info);
1438 
1439    shader_info->push.count = DIV_ROUND_UP(sizeof(struct panvk_meta_copy_img2buf_info), 4);
1440 
1441    mali_ptr shader =
1442       pan_pool_upload_aligned(bin_pool, binary.data, binary.size, 128);
1443 
1444    util_dynarray_fini(&binary);
1445    ralloc_free(b.shader);
1446 
1447    return shader;
1448 }
1449 
1450 static unsigned
panvk_meta_copy_img2buf_format_idx(struct panvk_meta_copy_format_info key)1451 panvk_meta_copy_img2buf_format_idx(struct panvk_meta_copy_format_info key)
1452 {
1453    for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_img2buf_fmts); i++) {
1454       if (!memcmp(&key, &panvk_meta_copy_img2buf_fmts[i], sizeof(key)))
1455          return i;
1456    }
1457 
1458    unreachable("Invalid texel size\n");
1459 }
1460 
/*
 * Record a single image->buffer copy region as a compute dispatch.
 * Texels are read through a texture descriptor and written to the buffer
 * with raw global stores by a pre-compiled shader picked from the
 * img2buf cache by (texture type, format) pair.
 */
static void
panvk_meta_copy_img2buf(struct panvk_cmd_buffer *cmdbuf,
                        const struct panvk_buffer *buf,
                        const struct panvk_image *img,
                        const VkBufferImageCopy2 *region)
{
   struct panfrost_device *pdev = &cmdbuf->device->physical_device->pdev;
   struct panvk_meta_copy_format_info key = {
      .imgfmt = panvk_meta_copy_img2buf_format(img->pimage.layout.format),
      .mask = panvk_meta_copy_img_mask(img->pimage.layout.format,
                                       region->imageSubresource.aspectMask),
   };
   unsigned buftexelsz = panvk_meta_copy_buf_texelsize(key.imgfmt, key.mask);
   unsigned texdimidx =
      panvk_meta_copy_tex_type(img->pimage.layout.dim,
                               img->pimage.layout.array_size > 1);
   unsigned fmtidx = panvk_meta_copy_img2buf_format_idx(key);

   /* Renderer state descriptor pre-baked at device-init time. */
   mali_ptr rsd =
      cmdbuf->device->physical_device->meta.copy.img2buf[texdimidx][fmtidx].rsd;

   /* Push-constant payload consumed by the copy shader. The x (and later y)
    * offsets are aligned down to 16 to match the 16-wide workgroup grid used
    * below; the extent min/max bounds let the shader skip invocations that
    * fall outside the requested region. */
   struct panvk_meta_copy_img2buf_info info = {
      .buf.ptr = panvk_buffer_gpu_ptr(buf, region->bufferOffset),
      /* bufferRowLength == 0 means rows are tightly packed. */
      .buf.stride.line = (region->bufferRowLength ? : region->imageExtent.width) * buftexelsz,
      .img.offset.x = MAX2(region->imageOffset.x & ~15, 0),
      .img.extent.minx = MAX2(region->imageOffset.x, 0),
      .img.extent.maxx = MAX2(region->imageOffset.x + region->imageExtent.width - 1, 0),
   };

   if (img->pimage.layout.dim == MALI_TEXTURE_DIMENSION_1D) {
      /* For 1D(-array) images the y dimension indexes the layer. */
      info.img.extent.maxy = region->imageSubresource.layerCount - 1;
   } else {
      info.img.offset.y = MAX2(region->imageOffset.y & ~15, 0);
      info.img.offset.z = MAX2(region->imageOffset.z, 0);
      info.img.extent.miny = MAX2(region->imageOffset.y, 0);
      info.img.extent.maxy = MAX2(region->imageOffset.y + region->imageExtent.height - 1, 0);
   }

   /* bufferImageHeight == 0 means 2D slices are tightly packed. */
   info.buf.stride.surf = (region->bufferImageHeight ? : region->imageExtent.height) *
                          info.buf.stride.line;

   mali_ptr pushconsts =
      pan_pool_upload_aligned(&cmdbuf->desc_pool.base, &info, sizeof(info), 16);

   /* Sample cube maps as 2D arrays so a face is selected by layer index. */
   struct pan_image_view view = {
      .format = key.imgfmt,
      .dim = img->pimage.layout.dim == MALI_TEXTURE_DIMENSION_CUBE ?
             MALI_TEXTURE_DIMENSION_2D : img->pimage.layout.dim,
      .image = &img->pimage,
      .nr_samples = img->pimage.layout.nr_samples,
      .first_level = region->imageSubresource.mipLevel,
      .last_level = region->imageSubresource.mipLevel,
      .first_layer = region->imageSubresource.baseArrayLayer,
      .last_layer = region->imageSubresource.baseArrayLayer + region->imageSubresource.layerCount - 1,
      .swizzle = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W },
   };

   mali_ptr texture =
      panvk_meta_copy_img_emit_texture(pdev, &cmdbuf->desc_pool.base, &view);
   mali_ptr sampler =
      panvk_meta_copy_img_emit_sampler(pdev, &cmdbuf->desc_pool.base);

   /* The copy runs in its own batch: flush whatever is pending first. */
   panvk_per_arch(cmd_close_batch)(cmdbuf);

   struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf);

   struct pan_tls_info tlsinfo = { 0 };

   /* Record the BOs involved so the batch tracks them. */
   batch->blit.src = img->pimage.data.bo;
   batch->blit.dst = buf->bo;
   batch->tls =
      pan_pool_alloc_desc(&cmdbuf->desc_pool.base, LOCAL_STORAGE);
   GENX(pan_emit_tls)(&tlsinfo, batch->tls.cpu);

   mali_ptr tsd = batch->tls.gpu;

   /* 16x16 workgroups for 2D/3D images, 16x1 for 1D. */
   struct pan_compute_dim wg_sz = {
      16,
      img->pimage.layout.dim == MALI_TEXTURE_DIMENSION_1D ? 1 : 16,
      1,
   };

   /* Enough workgroups to cover [offset, extent.max] (offsets were aligned
    * down to the workgroup size above); z covers layers/depth slices. */
   struct pan_compute_dim num_wg = {
     (ALIGN_POT(info.img.extent.maxx + 1, 16) - info.img.offset.x) / 16,
     img->pimage.layout.dim == MALI_TEXTURE_DIMENSION_1D ?
        region->imageSubresource.layerCount :
        (ALIGN_POT(info.img.extent.maxy + 1, 16) - info.img.offset.y) / 16,
     img->pimage.layout.dim != MALI_TEXTURE_DIMENSION_1D ?
        MAX2(region->imageSubresource.layerCount, region->imageExtent.depth) : 1,
   };

   struct panfrost_ptr job =
      panvk_meta_copy_emit_compute_job(&cmdbuf->desc_pool.base,
                                       &batch->scoreboard, &num_wg, &wg_sz,
                                       texture, sampler,
                                       pushconsts, rsd, tsd);

   util_dynarray_append(&batch->jobs, void *, job.cpu);

   panvk_per_arch(cmd_close_batch)(cmdbuf);
}
1562 
1563 static void
panvk_meta_copy_img2buf_init(struct panvk_physical_device * dev)1564 panvk_meta_copy_img2buf_init(struct panvk_physical_device *dev)
1565 {
1566    STATIC_ASSERT(ARRAY_SIZE(panvk_meta_copy_img2buf_fmts) == PANVK_META_COPY_IMG2BUF_NUM_FORMATS);
1567 
1568    for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_img2buf_fmts); i++) {
1569       for (unsigned texdim = 1; texdim <= 3; texdim++) {
1570          unsigned texdimidx = panvk_meta_copy_tex_type(texdim, false);
1571          assert(texdimidx < ARRAY_SIZE(dev->meta.copy.img2buf));
1572 
1573          struct pan_shader_info shader_info;
1574          mali_ptr shader =
1575             panvk_meta_copy_img2buf_shader(&dev->pdev, &dev->meta.bin_pool.base,
1576                                            panvk_meta_copy_img2buf_fmts[i],
1577                                            texdim, false, &shader_info);
1578          dev->meta.copy.img2buf[texdimidx][i].rsd =
1579             panvk_meta_copy_to_buf_emit_rsd(&dev->pdev,
1580                                             &dev->meta.desc_pool.base,
1581                                             shader, &shader_info, true);
1582 
1583          if (texdim == 3)
1584             continue;
1585 
1586          memset(&shader_info, 0, sizeof(shader_info));
1587          texdimidx = panvk_meta_copy_tex_type(texdim, true);
1588          assert(texdimidx < ARRAY_SIZE(dev->meta.copy.img2buf));
1589          shader =
1590             panvk_meta_copy_img2buf_shader(&dev->pdev, &dev->meta.bin_pool.base,
1591                                            panvk_meta_copy_img2buf_fmts[i],
1592                                            texdim, true, &shader_info);
1593          dev->meta.copy.img2buf[texdimidx][i].rsd =
1594             panvk_meta_copy_to_buf_emit_rsd(&dev->pdev,
1595                                             &dev->meta.desc_pool.base,
1596                                             shader, &shader_info, true);
1597       }
1598    }
1599 }
1600 
1601 void
panvk_per_arch(CmdCopyImageToBuffer2)1602 panvk_per_arch(CmdCopyImageToBuffer2)(VkCommandBuffer commandBuffer,
1603                                       const VkCopyImageToBufferInfo2 *pCopyImageToBufferInfo)
1604 {
1605    VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
1606    VK_FROM_HANDLE(panvk_buffer, buf, pCopyImageToBufferInfo->dstBuffer);
1607    VK_FROM_HANDLE(panvk_image, img, pCopyImageToBufferInfo->srcImage);
1608 
1609    for (unsigned i = 0; i < pCopyImageToBufferInfo->regionCount; i++) {
1610       panvk_meta_copy_img2buf(cmdbuf, buf, img, &pCopyImageToBufferInfo->pRegions[i]);
1611    }
1612 }
1613 
/* Push-constant layout shared between panvk_meta_copy_buf2buf() /
 * panvk_meta_update_buf() and the buf2buf compute shader. PACKED so the
 * CPU-side field offsets match the offsets baked into the shader. */
struct panvk_meta_copy_buf2buf_info {
   mali_ptr src; /* GPU address of the source data (offset already applied) */
   mali_ptr dst; /* GPU address of the destination (offset already applied) */
} PACKED;
1618 
/* Load one field of struct panvk_meta_copy_buf2buf_info from the
 * push-constant area; bit size and byte offset are derived from the
 * struct definition itself so shader and CPU stay in sync. */
#define panvk_meta_copy_buf2buf_get_info_field(b, field) \
        nir_load_push_constant((b), 1, \
                     sizeof(((struct panvk_meta_copy_buf2buf_info *)0)->field) * 8, \
                     nir_imm_int(b, 0), \
                     .base = offsetof(struct panvk_meta_copy_buf2buf_info, field), \
                     .range = ~0)
1625 
/*
 * Build a compute shader that copies a buffer in blocks of 'blksz' bytes:
 * each invocation loads one block from 'src' and stores it to 'dst'
 * (addresses come from push constants). Returns the GPU address of the
 * uploaded binary and fills 'shader_info'.
 */
static mali_ptr
panvk_meta_copy_buf2buf_shader(struct panfrost_device *pdev,
                               struct pan_pool *bin_pool,
                               unsigned blksz,
                               struct pan_shader_info *shader_info)
{
   /* FIXME: Won't work on compute queues, but we can't do that with
    * a compute shader if the destination is an AFBC surface.
    */
   nir_builder b =
      nir_builder_init_simple_shader(MESA_SHADER_COMPUTE,
                                     GENX(pan_shader_get_compiler_options)(),
                                     "panvk_meta_copy_buf2buf(blksz=%d)",
                                     blksz);

   nir_ssa_def *coord = nir_load_global_invocation_id(&b, 32);

   /* Byte offset of this invocation's block, widened to 64 bits before
    * being added to the 64-bit buffer addresses. */
   nir_ssa_def *offset =
      nir_u2u64(&b, nir_imul(&b, nir_channel(&b, coord, 0), nir_imm_int(&b, blksz)));
   nir_ssa_def *srcptr =
      nir_iadd(&b, panvk_meta_copy_buf2buf_get_info_field(&b, src), offset);
   nir_ssa_def *dstptr =
      nir_iadd(&b, panvk_meta_copy_buf2buf_get_info_field(&b, dst), offset);

   /* Move the block as up-to-32-bit components (callers pass a
    * power-of-two blksz, see panvk_meta_copy_buf2buf_init()). */
   unsigned compsz = blksz < 4 ? blksz : 4;
   unsigned ncomps = blksz / compsz;
   nir_store_global(&b, dstptr, blksz,
                    nir_load_global(&b, srcptr, blksz, ncomps, compsz * 8),
                    (1 << ncomps) - 1);

   struct panfrost_compile_inputs inputs = {
      .gpu_id = pdev->gpu_id,
      .is_blit = true,
      .no_ubo_to_push = true,
   };

   struct util_dynarray binary;

   util_dynarray_init(&binary, NULL);
   GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info);

   /* Push constants are counted in 32-bit words. */
   shader_info->push.count = DIV_ROUND_UP(sizeof(struct panvk_meta_copy_buf2buf_info), 4);

   mali_ptr shader =
      pan_pool_upload_aligned(bin_pool, binary.data, binary.size, 128);

   util_dynarray_fini(&binary);
   ralloc_free(b.shader);

   return shader;
}
1677 
1678 static void
panvk_meta_copy_buf2buf_init(struct panvk_physical_device * dev)1679 panvk_meta_copy_buf2buf_init(struct panvk_physical_device *dev)
1680 {
1681    for (unsigned i = 0; i < ARRAY_SIZE(dev->meta.copy.buf2buf); i++) {
1682       struct pan_shader_info shader_info;
1683       mali_ptr shader =
1684          panvk_meta_copy_buf2buf_shader(&dev->pdev, &dev->meta.bin_pool.base,
1685                                         1 << i, &shader_info);
1686       dev->meta.copy.buf2buf[i].rsd =
1687          panvk_meta_copy_to_buf_emit_rsd(&dev->pdev, &dev->meta.desc_pool.base,
1688                                          shader, &shader_info, false);
1689    }
1690 }
1691 
/*
 * Record a buffer->buffer copy region as a compute dispatch. The block
 * size is the largest power of two (capped at 16) that divides the source
 * address, the destination address and the copy size, so the shader can
 * use naturally-aligned loads/stores.
 */
static void
panvk_meta_copy_buf2buf(struct panvk_cmd_buffer *cmdbuf,
                        const struct panvk_buffer *src,
                        const struct panvk_buffer *dst,
                        const VkBufferCopy2 *region)
{
   struct panvk_meta_copy_buf2buf_info info = {
      .src = panvk_buffer_gpu_ptr(src, region->srcOffset),
      .dst = panvk_buffer_gpu_ptr(dst, region->dstOffset),
   };

   /* ffs() of the low 4 bits finds the first misaligned bit; if all four
    * are clear (ffs == 0), everything is at least 16-byte aligned. */
   unsigned alignment = ffs((info.src | info.dst | region->size) & 15);
   unsigned log2blksz = alignment ? alignment - 1 : 4;

   assert(log2blksz < ARRAY_SIZE(cmdbuf->device->physical_device->meta.copy.buf2buf));
   mali_ptr rsd =
      cmdbuf->device->physical_device->meta.copy.buf2buf[log2blksz].rsd;

   mali_ptr pushconsts =
      pan_pool_upload_aligned(&cmdbuf->desc_pool.base, &info, sizeof(info), 16);

   /* The copy runs in its own batch: flush whatever is pending first. */
   panvk_per_arch(cmd_close_batch)(cmdbuf);

   struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf);

   panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false);

   mali_ptr tsd = batch->tls.gpu;

   /* One single-invocation workgroup per block. */
   unsigned nblocks = region->size >> log2blksz;
   struct pan_compute_dim num_wg = { nblocks, 1, 1 };
   struct pan_compute_dim wg_sz = { 1, 1, 1};
   struct panfrost_ptr job =
     panvk_meta_copy_emit_compute_job(&cmdbuf->desc_pool.base,
                                      &batch->scoreboard,
                                      &num_wg, &wg_sz,
                                      0, 0, pushconsts, rsd, tsd);

   util_dynarray_append(&batch->jobs, void *, job.cpu);

   /* Track the BOs so the batch retains/waits on them. */
   batch->blit.src = src->bo;
   batch->blit.dst = dst->bo;
   panvk_per_arch(cmd_close_batch)(cmdbuf);
}
1736 
1737 void
panvk_per_arch(CmdCopyBuffer2)1738 panvk_per_arch(CmdCopyBuffer2)(VkCommandBuffer commandBuffer,
1739                                const VkCopyBufferInfo2 *pCopyBufferInfo)
1740 {
1741    VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
1742    VK_FROM_HANDLE(panvk_buffer, src, pCopyBufferInfo->srcBuffer);
1743    VK_FROM_HANDLE(panvk_buffer, dst, pCopyBufferInfo->dstBuffer);
1744 
1745    for (unsigned i = 0; i < pCopyBufferInfo->regionCount; i++) {
1746       panvk_meta_copy_buf2buf(cmdbuf, src, dst, &pCopyBufferInfo->pRegions[i]);
1747    }
1748 }
1749 
/* Push-constant layout shared between panvk_meta_fill_buf() and the fill
 * compute shader. PACKED so the CPU-side field offsets match the offsets
 * baked into the shader. */
struct panvk_meta_fill_buf_info {
   mali_ptr start; /* GPU address of the first 32-bit word to fill */
   uint32_t val;   /* value written to every word */
} PACKED;
1754 
/* Load one field of struct panvk_meta_fill_buf_info from the
 * push-constant area; bit size and byte offset are derived from the
 * struct definition itself so shader and CPU stay in sync. */
#define panvk_meta_fill_buf_get_info_field(b, field) \
        nir_load_push_constant((b), 1, \
                     sizeof(((struct panvk_meta_fill_buf_info *)0)->field) * 8, \
                     nir_imm_int(b, 0), \
                     .base = offsetof(struct panvk_meta_fill_buf_info, field), \
                     .range = ~0)
1761 
/*
 * Build a compute shader where each invocation writes the 32-bit push
 * constant 'val' to one word of the destination buffer ('start' + 4 *
 * invocation id). Returns the GPU address of the uploaded binary and
 * fills 'shader_info'.
 */
static mali_ptr
panvk_meta_fill_buf_shader(struct panfrost_device *pdev,
                           struct pan_pool *bin_pool,
                           struct pan_shader_info *shader_info)
{
   /* FIXME: Won't work on compute queues, but we can't do that with
    * a compute shader if the destination is an AFBC surface.
    */
   nir_builder b =
      nir_builder_init_simple_shader(MESA_SHADER_COMPUTE,
                                     GENX(pan_shader_get_compiler_options)(),
                                     "panvk_meta_fill_buf()");

   nir_ssa_def *coord = nir_load_global_invocation_id(&b, 32);

   /* Byte offset of this invocation's word, widened to 64 bits before
    * being added to the 64-bit buffer address. */
   nir_ssa_def *offset =
      nir_u2u64(&b, nir_imul(&b, nir_channel(&b, coord, 0), nir_imm_int(&b, sizeof(uint32_t))));
   nir_ssa_def *ptr =
      nir_iadd(&b, panvk_meta_fill_buf_get_info_field(&b, start), offset);
   nir_ssa_def *val = panvk_meta_fill_buf_get_info_field(&b, val);

   nir_store_global(&b, ptr, sizeof(uint32_t), val, 1);

   struct panfrost_compile_inputs inputs = {
      .gpu_id = pdev->gpu_id,
      .is_blit = true,
      .no_ubo_to_push = true,
   };

   struct util_dynarray binary;

   util_dynarray_init(&binary, NULL);
   GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info);

   /* Push constants are counted in 32-bit words. */
   shader_info->push.count = DIV_ROUND_UP(sizeof(struct panvk_meta_fill_buf_info), 4);

   mali_ptr shader =
      pan_pool_upload_aligned(bin_pool, binary.data, binary.size, 128);

   util_dynarray_fini(&binary);
   ralloc_free(b.shader);

   return shader;
}
1806 
1807 static mali_ptr
panvk_meta_fill_buf_emit_rsd(struct panfrost_device * pdev,struct pan_pool * bin_pool,struct pan_pool * desc_pool)1808 panvk_meta_fill_buf_emit_rsd(struct panfrost_device *pdev,
1809                              struct pan_pool *bin_pool,
1810                              struct pan_pool *desc_pool)
1811 {
1812    struct pan_shader_info shader_info;
1813 
1814    mali_ptr shader =
1815       panvk_meta_fill_buf_shader(pdev, bin_pool, &shader_info);
1816 
1817    struct panfrost_ptr rsd_ptr =
1818       pan_pool_alloc_desc_aggregate(desc_pool,
1819                                     PAN_DESC(RENDERER_STATE));
1820 
1821    pan_pack(rsd_ptr.cpu, RENDERER_STATE, cfg) {
1822       pan_shader_prepare_rsd(&shader_info, shader, &cfg);
1823    }
1824 
1825    return rsd_ptr.gpu;
1826 }
1827 
1828 static void
panvk_meta_fill_buf_init(struct panvk_physical_device * dev)1829 panvk_meta_fill_buf_init(struct panvk_physical_device *dev)
1830 {
1831    dev->meta.copy.fillbuf.rsd =
1832       panvk_meta_fill_buf_emit_rsd(&dev->pdev, &dev->meta.bin_pool.base,
1833                                    &dev->meta.desc_pool.base);
1834 }
1835 
/*
 * Record a buffer fill as a compute dispatch: one single-invocation
 * workgroup per 32-bit word written.
 */
static void
panvk_meta_fill_buf(struct panvk_cmd_buffer *cmdbuf,
                    const struct panvk_buffer *dst,
                    VkDeviceSize size, VkDeviceSize offset,
                    uint32_t val)
{
   struct panvk_meta_fill_buf_info info = {
      .start = panvk_buffer_gpu_ptr(dst, offset),
      .val = val,
   };
   /* Resolve VK_WHOLE_SIZE / clamp to the buffer range. */
   size = panvk_buffer_range(dst, offset, size);

   /* From the Vulkan spec:
    *
    *    "size is the number of bytes to fill, and must be either a multiple
    *    of 4, or VK_WHOLE_SIZE to fill the range from offset to the end of
    *    the buffer. If VK_WHOLE_SIZE is used and the remaining size of the
    *    buffer is not a multiple of 4, then the nearest smaller multiple is
    *    used."
    */
   size &= ~3ull;

   assert(!(offset & 3) && !(size & 3));

   unsigned nwords = size / sizeof(uint32_t);
   mali_ptr rsd =
      cmdbuf->device->physical_device->meta.copy.fillbuf.rsd;

   mali_ptr pushconsts =
      pan_pool_upload_aligned(&cmdbuf->desc_pool.base, &info, sizeof(info), 16);

   /* The fill runs in its own batch: flush whatever is pending first. */
   panvk_per_arch(cmd_close_batch)(cmdbuf);

   struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf);

   panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false);

   mali_ptr tsd = batch->tls.gpu;

   struct pan_compute_dim num_wg = { nwords, 1, 1 };
   struct pan_compute_dim wg_sz = { 1, 1, 1};
   struct panfrost_ptr job =
     panvk_meta_copy_emit_compute_job(&cmdbuf->desc_pool.base,
                                      &batch->scoreboard,
                                      &num_wg, &wg_sz,
                                      0, 0, pushconsts, rsd, tsd);

   util_dynarray_append(&batch->jobs, void *, job.cpu);

   /* Track the destination BO so the batch retains/waits on it. */
   batch->blit.dst = dst->bo;
   panvk_per_arch(cmd_close_batch)(cmdbuf);
}
1888 
1889 void
panvk_per_arch(CmdFillBuffer)1890 panvk_per_arch(CmdFillBuffer)(VkCommandBuffer commandBuffer,
1891                               VkBuffer dstBuffer,
1892                               VkDeviceSize dstOffset,
1893                               VkDeviceSize fillSize,
1894                               uint32_t data)
1895 {
1896    VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
1897    VK_FROM_HANDLE(panvk_buffer, dst, dstBuffer);
1898 
1899    panvk_meta_fill_buf(cmdbuf, dst, fillSize, dstOffset, data);
1900 }
1901 
/*
 * Record a buffer update: stage the CPU data in the descriptor pool,
 * then reuse the 4-byte-block buf2buf copy shader to move it into the
 * destination buffer.
 */
static void
panvk_meta_update_buf(struct panvk_cmd_buffer *cmdbuf,
                      const struct panvk_buffer *dst, VkDeviceSize offset,
                      VkDeviceSize size, const void *data)
{
   struct panvk_meta_copy_buf2buf_info info = {
      /* Staging copy of 'data' lives in the command buffer's desc pool. */
      .src = pan_pool_upload_aligned(&cmdbuf->desc_pool.base, data, size, 4),
      .dst = panvk_buffer_gpu_ptr(dst, offset),
   };

   /* Always use the 4-byte-block variant (log2(4) == 2). */
   unsigned log2blksz = ffs(sizeof(uint32_t)) - 1;

   mali_ptr rsd =
      cmdbuf->device->physical_device->meta.copy.buf2buf[log2blksz].rsd;

   mali_ptr pushconsts =
      pan_pool_upload_aligned(&cmdbuf->desc_pool.base, &info, sizeof(info), 16);

   /* The update runs in its own batch: flush whatever is pending first. */
   panvk_per_arch(cmd_close_batch)(cmdbuf);

   struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf);

   panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false);

   mali_ptr tsd = batch->tls.gpu;

   /* One single-invocation workgroup per 4-byte block. */
   unsigned nblocks = size >> log2blksz;
   struct pan_compute_dim num_wg = { nblocks, 1, 1 };
   struct pan_compute_dim wg_sz = { 1, 1, 1};
   struct panfrost_ptr job =
     panvk_meta_copy_emit_compute_job(&cmdbuf->desc_pool.base,
                                      &batch->scoreboard,
                                      &num_wg, &wg_sz,
                                      0, 0, pushconsts, rsd, tsd);

   util_dynarray_append(&batch->jobs, void *, job.cpu);

   /* Track the destination BO so the batch retains/waits on it. */
   batch->blit.dst = dst->bo;
   panvk_per_arch(cmd_close_batch)(cmdbuf);
}
1942 
1943 void
panvk_per_arch(CmdUpdateBuffer)1944 panvk_per_arch(CmdUpdateBuffer)(VkCommandBuffer commandBuffer,
1945                                 VkBuffer dstBuffer,
1946                                 VkDeviceSize dstOffset,
1947                                 VkDeviceSize dataSize,
1948                                 const void *pData)
1949 {
1950    VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
1951    VK_FROM_HANDLE(panvk_buffer, dst, dstBuffer);
1952 
1953    panvk_meta_update_buf(cmdbuf, dst, dstOffset, dataSize, pData);
1954 }
1955 
/*
 * Pre-compile every meta-copy shader variant and bake its renderer state
 * descriptor at physical-device init, so command recording only has to
 * look them up.
 */
void
panvk_per_arch(meta_copy_init)(struct panvk_physical_device *dev)
{
   panvk_meta_copy_img2img_init(dev, false);
   panvk_meta_copy_img2img_init(dev, true);
   panvk_meta_copy_buf2img_init(dev);
   panvk_meta_copy_img2buf_init(dev);
   panvk_meta_copy_buf2buf_init(dev);
   panvk_meta_fill_buf_init(dev);
}
1966