/*
 * Copyright © 2021 Collabora Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "gen_macros.h"

#include "nir/nir_builder.h"
#include "pan_encoder.h"
#include "pan_shader.h"

#include "panvk_private.h"

static mali_ptr
panvk_meta_copy_img_emit_texture(struct panfrost_device *pdev,
                                 struct pan_pool *desc_pool,
                                 const struct pan_image_view *view)
{
   struct panfrost_ptr texture =
      pan_pool_alloc_desc(desc_pool, TEXTURE);
   size_t payload_size =
      GENX(panfrost_estimate_texture_payload_size)(view);
   struct panfrost_ptr surfaces =
      pan_pool_alloc_aligned(desc_pool, payload_size,
                             pan_alignment(SURFACE_WITH_STRIDE));

   GENX(panfrost_new_texture)(pdev, view, texture.cpu, &surfaces);

   return texture.gpu;
}

static mali_ptr
panvk_meta_copy_img_emit_sampler(struct panfrost_device *pdev,
                                 struct pan_pool *desc_pool)
{
   struct panfrost_ptr sampler =
      pan_pool_alloc_desc(desc_pool, SAMPLER);

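   /* Copies do 1:1 texel fetches, so pick nearest filtering with
    * unnormalized coordinates.
    */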
   pan_pack(sampler.cpu, SAMPLER, cfg) {
      cfg.seamless_cube_map = false;
      cfg.normalized_coordinates = false;
      cfg.minify_nearest = true;
      cfg.magnify_nearest = true;
   }

   return sampler.gpu;
}

static void
panvk_meta_copy_emit_varying(struct pan_pool *pool,
                             mali_ptr coordinates,
                             mali_ptr *varying_bufs,
                             mali_ptr *varyings)
{
   struct panfrost_ptr varying =
      pan_pool_alloc_desc(pool, ATTRIBUTE);
   struct panfrost_ptr varying_buffer =
      pan_pool_alloc_desc_array(pool, 2, ATTRIBUTE_BUFFER);

   pan_pack(varying_buffer.cpu, ATTRIBUTE_BUFFER, cfg) {
      cfg.pointer = coordinates;
      cfg.stride = 4 * sizeof(uint32_t);
      cfg.size = cfg.stride * 4;
   }

   /* Bifrost needs an empty desc to mark end of prefetching */
   pan_pack(varying_buffer.cpu + pan_size(ATTRIBUTE_BUFFER),
            ATTRIBUTE_BUFFER, cfg);

   pan_pack(varying.cpu, ATTRIBUTE, cfg) {
      cfg.buffer_index = 0;
      cfg.format = pool->dev->formats[PIPE_FORMAT_R32G32B32_FLOAT].hw;
   }

   *varyings = varying.gpu;
   *varying_bufs = varying_buffer.gpu;
}

static void
panvk_meta_copy_emit_dcd(struct pan_pool *pool,
                         mali_ptr src_coords, mali_ptr dst_coords,
                         mali_ptr texture, mali_ptr sampler,
                         mali_ptr vpd, mali_ptr tsd, mali_ptr rsd,
                         mali_ptr push_constants, void *out)
{
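   /* Fill the draw call descriptor shared by the tiler-based (img2img,
    * buf2img) and compute-based (img2buf) copy paths; callers pass 0 for
    * the resources their path doesn't use.
    */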
   pan_pack(out, DRAW, cfg) {
      cfg.thread_storage = tsd;
      cfg.state = rsd;
      cfg.push_uniforms = push_constants;
      cfg.position = dst_coords;
      if (src_coords) {
         panvk_meta_copy_emit_varying(pool, src_coords,
                                      &cfg.varying_buffers,
                                      &cfg.varyings);
      }
      cfg.viewport = vpd;
      cfg.textures = texture;
      cfg.samplers = sampler;
   }
}

static struct panfrost_ptr
panvk_meta_copy_emit_tiler_job(struct pan_pool *desc_pool,
                               struct pan_scoreboard *scoreboard,
                               mali_ptr src_coords, mali_ptr dst_coords,
                               mali_ptr texture, mali_ptr sampler,
                               mali_ptr push_constants,
                               mali_ptr vpd, mali_ptr rsd,
                               mali_ptr tsd, mali_ptr tiler)
{
   struct panfrost_ptr job =
      pan_pool_alloc_desc(desc_pool, TILER_JOB);

   panvk_meta_copy_emit_dcd(desc_pool, src_coords, dst_coords,
                            texture, sampler, vpd, tsd, rsd, push_constants,
                            pan_section_ptr(job.cpu, TILER_JOB, DRAW));

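   /* The copy is drawn as a 4-vertex triangle strip covering the
    * destination rectangle.
    */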
   pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE, cfg) {
      cfg.draw_mode = MALI_DRAW_MODE_TRIANGLE_STRIP;
      cfg.index_count = 4;
      cfg.job_task_split = 6;
   }

   pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE_SIZE, cfg) {
      cfg.constant = 1.0f;
   }

   void *invoc = pan_section_ptr(job.cpu,
                                 TILER_JOB,
                                 INVOCATION);
   panfrost_pack_work_groups_compute(invoc, 1, 4,
                                     1, 1, 1, 1, true, false);

   pan_section_pack(job.cpu, TILER_JOB, PADDING, cfg);
   pan_section_pack(job.cpu, TILER_JOB, TILER, cfg) {
      cfg.address = tiler;
   }

   panfrost_add_job(desc_pool, scoreboard, MALI_JOB_TYPE_TILER,
                    false, false, 0, 0, &job, false);
   return job;
}

static struct panfrost_ptr
panvk_meta_copy_emit_compute_job(struct pan_pool *desc_pool,
                                 struct pan_scoreboard *scoreboard,
                                 const struct pan_compute_dim *num_wg,
                                 const struct pan_compute_dim *wg_sz,
                                 mali_ptr texture, mali_ptr sampler,
                                 mali_ptr push_constants,
                                 mali_ptr rsd, mali_ptr tsd)
{
   struct panfrost_ptr job =
      pan_pool_alloc_desc(desc_pool, COMPUTE_JOB);

   void *invoc = pan_section_ptr(job.cpu,
                                 COMPUTE_JOB,
                                 INVOCATION);
   panfrost_pack_work_groups_compute(invoc, num_wg->x, num_wg->y, num_wg->z,
                                     wg_sz->x, wg_sz->y, wg_sz->z,
                                     false, false);

   pan_section_pack(job.cpu, COMPUTE_JOB, PARAMETERS, cfg) {
      cfg.job_task_split = 8;
   }

   panvk_meta_copy_emit_dcd(desc_pool, 0, 0, texture, sampler,
                            0, tsd, rsd, push_constants,
                            pan_section_ptr(job.cpu, COMPUTE_JOB, DRAW));

   panfrost_add_job(desc_pool, scoreboard, MALI_JOB_TYPE_COMPUTE,
                    false, false, 0, 0, &job, false);
   return job;
}


static uint32_t
panvk_meta_copy_img_bifrost_raw_format(unsigned texelsize)
{
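   /* Non-blendable copies go through a raw UINT render target matching the
    * texel size. The hardware format enum lives in the upper bits of the
    * memory-format word of the internal conversion descriptor, hence the
    * shift by 12.
    */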
   switch (texelsize) {
   case 6: return MALI_RGB16UI << 12;
   case 8: return MALI_RG32UI << 12;
   case 12: return MALI_RGB32UI << 12;
   case 16: return MALI_RGBA32UI << 12;
   default: unreachable("Invalid texel size\n");
   }
}

static mali_ptr
panvk_meta_copy_to_img_emit_rsd(struct panfrost_device *pdev,
                                struct pan_pool *desc_pool,
                                mali_ptr shader,
                                const struct pan_shader_info *shader_info,
                                enum pipe_format fmt, unsigned wrmask,
                                bool from_img)
{
   struct panfrost_ptr rsd_ptr =
      pan_pool_alloc_desc_aggregate(desc_pool,
                                    PAN_DESC(RENDERER_STATE),
                                    PAN_DESC_ARRAY(1, BLEND));

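   /* A partial write on a blendable format is done with a fixed-function
    * blend color mask, while a partial write on a raw format requires the
    * shader to read back the tile buffer. Both cases prevent the
    * forward-pixel-kill optimization used below.
    */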
   bool raw = util_format_get_blocksize(fmt) > 4;
   unsigned fullmask = (1 << util_format_get_nr_components(fmt)) - 1;
   bool partialwrite = fullmask != wrmask && !raw;
   bool readstb = fullmask != wrmask && raw;

   pan_pack(rsd_ptr.cpu, RENDERER_STATE, cfg) {
      pan_shader_prepare_rsd(shader_info, shader, &cfg);
      if (from_img) {
         cfg.shader.varying_count = 1;
         cfg.shader.texture_count = 1;
         cfg.shader.sampler_count = 1;
      }
      cfg.properties.depth_source = MALI_DEPTH_SOURCE_FIXED_FUNCTION;
      cfg.multisample_misc.sample_mask = UINT16_MAX;
      cfg.multisample_misc.depth_function = MALI_FUNC_ALWAYS;
      cfg.stencil_mask_misc.stencil_mask_front = 0xFF;
      cfg.stencil_mask_misc.stencil_mask_back = 0xFF;
      cfg.stencil_front.compare_function = MALI_FUNC_ALWAYS;
      cfg.stencil_front.stencil_fail = MALI_STENCIL_OP_REPLACE;
      cfg.stencil_front.depth_fail = MALI_STENCIL_OP_REPLACE;
      cfg.stencil_front.depth_pass = MALI_STENCIL_OP_REPLACE;
      cfg.stencil_front.mask = 0xFF;
      cfg.stencil_back = cfg.stencil_front;

      cfg.properties.allow_forward_pixel_to_be_killed = true;
      cfg.properties.allow_forward_pixel_to_kill =
         !partialwrite && !readstb;
      cfg.properties.zs_update_operation =
         MALI_PIXEL_KILL_STRONG_EARLY;
      cfg.properties.pixel_kill_operation =
         MALI_PIXEL_KILL_FORCE_EARLY;
   }

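   /* The blend equation amounts to a straight source copy (RGB = src,
    * alpha = src). OPAQUE mode is used when all components are written;
    * FIXED_FUNCTION mode is only needed when a color mask forces the
    * destination to be loaded.
    */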
   pan_pack(rsd_ptr.cpu + pan_size(RENDERER_STATE), BLEND, cfg) {
      cfg.round_to_fb_precision = true;
      cfg.load_destination = partialwrite;
      cfg.equation.rgb.a = MALI_BLEND_OPERAND_A_SRC;
      cfg.equation.rgb.b = MALI_BLEND_OPERAND_B_SRC;
      cfg.equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO;
      cfg.equation.alpha.a = MALI_BLEND_OPERAND_A_SRC;
      cfg.equation.alpha.b = MALI_BLEND_OPERAND_B_SRC;
      cfg.equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO;
      cfg.internal.mode =
         partialwrite ?
         MALI_BLEND_MODE_FIXED_FUNCTION :
         MALI_BLEND_MODE_OPAQUE;
      cfg.equation.color_mask = partialwrite ? wrmask : 0xf;
      cfg.internal.fixed_function.num_comps = 4;
      if (!raw) {
         cfg.internal.fixed_function.conversion.memory_format =
            panfrost_format_to_bifrost_blend(pdev, fmt, false);
         cfg.internal.fixed_function.conversion.register_format =
            MALI_REGISTER_FILE_FORMAT_F32;
      } else {
         unsigned imgtexelsz = util_format_get_blocksize(fmt);

         cfg.internal.fixed_function.conversion.memory_format =
            panvk_meta_copy_img_bifrost_raw_format(imgtexelsz);
         cfg.internal.fixed_function.conversion.register_format =
            (imgtexelsz & 2) ?
            MALI_REGISTER_FILE_FORMAT_U16 :
            MALI_REGISTER_FILE_FORMAT_U32;
      }
   }

   return rsd_ptr.gpu;
}

static mali_ptr
panvk_meta_copy_to_buf_emit_rsd(struct panfrost_device *pdev,
                                struct pan_pool *desc_pool,
                                mali_ptr shader,
                                const struct pan_shader_info *shader_info,
                                bool from_img)
{
   struct panfrost_ptr rsd_ptr =
      pan_pool_alloc_desc_aggregate(desc_pool,
                                    PAN_DESC(RENDERER_STATE));

   pan_pack(rsd_ptr.cpu, RENDERER_STATE, cfg) {
      pan_shader_prepare_rsd(shader_info, shader, &cfg);
      if (from_img) {
         cfg.shader.texture_count = 1;
         cfg.shader.sampler_count = 1;
      }
   }

   return rsd_ptr.gpu;
}

static mali_ptr
panvk_meta_copy_img2img_shader(struct panfrost_device *pdev,
                               struct pan_pool *bin_pool,
                               enum pipe_format srcfmt,
                               enum pipe_format dstfmt, unsigned dstmask,
                               unsigned texdim, bool texisarray, bool is_ms,
                               struct pan_shader_info *shader_info)
{
   nir_builder b =
      nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
                                     GENX(pan_shader_get_compiler_options)(),
                                     "panvk_meta_copy_img2img(srcfmt=%s,dstfmt=%s,%dD%s%s)",
                                     util_format_name(srcfmt), util_format_name(dstfmt),
                                     texdim, texisarray ? "[]" : "", is_ms ? ",ms" : "");

   nir_variable *coord_var =
      nir_variable_create(b.shader, nir_var_shader_in,
                          glsl_vector_type(GLSL_TYPE_FLOAT, texdim + texisarray),
                          "coord");
   coord_var->data.location = VARYING_SLOT_VAR0;
   nir_ssa_def *coord = nir_f2u32(&b, nir_load_var(&b, coord_var));

   nir_tex_instr *tex = nir_tex_instr_create(b.shader, is_ms ? 2 : 1);
   tex->op = is_ms ? nir_texop_txf_ms : nir_texop_txf;
   tex->texture_index = 0;
   tex->is_array = texisarray;
   tex->dest_type = util_format_is_unorm(srcfmt) ?
                    nir_type_float32 : nir_type_uint32;

   switch (texdim) {
   case 1: tex->sampler_dim = GLSL_SAMPLER_DIM_1D; break;
   case 2: tex->sampler_dim = GLSL_SAMPLER_DIM_2D; break;
   case 3: tex->sampler_dim = GLSL_SAMPLER_DIM_3D; break;
   default: unreachable("Invalid texture dimension");
   }

   tex->src[0].src_type = nir_tex_src_coord;
   tex->src[0].src = nir_src_for_ssa(coord);
   tex->coord_components = texdim + texisarray;

   if (is_ms) {
      tex->src[1].src_type = nir_tex_src_ms_index;
      tex->src[1].src = nir_src_for_ssa(nir_load_sample_id(&b));
   }

   nir_ssa_dest_init(&tex->instr, &tex->dest, 4,
                     nir_alu_type_get_type_size(tex->dest_type), NULL);
   nir_builder_instr_insert(&b, &tex->instr);

   nir_ssa_def *texel = &tex->dest.ssa;

   unsigned dstcompsz =
      util_format_get_component_bits(dstfmt, UTIL_FORMAT_COLORSPACE_RGB, 0);
   unsigned ndstcomps = util_format_get_nr_components(dstfmt);
   const struct glsl_type *outtype = NULL;

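   /* RGB565 and RG88 are both 16 bits per texel, so copying between them is
    * a bitwise reinterpretation: quantize the 5/6/5 unorm channels back to
    * integers and regroup the 16 bits into two 8-bit components (or do the
    * reverse below).
    */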
   if (srcfmt == PIPE_FORMAT_R5G6B5_UNORM && dstfmt == PIPE_FORMAT_R8G8_UNORM) {
      nir_ssa_def *rgb =
         nir_f2u32(&b, nir_fmul(&b, texel,
                                nir_vec3(&b,
                                         nir_imm_float(&b, 31),
                                         nir_imm_float(&b, 63),
                                         nir_imm_float(&b, 31))));
      nir_ssa_def *rg =
         nir_vec2(&b,
                  nir_ior(&b, nir_channel(&b, rgb, 0),
                          nir_ishl(&b, nir_channel(&b, rgb, 1),
                                   nir_imm_int(&b, 5))),
                  nir_ior(&b,
                          nir_ushr_imm(&b, nir_channel(&b, rgb, 1), 3),
                          nir_ishl(&b, nir_channel(&b, rgb, 2),
                                   nir_imm_int(&b, 3))));
      rg = nir_iand_imm(&b, rg, 255);
      texel = nir_fmul_imm(&b, nir_u2f32(&b, rg), 1.0 / 255);
      outtype = glsl_vector_type(GLSL_TYPE_FLOAT, 2);
   } else if (srcfmt == PIPE_FORMAT_R8G8_UNORM && dstfmt == PIPE_FORMAT_R5G6B5_UNORM) {
      nir_ssa_def *rg = nir_f2u32(&b, nir_fmul_imm(&b, texel, 255));
      nir_ssa_def *rgb =
         nir_vec3(&b,
                  nir_channel(&b, rg, 0),
                  nir_ior(&b,
                          nir_ushr_imm(&b, nir_channel(&b, rg, 0), 5),
                          nir_ishl(&b, nir_channel(&b, rg, 1),
                                   nir_imm_int(&b, 3))),
                  nir_ushr_imm(&b, nir_channel(&b, rg, 1), 3));
      rgb = nir_iand(&b, rgb,
                     nir_vec3(&b,
                              nir_imm_int(&b, 31),
                              nir_imm_int(&b, 63),
                              nir_imm_int(&b, 31)));
      texel = nir_fmul(&b, nir_u2f32(&b, rgb),
                       nir_vec3(&b,
                                nir_imm_float(&b, 1.0 / 31),
                                nir_imm_float(&b, 1.0 / 63),
                                nir_imm_float(&b, 1.0 / 31)));
      outtype = glsl_vector_type(GLSL_TYPE_FLOAT, 3);
   } else {
      assert(srcfmt == dstfmt);
      enum glsl_base_type basetype;
      if (util_format_is_unorm(dstfmt)) {
         basetype = GLSL_TYPE_FLOAT;
      } else if (dstcompsz == 16) {
         basetype = GLSL_TYPE_UINT16;
      } else {
         assert(dstcompsz == 32);
         basetype = GLSL_TYPE_UINT;
      }

      if (dstcompsz == 16)
         texel = nir_u2u16(&b, texel);

      texel = nir_channels(&b, texel, (1 << ndstcomps) - 1);
      outtype = glsl_vector_type(basetype, ndstcomps);
   }

   nir_variable *out =
      nir_variable_create(b.shader, nir_var_shader_out, outtype, "out");
   out->data.location = FRAG_RESULT_DATA0;

   unsigned fullmask = (1 << ndstcomps) - 1;
   if (dstcompsz > 8 && dstmask != fullmask) {
      nir_ssa_def *oldtexel = nir_load_var(&b, out);
      nir_ssa_def *dstcomps[4];

      for (unsigned i = 0; i < ndstcomps; i++) {
         if (dstmask & BITFIELD_BIT(i))
            dstcomps[i] = nir_channel(&b, texel, i);
         else
            dstcomps[i] = nir_channel(&b, oldtexel, i);
      }

      texel = nir_vec(&b, dstcomps, ndstcomps);
   }

   nir_store_var(&b, out, texel, 0xff);

   struct panfrost_compile_inputs inputs = {
      .gpu_id = pdev->gpu_id,
      .is_blit = true,
      .no_ubo_to_push = true,
   };

   pan_pack(&inputs.bifrost.rt_conv[0], INTERNAL_CONVERSION, cfg) {
      cfg.memory_format = (dstcompsz == 16 ? MALI_RG16UI : MALI_RG32UI) << 12;
      cfg.register_format = dstcompsz == 16 ?
                            MALI_REGISTER_FILE_FORMAT_U16 :
                            MALI_REGISTER_FILE_FORMAT_U32;
   }
   inputs.bifrost.static_rt_conv = true;

   struct util_dynarray binary;

   util_dynarray_init(&binary, NULL);
   GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info);

   shader_info->fs.sample_shading = is_ms;

   mali_ptr shader =
      pan_pool_upload_aligned(bin_pool, binary.data, binary.size, 128);

   util_dynarray_fini(&binary);
   ralloc_free(b.shader);

   return shader;
}

static enum pipe_format
panvk_meta_copy_img_format(enum pipe_format fmt)
{
   /* We can't use a non-compressed format when handling a tiled/AFBC
    * compressed format because the tile sizes differ (4x4 blocks for
    * compressed formats, 16x16 texels for non-compressed ones).
    */
   assert(!util_format_is_compressed(fmt));

   /* Pick blendable formats when we can, otherwise pick the UINT variant
    * matching the texel size.
    */
   switch (util_format_get_blocksize(fmt)) {
   case 16: return PIPE_FORMAT_R32G32B32A32_UINT;
   case 12: return PIPE_FORMAT_R32G32B32_UINT;
   case 8: return PIPE_FORMAT_R32G32_UINT;
   case 6: return PIPE_FORMAT_R16G16B16_UINT;
   case 4: return PIPE_FORMAT_R8G8B8A8_UNORM;
   case 2: return (fmt == PIPE_FORMAT_R5G6B5_UNORM ||
                   fmt == PIPE_FORMAT_B5G6R5_UNORM) ?
                  PIPE_FORMAT_R5G6B5_UNORM : PIPE_FORMAT_R8G8_UNORM;
   case 1: return PIPE_FORMAT_R8_UNORM;
   default: unreachable("Unsupported format\n");
   }
}

struct panvk_meta_copy_img2img_format_info {
   enum pipe_format srcfmt;
   enum pipe_format dstfmt;
   unsigned dstmask;
} PACKED;

static const struct panvk_meta_copy_img2img_format_info panvk_meta_copy_img2img_fmts[] = {
   { PIPE_FORMAT_R8_UNORM, PIPE_FORMAT_R8_UNORM, 0x1},
   { PIPE_FORMAT_R5G6B5_UNORM, PIPE_FORMAT_R5G6B5_UNORM, 0x7},
   { PIPE_FORMAT_R5G6B5_UNORM, PIPE_FORMAT_R8G8_UNORM, 0x3},
   { PIPE_FORMAT_R8G8_UNORM, PIPE_FORMAT_R5G6B5_UNORM, 0x7},
   { PIPE_FORMAT_R8G8_UNORM, PIPE_FORMAT_R8G8_UNORM, 0x3},
   /* Z24S8(depth) */
   { PIPE_FORMAT_R8G8B8A8_UNORM, PIPE_FORMAT_R8G8B8A8_UNORM, 0x7 },
   /* Z24S8(stencil) */
   { PIPE_FORMAT_R8G8B8A8_UNORM, PIPE_FORMAT_R8G8B8A8_UNORM, 0x8 },
   { PIPE_FORMAT_R8G8B8A8_UNORM, PIPE_FORMAT_R8G8B8A8_UNORM, 0xf },
   { PIPE_FORMAT_R16G16B16_UINT, PIPE_FORMAT_R16G16B16_UINT, 0x7 },
   { PIPE_FORMAT_R32G32_UINT, PIPE_FORMAT_R32G32_UINT, 0x3 },
   /* Z32S8X24(depth) */
   { PIPE_FORMAT_R32G32_UINT, PIPE_FORMAT_R32G32_UINT, 0x1 },
   /* Z32S8X24(stencil) */
   { PIPE_FORMAT_R32G32_UINT, PIPE_FORMAT_R32G32_UINT, 0x2 },
   { PIPE_FORMAT_R32G32B32_UINT, PIPE_FORMAT_R32G32B32_UINT, 0x7 },
   { PIPE_FORMAT_R32G32B32A32_UINT, PIPE_FORMAT_R32G32B32A32_UINT, 0xf },
};

static unsigned
panvk_meta_copy_img2img_format_idx(struct panvk_meta_copy_img2img_format_info key)
{
   STATIC_ASSERT(ARRAY_SIZE(panvk_meta_copy_img2img_fmts) == PANVK_META_COPY_IMG2IMG_NUM_FORMATS);

   for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_img2img_fmts); i++) {
      if (!memcmp(&key, &panvk_meta_copy_img2img_fmts[i], sizeof(key)))
         return i;
   }

   unreachable("Invalid image format\n");
}

static unsigned
panvk_meta_copy_img_mask(enum pipe_format imgfmt, VkImageAspectFlags aspectMask)
{
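   /* Translate the copied aspect into a component write mask on the copy
    * format returned by panvk_meta_copy_img_format() (e.g. the depth bits
    * of Z24S8 map to the RGB components of RGBA8888, the stencil bits to
    * the alpha component).
    */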
   if (aspectMask != VK_IMAGE_ASPECT_DEPTH_BIT &&
       aspectMask != VK_IMAGE_ASPECT_STENCIL_BIT) {
      enum pipe_format outfmt = panvk_meta_copy_img_format(imgfmt);

      return (1 << util_format_get_nr_components(outfmt)) - 1;
   }

   switch (imgfmt) {
   case PIPE_FORMAT_S8_UINT:
      return 1;
   case PIPE_FORMAT_Z16_UNORM:
      return 3;
   case PIPE_FORMAT_Z16_UNORM_S8_UINT:
      return aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT ? 3 : 8;
   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
      return aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT ? 7 : 8;
   case PIPE_FORMAT_Z24X8_UNORM:
      assert(aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT);
      return 7;
   case PIPE_FORMAT_Z32_FLOAT:
      return 0xf;
   case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
      return aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT ? 1 : 2;
   default:
      unreachable("Invalid depth format\n");
   }
}

static void
panvk_meta_copy_img2img(struct panvk_cmd_buffer *cmdbuf,
                        const struct panvk_image *src,
                        const struct panvk_image *dst,
                        const VkImageCopy2 *region)
{
   struct panfrost_device *pdev = &cmdbuf->device->physical_device->pdev;
   struct pan_fb_info *fbinfo = &cmdbuf->state.fb.info;
   struct panvk_meta_copy_img2img_format_info key = {
      .srcfmt = panvk_meta_copy_img_format(src->pimage.layout.format),
      .dstfmt = panvk_meta_copy_img_format(dst->pimage.layout.format),
      .dstmask = panvk_meta_copy_img_mask(dst->pimage.layout.format,
                                          region->dstSubresource.aspectMask),
   };

   assert(src->pimage.layout.nr_samples == dst->pimage.layout.nr_samples);

   unsigned texdimidx =
      panvk_meta_copy_tex_type(src->pimage.layout.dim,
                               src->pimage.layout.array_size > 1);
   unsigned fmtidx =
      panvk_meta_copy_img2img_format_idx(key);
   unsigned ms = dst->pimage.layout.nr_samples > 1 ? 1 : 0;

   mali_ptr rsd =
      cmdbuf->device->physical_device->meta.copy.img2img[ms][texdimidx][fmtidx].rsd;

   struct pan_image_view srcview = {
      .format = key.srcfmt,
      .dim = src->pimage.layout.dim == MALI_TEXTURE_DIMENSION_CUBE ?
             MALI_TEXTURE_DIMENSION_2D : src->pimage.layout.dim,
      .image = &src->pimage,
      .nr_samples = src->pimage.layout.nr_samples,
      .first_level = region->srcSubresource.mipLevel,
      .last_level = region->srcSubresource.mipLevel,
      .first_layer = region->srcSubresource.baseArrayLayer,
      .last_layer = region->srcSubresource.baseArrayLayer + region->srcSubresource.layerCount - 1,
      .swizzle = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W },
   };

   struct pan_image_view dstview = {
      .format = key.dstfmt,
      .dim = MALI_TEXTURE_DIMENSION_2D,
      .image = &dst->pimage,
      .nr_samples = dst->pimage.layout.nr_samples,
      .first_level = region->dstSubresource.mipLevel,
      .last_level = region->dstSubresource.mipLevel,
      .swizzle = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W },
   };

   unsigned minx = MAX2(region->dstOffset.x, 0);
   unsigned miny = MAX2(region->dstOffset.y, 0);
   unsigned maxx = MAX2(region->dstOffset.x + region->extent.width - 1, 0);
   unsigned maxy = MAX2(region->dstOffset.y + region->extent.height - 1, 0);

   mali_ptr vpd =
      panvk_per_arch(meta_emit_viewport)(&cmdbuf->desc_pool.base,
                                         minx, miny, maxx, maxy);

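   /* Destination rectangle expressed as a 4-vertex screen-space quad
    * (x, y, z, w per vertex), matching the triangle strip emitted by
    * panvk_meta_copy_emit_tiler_job().
    */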
   float dst_rect[] = {
      minx, miny, 0.0, 1.0,
      maxx + 1, miny, 0.0, 1.0,
      minx, maxy + 1, 0.0, 1.0,
      maxx + 1, maxy + 1, 0.0, 1.0,
   };

   mali_ptr dst_coords =
      pan_pool_upload_aligned(&cmdbuf->desc_pool.base, dst_rect,
                              sizeof(dst_rect), 64);

   /* TODO: don't force preloads of dst resources if unneeded */

   unsigned width = u_minify(dst->pimage.layout.width, region->dstSubresource.mipLevel);
   unsigned height = u_minify(dst->pimage.layout.height, region->dstSubresource.mipLevel);
   cmdbuf->state.fb.crc_valid[0] = false;
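   /* Align the render extent on 32x32 tile boundaries (clamped to the image
    * size); the destination is preloaded, so texels outside the copy window
    * are preserved.
    */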
   *fbinfo = (struct pan_fb_info){
      .width = width,
      .height = height,
      .extent.minx = minx & ~31,
      .extent.miny = miny & ~31,
      .extent.maxx = MIN2(ALIGN_POT(maxx + 1, 32), width) - 1,
      .extent.maxy = MIN2(ALIGN_POT(maxy + 1, 32), height) - 1,
      .nr_samples = dst->pimage.layout.nr_samples,
      .rt_count = 1,
      .rts[0].view = &dstview,
      .rts[0].preload = true,
      .rts[0].crc_valid = &cmdbuf->state.fb.crc_valid[0],
   };

   mali_ptr texture =
      panvk_meta_copy_img_emit_texture(pdev, &cmdbuf->desc_pool.base, &srcview);
   mali_ptr sampler =
      panvk_meta_copy_img_emit_sampler(pdev, &cmdbuf->desc_pool.base);

   panvk_per_arch(cmd_close_batch)(cmdbuf);

   minx = MAX2(region->srcOffset.x, 0);
   miny = MAX2(region->srcOffset.y, 0);
   maxx = MAX2(region->srcOffset.x + region->extent.width - 1, 0);
   maxy = MAX2(region->srcOffset.y + region->extent.height - 1, 0);
   assert(region->dstOffset.z >= 0);

   unsigned first_src_layer = MAX2(0, region->srcOffset.z);
   unsigned first_dst_layer = MAX2(region->dstSubresource.baseArrayLayer, region->dstOffset.z);
   unsigned nlayers = MAX2(region->dstSubresource.layerCount, region->extent.depth);
   for (unsigned l = 0; l < nlayers; l++) {
      unsigned src_l = l + first_src_layer;
      float src_rect[] = {
         minx, miny, src_l, 1.0,
         maxx + 1, miny, src_l, 1.0,
         minx, maxy + 1, src_l, 1.0,
         maxx + 1, maxy + 1, src_l, 1.0,
      };

      mali_ptr src_coords =
         pan_pool_upload_aligned(&cmdbuf->desc_pool.base, src_rect,
                                 sizeof(src_rect), 64);

      struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf);

      dstview.first_layer = dstview.last_layer = l + first_dst_layer;
      batch->blit.src = src->pimage.data.bo;
      batch->blit.dst = dst->pimage.data.bo;
      panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, true);
      panvk_per_arch(cmd_alloc_fb_desc)(cmdbuf);
      panvk_per_arch(cmd_prepare_tiler_context)(cmdbuf);

      mali_ptr tsd, tiler;

      tsd = batch->tls.gpu;
      tiler = batch->tiler.descs.gpu;

      struct panfrost_ptr job;

      job = panvk_meta_copy_emit_tiler_job(&cmdbuf->desc_pool.base,
                                           &batch->scoreboard,
                                           src_coords, dst_coords,
                                           texture, sampler, 0,
                                           vpd, rsd, tsd, tiler);

      util_dynarray_append(&batch->jobs, void *, job.cpu);
      panvk_per_arch(cmd_close_batch)(cmdbuf);
   }
}

static void
panvk_meta_copy_img2img_init(struct panvk_physical_device *dev, bool is_ms)
{
   STATIC_ASSERT(ARRAY_SIZE(panvk_meta_copy_img2img_fmts) == PANVK_META_COPY_IMG2IMG_NUM_FORMATS);

   for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_img2img_fmts); i++) {
      for (unsigned texdim = 1; texdim <= 3; texdim++) {
         unsigned texdimidx = panvk_meta_copy_tex_type(texdim, false);
         assert(texdimidx < ARRAY_SIZE(dev->meta.copy.img2img[0]));

         /* No MSAA on 3D textures */
         if (texdim == 3 && is_ms) continue;

         struct pan_shader_info shader_info;
         mali_ptr shader =
            panvk_meta_copy_img2img_shader(&dev->pdev, &dev->meta.bin_pool.base,
                                           panvk_meta_copy_img2img_fmts[i].srcfmt,
                                           panvk_meta_copy_img2img_fmts[i].dstfmt,
                                           panvk_meta_copy_img2img_fmts[i].dstmask,
                                           texdim, false, is_ms, &shader_info);
         dev->meta.copy.img2img[is_ms][texdimidx][i].rsd =
            panvk_meta_copy_to_img_emit_rsd(&dev->pdev, &dev->meta.desc_pool.base,
                                            shader, &shader_info,
                                            panvk_meta_copy_img2img_fmts[i].dstfmt,
                                            panvk_meta_copy_img2img_fmts[i].dstmask,
                                            true);
         if (texdim == 3)
            continue;

         memset(&shader_info, 0, sizeof(shader_info));
         texdimidx = panvk_meta_copy_tex_type(texdim, true);
         assert(texdimidx < ARRAY_SIZE(dev->meta.copy.img2img[0]));
         shader =
            panvk_meta_copy_img2img_shader(&dev->pdev, &dev->meta.bin_pool.base,
                                           panvk_meta_copy_img2img_fmts[i].srcfmt,
                                           panvk_meta_copy_img2img_fmts[i].dstfmt,
                                           panvk_meta_copy_img2img_fmts[i].dstmask,
                                           texdim, true, is_ms, &shader_info);
         dev->meta.copy.img2img[is_ms][texdimidx][i].rsd =
            panvk_meta_copy_to_img_emit_rsd(&dev->pdev, &dev->meta.desc_pool.base,
                                            shader, &shader_info,
                                            panvk_meta_copy_img2img_fmts[i].dstfmt,
                                            panvk_meta_copy_img2img_fmts[i].dstmask,
                                            true);
      }
   }
}

void
panvk_per_arch(CmdCopyImage2)(VkCommandBuffer commandBuffer,
                              const VkCopyImageInfo2 *pCopyImageInfo)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(panvk_image, dst, pCopyImageInfo->dstImage);
   VK_FROM_HANDLE(panvk_image, src, pCopyImageInfo->srcImage);

   for (unsigned i = 0; i < pCopyImageInfo->regionCount; i++) {
      panvk_meta_copy_img2img(cmdbuf, src, dst, &pCopyImageInfo->pRegions[i]);
   }
}

static unsigned
panvk_meta_copy_buf_texelsize(enum pipe_format imgfmt, unsigned mask)
{
   unsigned imgtexelsz = util_format_get_blocksize(imgfmt);
   unsigned nbufcomps = util_bitcount(mask);

   if (nbufcomps == util_format_get_nr_components(imgfmt))
      return imgtexelsz;

   /* Special case for Z24 buffers which are not tightly packed */
   if (mask == 7 && imgtexelsz == 4)
      return 4;

   /* Special case for S8 extraction from Z32_S8X24 */
   if (mask == 2 && imgtexelsz == 8)
      return 1;

   unsigned compsz =
      util_format_get_component_bits(imgfmt, UTIL_FORMAT_COLORSPACE_RGB, 0);

   assert(!(compsz % 8));

   return nbufcomps * compsz / 8;
}

static enum pipe_format
panvk_meta_copy_buf2img_format(enum pipe_format imgfmt)
{
   /* Pick blendable formats when we can, and the UINT variant matching the
    * texel size otherwise.
    */
   switch (util_format_get_blocksize(imgfmt)) {
   case 1: return PIPE_FORMAT_R8_UNORM;
   /* AFBC stores things differently for RGB565,
    * we can't simply map to R8G8 in that case */
   case 2: return (imgfmt == PIPE_FORMAT_R5G6B5_UNORM ||
                   imgfmt == PIPE_FORMAT_B5G6R5_UNORM) ?
                  PIPE_FORMAT_R5G6B5_UNORM : PIPE_FORMAT_R8G8_UNORM;
   case 4: return PIPE_FORMAT_R8G8B8A8_UNORM;
   case 6: return PIPE_FORMAT_R16G16B16_UINT;
   case 8: return PIPE_FORMAT_R32G32_UINT;
   case 12: return PIPE_FORMAT_R32G32B32_UINT;
   case 16: return PIPE_FORMAT_R32G32B32A32_UINT;
   default: unreachable("Invalid format\n");
   }
}

struct panvk_meta_copy_format_info {
   enum pipe_format imgfmt;
   unsigned mask;
} PACKED;

static const struct panvk_meta_copy_format_info panvk_meta_copy_buf2img_fmts[] = {
   { PIPE_FORMAT_R8_UNORM, 0x1 },
   { PIPE_FORMAT_R8G8_UNORM, 0x3 },
   { PIPE_FORMAT_R5G6B5_UNORM, 0x7 },
   { PIPE_FORMAT_R8G8B8A8_UNORM, 0xf },
   { PIPE_FORMAT_R16G16B16_UINT, 0x7 },
   { PIPE_FORMAT_R32G32_UINT, 0x3 },
   { PIPE_FORMAT_R32G32B32_UINT, 0x7 },
   { PIPE_FORMAT_R32G32B32A32_UINT, 0xf },
   /* S8 -> Z24S8 */
   { PIPE_FORMAT_R8G8B8A8_UNORM, 0x8 },
   /* S8 -> Z32_S8X24 */
   { PIPE_FORMAT_R32G32_UINT, 0x2 },
   /* Z24X8 -> Z24S8 */
   { PIPE_FORMAT_R8G8B8A8_UNORM, 0x7 },
   /* Z32 -> Z32_S8X24 */
   { PIPE_FORMAT_R32G32_UINT, 0x1 },
};

struct panvk_meta_copy_buf2img_info {
   struct {
      mali_ptr ptr;
      struct {
         unsigned line;
         unsigned surf;
      } stride;
   } buf;
} PACKED;

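/* Helper turning a field of panvk_meta_copy_buf2img_info into a
 * push-constant load at the field's struct offset, e.g.
 * panvk_meta_copy_buf2img_get_info_field(&b, buf.stride.line) emits a
 * 32-bit load of the line stride.
 */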
#define panvk_meta_copy_buf2img_get_info_field(b, field) \
        nir_load_push_constant((b), 1, \
                               sizeof(((struct panvk_meta_copy_buf2img_info *)0)->field) * 8, \
                               nir_imm_int(b, 0), \
                               .base = offsetof(struct panvk_meta_copy_buf2img_info, field), \
                               .range = ~0)

static mali_ptr
panvk_meta_copy_buf2img_shader(struct panfrost_device *pdev,
                               struct pan_pool *bin_pool,
                               struct panvk_meta_copy_format_info key,
                               struct pan_shader_info *shader_info)
{
   nir_builder b =
      nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
                                     GENX(pan_shader_get_compiler_options)(),
                                     "panvk_meta_copy_buf2img(imgfmt=%s,mask=%x)",
                                     util_format_name(key.imgfmt),
                                     key.mask);

   nir_variable *coord_var =
      nir_variable_create(b.shader, nir_var_shader_in,
                          glsl_vector_type(GLSL_TYPE_FLOAT, 3),
                          "coord");
   coord_var->data.location = VARYING_SLOT_VAR0;
   nir_ssa_def *coord = nir_load_var(&b, coord_var);

   coord = nir_f2u32(&b, coord);

   nir_ssa_def *bufptr =
      panvk_meta_copy_buf2img_get_info_field(&b, buf.ptr);
   nir_ssa_def *buflinestride =
      panvk_meta_copy_buf2img_get_info_field(&b, buf.stride.line);
   nir_ssa_def *bufsurfstride =
      panvk_meta_copy_buf2img_get_info_field(&b, buf.stride.surf);

   unsigned imgtexelsz = util_format_get_blocksize(key.imgfmt);
   unsigned buftexelsz = panvk_meta_copy_buf_texelsize(key.imgfmt, key.mask);
   unsigned writemask = key.mask;

   nir_ssa_def *offset =
      nir_imul(&b, nir_channel(&b, coord, 0), nir_imm_int(&b, buftexelsz));
   offset = nir_iadd(&b, offset,
                     nir_imul(&b, nir_channel(&b, coord, 1), buflinestride));
   offset = nir_iadd(&b, offset,
                     nir_imul(&b, nir_channel(&b, coord, 2), bufsurfstride));
   bufptr = nir_iadd(&b, bufptr, nir_u2u64(&b, offset));

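   /* Derive the per-component size of the render target: texels of 32 bits
    * or less (except RGB565) are written as 8-bit unorm components; larger
    * texels use the largest power-of-two divisor of the texel size, capped
    * at 4 bytes per component.
    */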
   unsigned imgcompsz =
      (imgtexelsz <= 4 && key.imgfmt != PIPE_FORMAT_R5G6B5_UNORM) ?
      1 : MIN2(1 << (ffs(imgtexelsz) - 1), 4);

   unsigned nimgcomps = imgtexelsz / imgcompsz;
   unsigned bufcompsz = MIN2(buftexelsz, imgcompsz);
   unsigned nbufcomps = buftexelsz / bufcompsz;

   assert(bufcompsz == 1 || bufcompsz == 2 || bufcompsz == 4);
   assert(nbufcomps <= 4 && nimgcomps <= 4);

   nir_ssa_def *texel =
      nir_load_global(&b, bufptr, bufcompsz, nbufcomps, bufcompsz * 8);

   enum glsl_base_type basetype;
   if (key.imgfmt == PIPE_FORMAT_R5G6B5_UNORM) {
      texel = nir_vec3(&b,
                       nir_iand_imm(&b, texel, BITFIELD_MASK(5)),
                       nir_iand_imm(&b, nir_ushr_imm(&b, texel, 5), BITFIELD_MASK(6)),
                       nir_iand_imm(&b, nir_ushr_imm(&b, texel, 11), BITFIELD_MASK(5)));
      texel = nir_fmul(&b,
                       nir_u2f32(&b, texel),
                       nir_vec3(&b,
                                nir_imm_float(&b, 1.0f / 31),
                                nir_imm_float(&b, 1.0f / 63),
                                nir_imm_float(&b, 1.0f / 31)));
      nimgcomps = 3;
      basetype = GLSL_TYPE_FLOAT;
   } else if (imgcompsz == 1) {
      assert(bufcompsz == 1);
      /* Blendable formats are unorm and the fixed-function blend unit
       * takes float values.
       */
      texel = nir_fmul(&b, nir_u2f32(&b, texel),
                       nir_imm_float(&b, 1.0f / 255));
      basetype = GLSL_TYPE_FLOAT;
   } else {
      texel = nir_u2uN(&b, texel, imgcompsz * 8);
      basetype = imgcompsz == 2 ? GLSL_TYPE_UINT16 : GLSL_TYPE_UINT;
   }

   /* We always pass the texel using 32-bit regs for now */
   nir_variable *out =
      nir_variable_create(b.shader, nir_var_shader_out,
                          glsl_vector_type(basetype, nimgcomps),
                          "out");
   out->data.location = FRAG_RESULT_DATA0;

   uint16_t fullmask = (1 << nimgcomps) - 1;

   assert(fullmask >= writemask);

   if (fullmask != writemask) {
      unsigned first_written_comp = ffs(writemask) - 1;
      nir_ssa_def *oldtexel = NULL;
      if (imgcompsz > 1)
         oldtexel = nir_load_var(&b, out);

      nir_ssa_def *texel_comps[4];
      for (unsigned i = 0; i < nimgcomps; i++) {
         if (writemask & BITFIELD_BIT(i))
            texel_comps[i] = nir_channel(&b, texel, i - first_written_comp);
         else if (imgcompsz > 1)
            texel_comps[i] = nir_channel(&b, oldtexel, i);
         else
            texel_comps[i] = nir_imm_intN_t(&b, 0, texel->bit_size);
      }

      texel = nir_vec(&b, texel_comps, nimgcomps);
   }

   nir_store_var(&b, out, texel, 0xff);

   struct panfrost_compile_inputs inputs = {
      .gpu_id = pdev->gpu_id,
      .is_blit = true,
      .no_ubo_to_push = true,
   };

   pan_pack(&inputs.bifrost.rt_conv[0], INTERNAL_CONVERSION, cfg) {
      cfg.memory_format = (imgcompsz == 2 ? MALI_RG16UI : MALI_RG32UI) << 12;
      cfg.register_format = imgcompsz == 2 ?
                            MALI_REGISTER_FILE_FORMAT_U16 :
                            MALI_REGISTER_FILE_FORMAT_U32;
   }
   inputs.bifrost.static_rt_conv = true;

   struct util_dynarray binary;

   util_dynarray_init(&binary, NULL);
   GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info);
   shader_info->push.count = DIV_ROUND_UP(sizeof(struct panvk_meta_copy_buf2img_info), 4);

   mali_ptr shader =
      pan_pool_upload_aligned(bin_pool, binary.data, binary.size, 128);

   util_dynarray_fini(&binary);
   ralloc_free(b.shader);

   return shader;
}

static unsigned
panvk_meta_copy_buf2img_format_idx(struct panvk_meta_copy_format_info key)
{
   for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_buf2img_fmts); i++) {
      if (!memcmp(&key, &panvk_meta_copy_buf2img_fmts[i], sizeof(key)))
         return i;
   }

   unreachable("Invalid image format\n");
}

static void
panvk_meta_copy_buf2img(struct panvk_cmd_buffer *cmdbuf,
                        const struct panvk_buffer *buf,
                        const struct panvk_image *img,
                        const VkBufferImageCopy2 *region)
{
   struct pan_fb_info *fbinfo = &cmdbuf->state.fb.info;
   unsigned minx = MAX2(region->imageOffset.x, 0);
   unsigned miny = MAX2(region->imageOffset.y, 0);
   unsigned maxx = MAX2(region->imageOffset.x + region->imageExtent.width - 1, 0);
   unsigned maxy = MAX2(region->imageOffset.y + region->imageExtent.height - 1, 0);

   mali_ptr vpd =
      panvk_per_arch(meta_emit_viewport)(&cmdbuf->desc_pool.base,
                                         minx, miny, maxx, maxy);

   float dst_rect[] = {
      minx, miny, 0.0, 1.0,
      maxx + 1, miny, 0.0, 1.0,
      minx, maxy + 1, 0.0, 1.0,
      maxx + 1, maxy + 1, 0.0, 1.0,
   };
   mali_ptr dst_coords =
      pan_pool_upload_aligned(&cmdbuf->desc_pool.base, dst_rect,
                              sizeof(dst_rect), 64);

   struct panvk_meta_copy_format_info key = {
      .imgfmt = panvk_meta_copy_buf2img_format(img->pimage.layout.format),
      .mask = panvk_meta_copy_img_mask(img->pimage.layout.format,
                                       region->imageSubresource.aspectMask),
   };

   unsigned fmtidx = panvk_meta_copy_buf2img_format_idx(key);

   mali_ptr rsd =
      cmdbuf->device->physical_device->meta.copy.buf2img[fmtidx].rsd;

   const struct vk_image_buffer_layout buflayout =
      vk_image_buffer_copy_layout(&img->vk, region);
   struct panvk_meta_copy_buf2img_info info = {
      .buf.ptr = panvk_buffer_gpu_ptr(buf, region->bufferOffset),
      .buf.stride.line = buflayout.row_stride_B,
      .buf.stride.surf = buflayout.image_stride_B,
   };

   mali_ptr pushconsts =
      pan_pool_upload_aligned(&cmdbuf->desc_pool.base, &info, sizeof(info), 16);

   struct pan_image_view view = {
      .format = key.imgfmt,
      .dim = MALI_TEXTURE_DIMENSION_2D,
      .image = &img->pimage,
      .nr_samples = img->pimage.layout.nr_samples,
      .first_level = region->imageSubresource.mipLevel,
      .last_level = region->imageSubresource.mipLevel,
      .swizzle = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W },
   };

   /* TODO: don't force preloads of dst resources if unneeded */
   cmdbuf->state.fb.crc_valid[0] = false;
   *fbinfo = (struct pan_fb_info){
      .width = u_minify(img->pimage.layout.width, region->imageSubresource.mipLevel),
      .height = u_minify(img->pimage.layout.height, region->imageSubresource.mipLevel),
      .extent.minx = minx,
      .extent.maxx = maxx,
      .extent.miny = miny,
      .extent.maxy = maxy,
      .nr_samples = 1,
      .rt_count = 1,
      .rts[0].view = &view,
      .rts[0].preload = true,
      .rts[0].crc_valid = &cmdbuf->state.fb.crc_valid[0],
   };

   panvk_per_arch(cmd_close_batch)(cmdbuf);

   assert(region->imageSubresource.layerCount == 1 ||
          region->imageExtent.depth == 1);
   assert(region->imageOffset.z >= 0);
   unsigned first_layer = MAX2(region->imageSubresource.baseArrayLayer, region->imageOffset.z);
   unsigned nlayers = MAX2(region->imageSubresource.layerCount, region->imageExtent.depth);
   for (unsigned l = 0; l < nlayers; l++) {
      float src_rect[] = {
         0, 0, l, 1.0,
         region->imageExtent.width, 0, l, 1.0,
         0, region->imageExtent.height, l, 1.0,
         region->imageExtent.width, region->imageExtent.height, l, 1.0,
      };

      mali_ptr src_coords =
         pan_pool_upload_aligned(&cmdbuf->desc_pool.base, src_rect,
                                 sizeof(src_rect), 64);

      struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf);

      view.first_layer = view.last_layer = l + first_layer;
      batch->blit.src = buf->bo;
      batch->blit.dst = img->pimage.data.bo;
      panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, true);
      panvk_per_arch(cmd_alloc_fb_desc)(cmdbuf);
      panvk_per_arch(cmd_prepare_tiler_context)(cmdbuf);

      mali_ptr tsd, tiler;

      tsd = batch->tls.gpu;
      tiler = batch->tiler.descs.gpu;

      struct panfrost_ptr job;

      job = panvk_meta_copy_emit_tiler_job(&cmdbuf->desc_pool.base,
                                           &batch->scoreboard,
                                           src_coords, dst_coords,
                                           0, 0, pushconsts,
                                           vpd, rsd, tsd, tiler);

      util_dynarray_append(&batch->jobs, void *, job.cpu);
      panvk_per_arch(cmd_close_batch)(cmdbuf);
   }
}

static void
panvk_meta_copy_buf2img_init(struct panvk_physical_device *dev)
{
   STATIC_ASSERT(ARRAY_SIZE(panvk_meta_copy_buf2img_fmts) == PANVK_META_COPY_BUF2IMG_NUM_FORMATS);

   for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_buf2img_fmts); i++) {
      struct pan_shader_info shader_info;
      mali_ptr shader =
         panvk_meta_copy_buf2img_shader(&dev->pdev, &dev->meta.bin_pool.base,
                                        panvk_meta_copy_buf2img_fmts[i],
                                        &shader_info);
      dev->meta.copy.buf2img[i].rsd =
         panvk_meta_copy_to_img_emit_rsd(&dev->pdev, &dev->meta.desc_pool.base,
                                         shader, &shader_info,
                                         panvk_meta_copy_buf2img_fmts[i].imgfmt,
                                         panvk_meta_copy_buf2img_fmts[i].mask,
                                         false);
   }
}

void
panvk_per_arch(CmdCopyBufferToImage2)(VkCommandBuffer commandBuffer,
                                      const VkCopyBufferToImageInfo2 *pCopyBufferToImageInfo)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(panvk_buffer, buf, pCopyBufferToImageInfo->srcBuffer);
   VK_FROM_HANDLE(panvk_image, img, pCopyBufferToImageInfo->dstImage);

   for (unsigned i = 0; i < pCopyBufferToImageInfo->regionCount; i++) {
      panvk_meta_copy_buf2img(cmdbuf, buf, img, &pCopyBufferToImageInfo->pRegions[i]);
   }
}

static const struct panvk_meta_copy_format_info panvk_meta_copy_img2buf_fmts[] = {
   { PIPE_FORMAT_R8_UINT, 0x1 },
   { PIPE_FORMAT_R8G8_UINT, 0x3 },
   { PIPE_FORMAT_R5G6B5_UNORM, 0x7 },
   { PIPE_FORMAT_R8G8B8A8_UINT, 0xf },
   { PIPE_FORMAT_R16G16B16_UINT, 0x7 },
   { PIPE_FORMAT_R32G32_UINT, 0x3 },
   { PIPE_FORMAT_R32G32B32_UINT, 0x7 },
   { PIPE_FORMAT_R32G32B32A32_UINT, 0xf },
   /* S8 -> Z24S8 */
   { PIPE_FORMAT_R8G8B8A8_UINT, 0x8 },
   /* S8 -> Z32_S8X24 */
   { PIPE_FORMAT_R32G32_UINT, 0x2 },
   /* Z24X8 -> Z24S8 */
   { PIPE_FORMAT_R8G8B8A8_UINT, 0x7 },
   /* Z32 -> Z32_S8X24 */
   { PIPE_FORMAT_R32G32_UINT, 0x1 },
};

static enum pipe_format
panvk_meta_copy_img2buf_format(enum pipe_format imgfmt)
{
   /* Pick the UINT variant matching the texel size so texture fetches
    * return raw integer values. RGB565 is kept as-is: AFBC stores it
    * differently, so it can't simply be reinterpreted as R8G8.
    */
   switch (util_format_get_blocksize(imgfmt)) {
   case 1: return PIPE_FORMAT_R8_UINT;
   case 2: return (imgfmt == PIPE_FORMAT_R5G6B5_UNORM ||
                   imgfmt == PIPE_FORMAT_B5G6R5_UNORM) ?
                  PIPE_FORMAT_R5G6B5_UNORM : PIPE_FORMAT_R8G8_UINT;
   case 4: return PIPE_FORMAT_R8G8B8A8_UINT;
   case 6: return PIPE_FORMAT_R16G16B16_UINT;
   case 8: return PIPE_FORMAT_R32G32_UINT;
   case 12: return PIPE_FORMAT_R32G32B32_UINT;
   case 16: return PIPE_FORMAT_R32G32B32A32_UINT;
   default: unreachable("Invalid format\n");
   }
}

struct panvk_meta_copy_img2buf_info {
   struct {
      mali_ptr ptr;
      struct {
         unsigned line;
         unsigned surf;
      } stride;
   } buf;
   struct {
      struct {
         unsigned x, y, z;
      } offset;
      struct {
         unsigned minx, miny, maxx, maxy;
      } extent;
   } img;
} PACKED;

#define panvk_meta_copy_img2buf_get_info_field(b, field) \
        nir_load_push_constant((b), 1, \
                               sizeof(((struct panvk_meta_copy_img2buf_info *)0)->field) * 8, \
                               nir_imm_int(b, 0), \
                               .base = offsetof(struct panvk_meta_copy_img2buf_info, field), \
                               .range = ~0)

static mali_ptr
panvk_meta_copy_img2buf_shader(struct panfrost_device *pdev,
                               struct pan_pool *bin_pool,
                               struct panvk_meta_copy_format_info key,
                               unsigned texdim, unsigned texisarray,
                               struct pan_shader_info *shader_info)
{
   unsigned imgtexelsz = util_format_get_blocksize(key.imgfmt);
   unsigned buftexelsz = panvk_meta_copy_buf_texelsize(key.imgfmt, key.mask);

   /* FIXME: Won't work on compute queues, but we can't do that with
    * a compute shader if the destination is an AFBC surface.
    */
   nir_builder b =
      nir_builder_init_simple_shader(MESA_SHADER_COMPUTE,
                                     GENX(pan_shader_get_compiler_options)(),
                                     "panvk_meta_copy_img2buf(dim=%dD%s,imgfmt=%s,mask=%x)",
                                     texdim, texisarray ? "[]" : "",
                                     util_format_name(key.imgfmt),
                                     key.mask);

   nir_ssa_def *coord = nir_load_global_invocation_id(&b, 32);
   nir_ssa_def *bufptr =
      panvk_meta_copy_img2buf_get_info_field(&b, buf.ptr);
   nir_ssa_def *buflinestride =
      panvk_meta_copy_img2buf_get_info_field(&b, buf.stride.line);
   nir_ssa_def *bufsurfstride =
      panvk_meta_copy_img2buf_get_info_field(&b, buf.stride.surf);

   nir_ssa_def *imgminx =
      panvk_meta_copy_img2buf_get_info_field(&b, img.extent.minx);
   nir_ssa_def *imgminy =
      panvk_meta_copy_img2buf_get_info_field(&b, img.extent.miny);
   nir_ssa_def *imgmaxx =
      panvk_meta_copy_img2buf_get_info_field(&b, img.extent.maxx);
   nir_ssa_def *imgmaxy =
      panvk_meta_copy_img2buf_get_info_field(&b, img.extent.maxy);

   nir_ssa_def *imgcoords, *inbounds;

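   /* One compute invocation per texel: offset the invocation ID by the
    * tile-aligned image offset and bail out when the resulting coordinate
    * falls outside the copy extent, since the dispatch is rounded up to
    * whole 16x16 tiles (see panvk_meta_copy_img2buf()).
    */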
   switch (texdim + texisarray) {
   case 1:
      imgcoords =
         nir_iadd(&b,
                  nir_channel(&b, coord, 0),
                  panvk_meta_copy_img2buf_get_info_field(&b, img.offset.x));
      inbounds =
         nir_iand(&b,
                  nir_uge(&b, imgmaxx, nir_channel(&b, imgcoords, 0)),
                  nir_uge(&b, nir_channel(&b, imgcoords, 0), imgminx));
      break;
   case 2:
      imgcoords =
         nir_vec2(&b,
                  nir_iadd(&b,
                           nir_channel(&b, coord, 0),
                           panvk_meta_copy_img2buf_get_info_field(&b, img.offset.x)),
                  nir_iadd(&b,
                           nir_channel(&b, coord, 1),
                           panvk_meta_copy_img2buf_get_info_field(&b, img.offset.y)));
      inbounds =
         nir_iand(&b,
                  nir_iand(&b,
                           nir_uge(&b, imgmaxx, nir_channel(&b, imgcoords, 0)),
                           nir_uge(&b, imgmaxy, nir_channel(&b, imgcoords, 1))),
                  nir_iand(&b,
                           nir_uge(&b, nir_channel(&b, imgcoords, 0), imgminx),
                           nir_uge(&b, nir_channel(&b, imgcoords, 1), imgminy)));
      break;
   case 3:
      imgcoords =
         nir_vec3(&b,
                  nir_iadd(&b,
                           nir_channel(&b, coord, 0),
                           panvk_meta_copy_img2buf_get_info_field(&b, img.offset.x)),
                  nir_iadd(&b,
                           nir_channel(&b, coord, 1),
                           panvk_meta_copy_img2buf_get_info_field(&b, img.offset.y)),
                  nir_iadd(&b,
                           nir_channel(&b, coord, 2),
                           panvk_meta_copy_img2buf_get_info_field(&b, img.offset.z)));
      inbounds =
         nir_iand(&b,
                  nir_iand(&b,
                           nir_uge(&b, imgmaxx, nir_channel(&b, imgcoords, 0)),
                           nir_uge(&b, imgmaxy, nir_channel(&b, imgcoords, 1))),
                  nir_iand(&b,
                           nir_uge(&b, nir_channel(&b, imgcoords, 0), imgminx),
                           nir_uge(&b, nir_channel(&b, imgcoords, 1), imgminy)));
      break;
   default:
      unreachable("Invalid texture dimension\n");
   }

   nir_push_if(&b, inbounds);

   /* FIXME: doesn't work for tiled+compressed formats since blocks are 4x4
    * blocks instead of 16x16 texels in that case, and there's nothing we can
    * do to force the tile size to 4x4 in the render path.
    * This being said, compressed textures are not compatible with AFBC, so we
    * could use a compute shader arranging the blocks properly.
    */
   nir_ssa_def *offset =
      nir_imul(&b, nir_channel(&b, coord, 0), nir_imm_int(&b, buftexelsz));
   offset = nir_iadd(&b, offset,
                     nir_imul(&b, nir_channel(&b, coord, 1), buflinestride));
   offset = nir_iadd(&b, offset,
                     nir_imul(&b, nir_channel(&b, coord, 2), bufsurfstride));
   bufptr = nir_iadd(&b, bufptr, nir_u2u64(&b, offset));

   unsigned imgcompsz = imgtexelsz <= 4 ?
                        1 : MIN2(1 << (ffs(imgtexelsz) - 1), 4);
   unsigned nimgcomps = imgtexelsz / imgcompsz;
   assert(nimgcomps <= 4);

   nir_tex_instr *tex = nir_tex_instr_create(b.shader, 1);
   tex->op = nir_texop_txf;
   tex->texture_index = 0;
   tex->is_array = texisarray;
   tex->dest_type = util_format_is_unorm(key.imgfmt) ?
                    nir_type_float32 : nir_type_uint32;

   switch (texdim) {
   case 1: tex->sampler_dim = GLSL_SAMPLER_DIM_1D; break;
   case 2: tex->sampler_dim = GLSL_SAMPLER_DIM_2D; break;
   case 3: tex->sampler_dim = GLSL_SAMPLER_DIM_3D; break;
   default: unreachable("Invalid texture dimension");
   }

   tex->src[0].src_type = nir_tex_src_coord;
   tex->src[0].src = nir_src_for_ssa(imgcoords);
   tex->coord_components = texdim + texisarray;
   nir_ssa_dest_init(&tex->instr, &tex->dest, 4,
                     nir_alu_type_get_type_size(tex->dest_type), NULL);
   nir_builder_instr_insert(&b, &tex->instr);

   nir_ssa_def *texel = &tex->dest.ssa;

   unsigned fullmask = (1 << util_format_get_nr_components(key.imgfmt)) - 1;
   unsigned nbufcomps = util_bitcount(fullmask);
   if (key.mask != fullmask) {
      nir_ssa_def *bufcomps[4];
      nbufcomps = 0;
      for (unsigned i = 0; i < nimgcomps; i++) {
         if (key.mask & BITFIELD_BIT(i))
            bufcomps[nbufcomps++] = nir_channel(&b, texel, i);
      }

      texel = nir_vec(&b, bufcomps, nbufcomps);
   }

   unsigned bufcompsz = buftexelsz / nbufcomps;

   if (key.imgfmt == PIPE_FORMAT_R5G6B5_UNORM) {
      texel = nir_fmul(&b, texel,
                       nir_vec3(&b,
                                nir_imm_float(&b, 31),
                                nir_imm_float(&b, 63),
                                nir_imm_float(&b, 31)));
      texel = nir_f2u16(&b, texel);
      texel = nir_ior(&b, nir_channel(&b, texel, 0),
                      nir_ior(&b,
                              nir_ishl(&b, nir_channel(&b, texel, 1), nir_imm_int(&b, 5)),
                              nir_ishl(&b, nir_channel(&b, texel, 2), nir_imm_int(&b, 11))));
      imgcompsz = 2;
      bufcompsz = 2;
      nbufcomps = 1;
      nimgcomps = 1;
   } else if (imgcompsz == 1) {
      nir_ssa_def *packed = nir_channel(&b, texel, 0);
      for (unsigned i = 1; i < nbufcomps; i++) {
         packed = nir_ior(&b, packed,
                          nir_ishl(&b, nir_iand_imm(&b, nir_channel(&b, texel, i), 0xff),
                                   nir_imm_int(&b, i * 8)));
      }
      texel = packed;

      bufcompsz = nbufcomps == 3 ? 4 : nbufcomps;
      nbufcomps = 1;
   }

   assert(bufcompsz == 1 || bufcompsz == 2 || bufcompsz == 4);
   assert(nbufcomps <= 4 && nimgcomps <= 4);
   texel = nir_u2uN(&b, texel, bufcompsz * 8);

   nir_store_global(&b, bufptr, bufcompsz, texel, (1 << nbufcomps) - 1);
   nir_pop_if(&b, NULL);

   struct panfrost_compile_inputs inputs = {
      .gpu_id = pdev->gpu_id,
      .is_blit = true,
      .no_ubo_to_push = true,
   };

   struct util_dynarray binary;

   util_dynarray_init(&binary, NULL);
   GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info);

   shader_info->push.count = DIV_ROUND_UP(sizeof(struct panvk_meta_copy_img2buf_info), 4);

   mali_ptr shader =
      pan_pool_upload_aligned(bin_pool, binary.data, binary.size, 128);

   util_dynarray_fini(&binary);
   ralloc_free(b.shader);

   return shader;
}

static unsigned
panvk_meta_copy_img2buf_format_idx(struct panvk_meta_copy_format_info key)
{
   for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_img2buf_fmts); i++) {
      if (!memcmp(&key, &panvk_meta_copy_img2buf_fmts[i], sizeof(key)))
         return i;
   }

   unreachable("Invalid texel size\n");
}

static void
panvk_meta_copy_img2buf(struct panvk_cmd_buffer *cmdbuf,
                        const struct panvk_buffer *buf,
                        const struct panvk_image *img,
                        const VkBufferImageCopy2 *region)
{
   struct panfrost_device *pdev = &cmdbuf->device->physical_device->pdev;
   struct panvk_meta_copy_format_info key = {
      .imgfmt = panvk_meta_copy_img2buf_format(img->pimage.layout.format),
      .mask = panvk_meta_copy_img_mask(img->pimage.layout.format,
                                       region->imageSubresource.aspectMask),
   };
   unsigned buftexelsz = panvk_meta_copy_buf_texelsize(key.imgfmt, key.mask);
   unsigned texdimidx =
      panvk_meta_copy_tex_type(img->pimage.layout.dim,
                               img->pimage.layout.array_size > 1);
   unsigned fmtidx = panvk_meta_copy_img2buf_format_idx(key);

   mali_ptr rsd =
      cmdbuf->device->physical_device->meta.copy.img2buf[texdimidx][fmtidx].rsd;

   struct panvk_meta_copy_img2buf_info info = {
      .buf.ptr = panvk_buffer_gpu_ptr(buf, region->bufferOffset),
      .buf.stride.line = (region->bufferRowLength ? : region->imageExtent.width) * buftexelsz,
      .img.offset.x = MAX2(region->imageOffset.x & ~15, 0),
      .img.extent.minx = MAX2(region->imageOffset.x, 0),
      .img.extent.maxx = MAX2(region->imageOffset.x + region->imageExtent.width - 1, 0),
   };

   if (img->pimage.layout.dim == MALI_TEXTURE_DIMENSION_1D) {
      info.img.extent.maxy = region->imageSubresource.layerCount - 1;
   } else {
      info.img.offset.y = MAX2(region->imageOffset.y & ~15, 0);
      info.img.offset.z = MAX2(region->imageOffset.z, 0);
      info.img.extent.miny = MAX2(region->imageOffset.y, 0);
      info.img.extent.maxy = MAX2(region->imageOffset.y + region->imageExtent.height - 1, 0);
   }

   info.buf.stride.surf = (region->bufferImageHeight ? : region->imageExtent.height) *
                          info.buf.stride.line;

   mali_ptr pushconsts =
      pan_pool_upload_aligned(&cmdbuf->desc_pool.base, &info, sizeof(info), 16);

   struct pan_image_view view = {
      .format = key.imgfmt,
      .dim = img->pimage.layout.dim == MALI_TEXTURE_DIMENSION_CUBE ?
             MALI_TEXTURE_DIMENSION_2D : img->pimage.layout.dim,
      .image = &img->pimage,
      .nr_samples = img->pimage.layout.nr_samples,
      .first_level = region->imageSubresource.mipLevel,
      .last_level = region->imageSubresource.mipLevel,
      .first_layer = region->imageSubresource.baseArrayLayer,
      .last_layer = region->imageSubresource.baseArrayLayer + region->imageSubresource.layerCount - 1,
      .swizzle = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W },
   };

   mali_ptr texture =
      panvk_meta_copy_img_emit_texture(pdev, &cmdbuf->desc_pool.base, &view);
   mali_ptr sampler =
      panvk_meta_copy_img_emit_sampler(pdev, &cmdbuf->desc_pool.base);

   panvk_per_arch(cmd_close_batch)(cmdbuf);

   struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf);

   struct pan_tls_info tlsinfo = { 0 };

   batch->blit.src = img->pimage.data.bo;
   batch->blit.dst = buf->bo;
   batch->tls =
      pan_pool_alloc_desc(&cmdbuf->desc_pool.base, LOCAL_STORAGE);
   GENX(pan_emit_tls)(&tlsinfo, batch->tls.cpu);

   mali_ptr tsd = batch->tls.gpu;

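   /* Dispatch 16x16 workgroups (16x1 for 1D images) covering the
    * tile-aligned copy window; out-of-bounds invocations are discarded by
    * the bounds check in the shader.
    */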
1537 struct pan_compute_dim wg_sz = {
1538 16,
1539 img->pimage.layout.dim == MALI_TEXTURE_DIMENSION_1D ? 1 : 16,
1540 1,
1541 };
1542
1543 struct pan_compute_dim num_wg = {
1544 (ALIGN_POT(info.img.extent.maxx + 1, 16) - info.img.offset.x) / 16,
1545 img->pimage.layout.dim == MALI_TEXTURE_DIMENSION_1D ?
1546 region->imageSubresource.layerCount :
1547 (ALIGN_POT(info.img.extent.maxy + 1, 16) - info.img.offset.y) / 16,
1548 img->pimage.layout.dim != MALI_TEXTURE_DIMENSION_1D ?
1549 MAX2(region->imageSubresource.layerCount, region->imageExtent.depth) : 1,
1550 };
1551
1552 struct panfrost_ptr job =
1553 panvk_meta_copy_emit_compute_job(&cmdbuf->desc_pool.base,
1554 &batch->scoreboard, &num_wg, &wg_sz,
1555 texture, sampler,
1556 pushconsts, rsd, tsd);
1557
1558 util_dynarray_append(&batch->jobs, void *, job.cpu);
1559
1560 panvk_per_arch(cmd_close_batch)(cmdbuf);
1561 }
1562
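/*
 * A note on the dispatch math in panvk_meta_copy_img2buf() above: offsets
 * are rounded down to the 16-texel tile grid, while extent.{minx,maxx}
 * (and miny/maxy) keep the exact copy bounds so the shader can clamp per
 * texel. Worked example for imageOffset.x = 20, imageExtent.width = 10:
 *
 *    info.img.offset.x    = 20 & ~15                      = 16
 *    info.img.extent.minx = 20
 *    info.img.extent.maxx = 20 + 10 - 1                   = 29
 *    num_wg.x             = (ALIGN_POT(30, 16) - 16) / 16 = 1
 *
 * i.e. a single 16-wide workgroup covers the whole span, with invocations
 * outside [minx, maxx] presumably discarded by the copy shader.
 */
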
static void
panvk_meta_copy_img2buf_init(struct panvk_physical_device *dev)
{
   STATIC_ASSERT(ARRAY_SIZE(panvk_meta_copy_img2buf_fmts) ==
                 PANVK_META_COPY_IMG2BUF_NUM_FORMATS);

   for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_img2buf_fmts); i++) {
      for (unsigned texdim = 1; texdim <= 3; texdim++) {
         unsigned texdimidx = panvk_meta_copy_tex_type(texdim, false);
         assert(texdimidx < ARRAY_SIZE(dev->meta.copy.img2buf));

         struct pan_shader_info shader_info;
         mali_ptr shader =
            panvk_meta_copy_img2buf_shader(&dev->pdev, &dev->meta.bin_pool.base,
                                           panvk_meta_copy_img2buf_fmts[i],
                                           texdim, false, &shader_info);
         dev->meta.copy.img2buf[texdimidx][i].rsd =
            panvk_meta_copy_to_buf_emit_rsd(&dev->pdev,
                                            &dev->meta.desc_pool.base,
                                            shader, &shader_info, true);

         if (texdim == 3)
            continue;

         memset(&shader_info, 0, sizeof(shader_info));
         texdimidx = panvk_meta_copy_tex_type(texdim, true);
         assert(texdimidx < ARRAY_SIZE(dev->meta.copy.img2buf));
         shader =
            panvk_meta_copy_img2buf_shader(&dev->pdev, &dev->meta.bin_pool.base,
                                           panvk_meta_copy_img2buf_fmts[i],
                                           texdim, true, &shader_info);
         dev->meta.copy.img2buf[texdimidx][i].rsd =
            panvk_meta_copy_to_buf_emit_rsd(&dev->pdev,
                                            &dev->meta.desc_pool.base,
                                            shader, &shader_info, true);
      }
   }
}
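/*
 * The init loop above fills the [texdimidx][fmtidx] RSD cache consumed by
 * panvk_meta_copy_img2buf(): each format in panvk_meta_copy_img2buf_fmts
 * gets 1D, 2D and 3D variants plus layered 1D and 2D variants. 3D images
 * have no layered form, hence the early continue before the layered pass.
 */
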
void
panvk_per_arch(CmdCopyImageToBuffer2)(VkCommandBuffer commandBuffer,
                                      const VkCopyImageToBufferInfo2 *pCopyImageToBufferInfo)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(panvk_buffer, buf, pCopyImageToBufferInfo->dstBuffer);
   VK_FROM_HANDLE(panvk_image, img, pCopyImageToBufferInfo->srcImage);

   for (unsigned i = 0; i < pCopyImageToBufferInfo->regionCount; i++) {
      panvk_meta_copy_img2buf(cmdbuf, buf, img,
                              &pCopyImageToBufferInfo->pRegions[i]);
   }
}
struct panvk_meta_copy_buf2buf_info {
   mali_ptr src;
   mali_ptr dst;
} PACKED;

#define panvk_meta_copy_buf2buf_get_info_field(b, field) \
        nir_load_push_constant((b), 1, \
                               sizeof(((struct panvk_meta_copy_buf2buf_info *)0)->field) * 8, \
                               nir_imm_int(b, 0), \
                               .base = offsetof(struct panvk_meta_copy_buf2buf_info, field), \
                               .range = ~0)
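/*
 * The macro above derives the load width from the field type: the bit size
 * of the single-component nir_load_push_constant() is sizeof(field) * 8 and
 * its .base is the field's byte offset. For the 64-bit src pointer it
 * expands roughly to (illustration only, not part of the original source):
 *
 *    nir_load_push_constant(b, 1, 64, nir_imm_int(b, 0),
 *                           .base = 0, .range = ~0);
 */
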
static mali_ptr
panvk_meta_copy_buf2buf_shader(struct panfrost_device *pdev,
                               struct pan_pool *bin_pool,
                               unsigned blksz,
                               struct pan_shader_info *shader_info)
{
   /* FIXME: Won't work on compute queues. Note that the AFBC rationale is
    * inherited from the image-copy paths; a buffer destination can never be
    * AFBC-compressed, so a compute shader is always usable here.
    */
   nir_builder b =
      nir_builder_init_simple_shader(MESA_SHADER_COMPUTE,
                                     GENX(pan_shader_get_compiler_options)(),
                                     "panvk_meta_copy_buf2buf(blksz=%d)",
                                     blksz);

   nir_ssa_def *coord = nir_load_global_invocation_id(&b, 32);

   nir_ssa_def *offset =
      nir_u2u64(&b, nir_imul(&b, nir_channel(&b, coord, 0),
                             nir_imm_int(&b, blksz)));
   nir_ssa_def *srcptr =
      nir_iadd(&b, panvk_meta_copy_buf2buf_get_info_field(&b, src), offset);
   nir_ssa_def *dstptr =
      nir_iadd(&b, panvk_meta_copy_buf2buf_get_info_field(&b, dst), offset);

   unsigned compsz = blksz < 4 ? blksz : 4;
   unsigned ncomps = blksz / compsz;
   nir_store_global(&b, dstptr, blksz,
                    nir_load_global(&b, srcptr, blksz, ncomps, compsz * 8),
                    (1 << ncomps) - 1);

   struct panfrost_compile_inputs inputs = {
      .gpu_id = pdev->gpu_id,
      .is_blit = true,
      .no_ubo_to_push = true,
   };

   struct util_dynarray binary;

   util_dynarray_init(&binary, NULL);
   GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info);

   shader_info->push.count =
      DIV_ROUND_UP(sizeof(struct panvk_meta_copy_buf2buf_info), 4);

   mali_ptr shader =
      pan_pool_upload_aligned(bin_pool, binary.data, binary.size, 128);

   util_dynarray_fini(&binary);
   ralloc_free(b.shader);

   return shader;
}
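/*
 * Access widths picked by the shader above, per block size. The component
 * size is capped at 32 bits; wider blocks become multi-component vectors:
 *
 *    blksz  compsz  ncomps  writemask
 *      1      1       1       0x1
 *      2      2       1       0x1
 *      4      4       1       0x1
 *      8      4       2       0x3
 *     16      4       4       0xf
 */
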
static void
panvk_meta_copy_buf2buf_init(struct panvk_physical_device *dev)
{
   for (unsigned i = 0; i < ARRAY_SIZE(dev->meta.copy.buf2buf); i++) {
      struct pan_shader_info shader_info;
      mali_ptr shader =
         panvk_meta_copy_buf2buf_shader(&dev->pdev, &dev->meta.bin_pool.base,
                                        1 << i, &shader_info);
      dev->meta.copy.buf2buf[i].rsd =
         panvk_meta_copy_to_buf_emit_rsd(&dev->pdev, &dev->meta.desc_pool.base,
                                         shader, &shader_info, false);
   }
}
static void
panvk_meta_copy_buf2buf(struct panvk_cmd_buffer *cmdbuf,
                        const struct panvk_buffer *src,
                        const struct panvk_buffer *dst,
                        const VkBufferCopy2 *region)
{
   struct panvk_meta_copy_buf2buf_info info = {
      .src = panvk_buffer_gpu_ptr(src, region->srcOffset),
      .dst = panvk_buffer_gpu_ptr(dst, region->dstOffset),
   };

   unsigned alignment = ffs((info.src | info.dst | region->size) & 15);
   unsigned log2blksz = alignment ? alignment - 1 : 4;

   assert(log2blksz < ARRAY_SIZE(cmdbuf->device->physical_device->meta.copy.buf2buf));
   mali_ptr rsd =
      cmdbuf->device->physical_device->meta.copy.buf2buf[log2blksz].rsd;

   mali_ptr pushconsts =
      pan_pool_upload_aligned(&cmdbuf->desc_pool.base, &info, sizeof(info), 16);

   panvk_per_arch(cmd_close_batch)(cmdbuf);

   struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf);

   panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false);

   mali_ptr tsd = batch->tls.gpu;

   unsigned nblocks = region->size >> log2blksz;
   struct pan_compute_dim num_wg = { nblocks, 1, 1 };
   struct pan_compute_dim wg_sz = { 1, 1, 1 };
   struct panfrost_ptr job =
      panvk_meta_copy_emit_compute_job(&cmdbuf->desc_pool.base,
                                       &batch->scoreboard,
                                       &num_wg, &wg_sz,
                                       0, 0, pushconsts, rsd, tsd);

   util_dynarray_append(&batch->jobs, void *, job.cpu);

   batch->blit.src = src->bo;
   batch->blit.dst = dst->bo;
   panvk_per_arch(cmd_close_batch)(cmdbuf);
}
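/*
 * The ffs() trick above selects the largest power-of-two block size (up to
 * 16 bytes) that divides the source address, destination address and copy
 * size all at once. E.g. with src = 0x1010, dst = 0x2020 and size = 64,
 * (src | dst | size) & 15 == 0, so ffs() returns 0 and log2blksz = 4,
 * i.e. 16-byte blocks. If any of the three is odd, bit 0 is set, ffs()
 * returns 1 and the copy falls back to 1-byte blocks.
 */
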
void
panvk_per_arch(CmdCopyBuffer2)(VkCommandBuffer commandBuffer,
                               const VkCopyBufferInfo2 *pCopyBufferInfo)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(panvk_buffer, src, pCopyBufferInfo->srcBuffer);
   VK_FROM_HANDLE(panvk_buffer, dst, pCopyBufferInfo->dstBuffer);

   for (unsigned i = 0; i < pCopyBufferInfo->regionCount; i++) {
      panvk_meta_copy_buf2buf(cmdbuf, src, dst, &pCopyBufferInfo->pRegions[i]);
   }
}
struct panvk_meta_fill_buf_info {
   mali_ptr start;
   uint32_t val;
} PACKED;

#define panvk_meta_fill_buf_get_info_field(b, field) \
        nir_load_push_constant((b), 1, \
                               sizeof(((struct panvk_meta_fill_buf_info *)0)->field) * 8, \
                               nir_imm_int(b, 0), \
                               .base = offsetof(struct panvk_meta_fill_buf_info, field), \
                               .range = ~0)
static mali_ptr
panvk_meta_fill_buf_shader(struct panfrost_device *pdev,
                           struct pan_pool *bin_pool,
                           struct pan_shader_info *shader_info)
{
   /* FIXME: Won't work on compute queues. As with the buffer-to-buffer
    * copy, the AFBC caveat is inherited from the image paths and doesn't
    * apply: a buffer destination can never be AFBC-compressed.
    */
   nir_builder b =
      nir_builder_init_simple_shader(MESA_SHADER_COMPUTE,
                                     GENX(pan_shader_get_compiler_options)(),
                                     "panvk_meta_fill_buf()");

   nir_ssa_def *coord = nir_load_global_invocation_id(&b, 32);

   nir_ssa_def *offset =
      nir_u2u64(&b, nir_imul(&b, nir_channel(&b, coord, 0),
                             nir_imm_int(&b, sizeof(uint32_t))));
   nir_ssa_def *ptr =
      nir_iadd(&b, panvk_meta_fill_buf_get_info_field(&b, start), offset);
   nir_ssa_def *val = panvk_meta_fill_buf_get_info_field(&b, val);

   nir_store_global(&b, ptr, sizeof(uint32_t), val, 1);

   struct panfrost_compile_inputs inputs = {
      .gpu_id = pdev->gpu_id,
      .is_blit = true,
      .no_ubo_to_push = true,
   };

   struct util_dynarray binary;

   util_dynarray_init(&binary, NULL);
   GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info);

   shader_info->push.count =
      DIV_ROUND_UP(sizeof(struct panvk_meta_fill_buf_info), 4);

   mali_ptr shader =
      pan_pool_upload_aligned(bin_pool, binary.data, binary.size, 128);

   util_dynarray_fini(&binary);
   ralloc_free(b.shader);

   return shader;
}
static mali_ptr
panvk_meta_fill_buf_emit_rsd(struct panfrost_device *pdev,
                             struct pan_pool *bin_pool,
                             struct pan_pool *desc_pool)
{
   struct pan_shader_info shader_info;

   mali_ptr shader =
      panvk_meta_fill_buf_shader(pdev, bin_pool, &shader_info);

   struct panfrost_ptr rsd_ptr =
      pan_pool_alloc_desc_aggregate(desc_pool,
                                    PAN_DESC(RENDERER_STATE));

   pan_pack(rsd_ptr.cpu, RENDERER_STATE, cfg) {
      pan_shader_prepare_rsd(&shader_info, shader, &cfg);
   }

   return rsd_ptr.gpu;
}

static void
panvk_meta_fill_buf_init(struct panvk_physical_device *dev)
{
   dev->meta.copy.fillbuf.rsd =
      panvk_meta_fill_buf_emit_rsd(&dev->pdev, &dev->meta.bin_pool.base,
                                   &dev->meta.desc_pool.base);
}
static void
panvk_meta_fill_buf(struct panvk_cmd_buffer *cmdbuf,
                    const struct panvk_buffer *dst,
                    VkDeviceSize size, VkDeviceSize offset,
                    uint32_t val)
{
   struct panvk_meta_fill_buf_info info = {
      .start = panvk_buffer_gpu_ptr(dst, offset),
      .val = val,
   };
   size = panvk_buffer_range(dst, offset, size);

   /* From the Vulkan spec:
    *
    *    "size is the number of bytes to fill, and must be either a multiple
    *    of 4, or VK_WHOLE_SIZE to fill the range from offset to the end of
    *    the buffer. If VK_WHOLE_SIZE is used and the remaining size of the
    *    buffer is not a multiple of 4, then the nearest smaller multiple is
    *    used."
    */
   size &= ~3ull;

   assert(!(offset & 3) && !(size & 3));

   unsigned nwords = size / sizeof(uint32_t);
   mali_ptr rsd =
      cmdbuf->device->physical_device->meta.copy.fillbuf.rsd;

   mali_ptr pushconsts =
      pan_pool_upload_aligned(&cmdbuf->desc_pool.base, &info, sizeof(info), 16);

   panvk_per_arch(cmd_close_batch)(cmdbuf);

   struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf);

   panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false);

   mali_ptr tsd = batch->tls.gpu;

   struct pan_compute_dim num_wg = { nwords, 1, 1 };
   struct pan_compute_dim wg_sz = { 1, 1, 1 };
   struct panfrost_ptr job =
      panvk_meta_copy_emit_compute_job(&cmdbuf->desc_pool.base,
                                       &batch->scoreboard,
                                       &num_wg, &wg_sz,
                                       0, 0, pushconsts, rsd, tsd);

   util_dynarray_append(&batch->jobs, void *, job.cpu);

   batch->blit.dst = dst->bo;
   panvk_per_arch(cmd_close_batch)(cmdbuf);
}
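/*
 * The fill above runs one invocation per 32-bit word: nwords workgroups of
 * size 1x1x1, each storing info.val at start + 4 * global_invocation_id.x.
 */
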
void
panvk_per_arch(CmdFillBuffer)(VkCommandBuffer commandBuffer,
                              VkBuffer dstBuffer,
                              VkDeviceSize dstOffset,
                              VkDeviceSize fillSize,
                              uint32_t data)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(panvk_buffer, dst, dstBuffer);

   panvk_meta_fill_buf(cmdbuf, dst, fillSize, dstOffset, data);
}
static void
panvk_meta_update_buf(struct panvk_cmd_buffer *cmdbuf,
                      const struct panvk_buffer *dst, VkDeviceSize offset,
                      VkDeviceSize size, const void *data)
{
   struct panvk_meta_copy_buf2buf_info info = {
      .src = pan_pool_upload_aligned(&cmdbuf->desc_pool.base, data, size, 4),
      .dst = panvk_buffer_gpu_ptr(dst, offset),
   };

   unsigned log2blksz = ffs(sizeof(uint32_t)) - 1;

   mali_ptr rsd =
      cmdbuf->device->physical_device->meta.copy.buf2buf[log2blksz].rsd;

   mali_ptr pushconsts =
      pan_pool_upload_aligned(&cmdbuf->desc_pool.base, &info, sizeof(info), 16);

   panvk_per_arch(cmd_close_batch)(cmdbuf);

   struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf);

   panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false);

   mali_ptr tsd = batch->tls.gpu;

   unsigned nblocks = size >> log2blksz;
   struct pan_compute_dim num_wg = { nblocks, 1, 1 };
   struct pan_compute_dim wg_sz = { 1, 1, 1 };
   struct panfrost_ptr job =
      panvk_meta_copy_emit_compute_job(&cmdbuf->desc_pool.base,
                                       &batch->scoreboard,
                                       &num_wg, &wg_sz,
                                       0, 0, pushconsts, rsd, tsd);

   util_dynarray_append(&batch->jobs, void *, job.cpu);

   batch->blit.dst = dst->bo;
   panvk_per_arch(cmd_close_batch)(cmdbuf);
}
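/*
 * vkCmdUpdateBuffer() requires dataSize to be a multiple of 4, which is why
 * the update above can unconditionally reuse the 4-byte-block buf2buf
 * pipeline: the data is first staged in the descriptor pool, then copied
 * into the destination buffer on the GPU.
 */
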
void
panvk_per_arch(CmdUpdateBuffer)(VkCommandBuffer commandBuffer,
                                VkBuffer dstBuffer,
                                VkDeviceSize dstOffset,
                                VkDeviceSize dataSize,
                                const void *pData)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(panvk_buffer, dst, dstBuffer);

   panvk_meta_update_buf(cmdbuf, dst, dstOffset, dataSize, pData);
}
void
panvk_per_arch(meta_copy_init)(struct panvk_physical_device *dev)
{
   panvk_meta_copy_img2img_init(dev, false);
   panvk_meta_copy_img2img_init(dev, true);
   panvk_meta_copy_buf2img_init(dev);
   panvk_meta_copy_img2buf_init(dev);
   panvk_meta_copy_buf2buf_init(dev);
   panvk_meta_fill_buf_init(dev);
}