/*
 * Copyright © 2021 Collabora Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "gen_macros.h"

#include "nir/nir_builder.h"
#include "pan_encoder.h"
#include "pan_props.h"
#include "pan_shader.h"

#include "panvk_private.h"

static mali_ptr
panvk_meta_copy_img_emit_texture(struct pan_pool *desc_pool,
                                 const struct pan_image_view *view)
{
   struct panfrost_ptr texture = pan_pool_alloc_desc(desc_pool, TEXTURE);
   size_t payload_size = GENX(panfrost_estimate_texture_payload_size)(view);
   struct panfrost_ptr surfaces = pan_pool_alloc_aligned(
      desc_pool, payload_size, pan_alignment(SURFACE_WITH_STRIDE));

   GENX(panfrost_new_texture)(view, texture.cpu, &surfaces);

   return texture.gpu;
}

static mali_ptr
panvk_meta_copy_img_emit_sampler(struct pan_pool *desc_pool)
{
   struct panfrost_ptr sampler = pan_pool_alloc_desc(desc_pool, SAMPLER);

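   /* Meta copies address source texels 1:1, so point sampling with
    * unnormalized coordinates is all we need.
    */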
   pan_pack(sampler.cpu, SAMPLER, cfg) {
      cfg.seamless_cube_map = false;
      cfg.normalized_coordinates = false;
      cfg.minify_nearest = true;
      cfg.magnify_nearest = true;
   }

   return sampler.gpu;
}

static void
panvk_meta_copy_emit_varying(struct pan_pool *pool, mali_ptr coordinates,
                             mali_ptr *varying_bufs, mali_ptr *varyings)
{
   struct panfrost_ptr varying = pan_pool_alloc_desc(pool, ATTRIBUTE);
   struct panfrost_ptr varying_buffer =
      pan_pool_alloc_desc_array(pool, 2, ATTRIBUTE_BUFFER);

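   /* The coordinate buffer holds one vec4 per vertex of the 4-vertex
    * triangle strip emitted by the tiler job, hence the 16-byte stride
    * and 4-element size.
    */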
   pan_pack(varying_buffer.cpu, ATTRIBUTE_BUFFER, cfg) {
      cfg.pointer = coordinates;
      cfg.stride = 4 * sizeof(uint32_t);
      cfg.size = cfg.stride * 4;
   }

   /* Bifrost needs an empty desc to mark end of prefetching */
   pan_pack(varying_buffer.cpu + pan_size(ATTRIBUTE_BUFFER), ATTRIBUTE_BUFFER,
            cfg)
      ;

   pan_pack(varying.cpu, ATTRIBUTE, cfg) {
      enum pipe_format f = PIPE_FORMAT_R32G32B32_FLOAT;

      cfg.buffer_index = 0;
      cfg.format = GENX(panfrost_format_from_pipe_format)(f)->hw;
   }

   *varyings = varying.gpu;
   *varying_bufs = varying_buffer.gpu;
}

static void
panvk_meta_copy_emit_dcd(struct pan_pool *pool, mali_ptr src_coords,
                         mali_ptr dst_coords, mali_ptr texture,
                         mali_ptr sampler, mali_ptr vpd, mali_ptr tsd,
                         mali_ptr rsd, mali_ptr push_constants, void *out)
{
   pan_pack(out, DRAW, cfg) {
      cfg.thread_storage = tsd;
      cfg.state = rsd;
      cfg.push_uniforms = push_constants;
      cfg.position = dst_coords;
      if (src_coords) {
         panvk_meta_copy_emit_varying(pool, src_coords, &cfg.varying_buffers,
                                      &cfg.varyings);
      }
      cfg.viewport = vpd;
      cfg.textures = texture;
      cfg.samplers = sampler;
   }
}

static struct panfrost_ptr
panvk_meta_copy_emit_tiler_job(struct pan_pool *desc_pool, struct pan_jc *jc,
                               mali_ptr src_coords, mali_ptr dst_coords,
                               mali_ptr texture, mali_ptr sampler,
                               mali_ptr push_constants, mali_ptr vpd,
                               mali_ptr rsd, mali_ptr tsd, mali_ptr tiler)
{
   struct panfrost_ptr job = pan_pool_alloc_desc(desc_pool, TILER_JOB);

   panvk_meta_copy_emit_dcd(desc_pool, src_coords, dst_coords, texture, sampler,
                            vpd, tsd, rsd, push_constants,
                            pan_section_ptr(job.cpu, TILER_JOB, DRAW));

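   /* The copy is rendered as a 4-vertex triangle strip covering the
    * destination rectangle.
    */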
   pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE, cfg) {
      cfg.draw_mode = MALI_DRAW_MODE_TRIANGLE_STRIP;
      cfg.index_count = 4;
      cfg.job_task_split = 6;
   }

   pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE_SIZE, cfg) {
      cfg.constant = 1.0f;
   }

   void *invoc = pan_section_ptr(job.cpu, TILER_JOB, INVOCATION);
   panfrost_pack_work_groups_compute(invoc, 1, 4, 1, 1, 1, 1, true, false);

   pan_section_pack(job.cpu, TILER_JOB, PADDING, cfg)
      ;
   pan_section_pack(job.cpu, TILER_JOB, TILER, cfg) {
      cfg.address = tiler;
   }

   pan_jc_add_job(desc_pool, jc, MALI_JOB_TYPE_TILER, false, false, 0, 0, &job,
                  false);
   return job;
}

static struct panfrost_ptr
panvk_meta_copy_emit_compute_job(struct pan_pool *desc_pool, struct pan_jc *jc,
                                 const struct pan_compute_dim *num_wg,
                                 const struct pan_compute_dim *wg_sz,
                                 mali_ptr texture, mali_ptr sampler,
                                 mali_ptr push_constants, mali_ptr rsd,
                                 mali_ptr tsd)
{
   struct panfrost_ptr job = pan_pool_alloc_desc(desc_pool, COMPUTE_JOB);

   void *invoc = pan_section_ptr(job.cpu, COMPUTE_JOB, INVOCATION);
   panfrost_pack_work_groups_compute(invoc, num_wg->x, num_wg->y, num_wg->z,
                                     wg_sz->x, wg_sz->y, wg_sz->z, false,
                                     false);

   pan_section_pack(job.cpu, COMPUTE_JOB, PARAMETERS, cfg) {
      cfg.job_task_split = 8;
   }

   panvk_meta_copy_emit_dcd(desc_pool, 0, 0, texture, sampler, 0, tsd, rsd,
                            push_constants,
                            pan_section_ptr(job.cpu, COMPUTE_JOB, DRAW));

   pan_jc_add_job(desc_pool, jc, MALI_JOB_TYPE_COMPUTE, false, false, 0, 0,
                  &job, false);
   return job;
}

static uint32_t
panvk_meta_copy_img_bifrost_raw_format(unsigned texelsize)
{
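   /* Raw (non-blendable) copies use the UINT format matching the texel
    * size. The MALI_* format enums live in the upper bits of the hardware
    * format word, above the swizzle bits, hence the << 12.
    */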
   switch (texelsize) {
   case 6:
      return MALI_RGB16UI << 12;
   case 8:
      return MALI_RG32UI << 12;
   case 12:
      return MALI_RGB32UI << 12;
   case 16:
      return MALI_RGBA32UI << 12;
   default:
      unreachable("Invalid texel size\n");
   }
}

static mali_ptr
panvk_meta_copy_to_img_emit_rsd(struct pan_pool *desc_pool, mali_ptr shader,
                                const struct pan_shader_info *shader_info,
                                enum pipe_format fmt, unsigned wrmask,
                                bool from_img)
{
   struct panfrost_ptr rsd_ptr = pan_pool_alloc_desc_aggregate(
      desc_pool, PAN_DESC(RENDERER_STATE), PAN_DESC_ARRAY(1, BLEND));

   bool raw = util_format_get_blocksize(fmt) > 4;
   unsigned fullmask = (1 << util_format_get_nr_components(fmt)) - 1;
   bool partialwrite = fullmask != wrmask && !raw;
   bool readstb = fullmask != wrmask && raw;

   pan_pack(rsd_ptr.cpu, RENDERER_STATE, cfg) {
      pan_shader_prepare_rsd(shader_info, shader, &cfg);
      if (from_img) {
         cfg.shader.varying_count = 1;
         cfg.shader.texture_count = 1;
         cfg.shader.sampler_count = 1;
      }
      cfg.properties.depth_source = MALI_DEPTH_SOURCE_FIXED_FUNCTION;
      cfg.multisample_misc.sample_mask = UINT16_MAX;
      cfg.multisample_misc.depth_function = MALI_FUNC_ALWAYS;
      cfg.stencil_mask_misc.stencil_mask_front = 0xFF;
      cfg.stencil_mask_misc.stencil_mask_back = 0xFF;
      cfg.stencil_front.compare_function = MALI_FUNC_ALWAYS;
      cfg.stencil_front.stencil_fail = MALI_STENCIL_OP_REPLACE;
      cfg.stencil_front.depth_fail = MALI_STENCIL_OP_REPLACE;
      cfg.stencil_front.depth_pass = MALI_STENCIL_OP_REPLACE;
      cfg.stencil_front.mask = 0xFF;
      cfg.stencil_back = cfg.stencil_front;

      cfg.properties.allow_forward_pixel_to_be_killed = true;
      cfg.properties.allow_forward_pixel_to_kill = !partialwrite && !readstb;
      cfg.properties.zs_update_operation = MALI_PIXEL_KILL_STRONG_EARLY;
      cfg.properties.pixel_kill_operation = MALI_PIXEL_KILL_FORCE_EARLY;
   }

   pan_pack(rsd_ptr.cpu + pan_size(RENDERER_STATE), BLEND, cfg) {
      cfg.round_to_fb_precision = true;
      cfg.load_destination = partialwrite;
      cfg.equation.rgb.a = MALI_BLEND_OPERAND_A_SRC;
      cfg.equation.rgb.b = MALI_BLEND_OPERAND_B_SRC;
      cfg.equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO;
      cfg.equation.alpha.a = MALI_BLEND_OPERAND_A_SRC;
      cfg.equation.alpha.b = MALI_BLEND_OPERAND_B_SRC;
      cfg.equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO;
      cfg.internal.mode =
         partialwrite ? MALI_BLEND_MODE_FIXED_FUNCTION : MALI_BLEND_MODE_OPAQUE;
      cfg.equation.color_mask = partialwrite ? wrmask : 0xf;
      cfg.internal.fixed_function.num_comps = 4;
      if (!raw) {
         cfg.internal.fixed_function.conversion.memory_format =
            GENX(panfrost_dithered_format_from_pipe_format)(fmt, false);
         cfg.internal.fixed_function.conversion.register_format =
            MALI_REGISTER_FILE_FORMAT_F32;
      } else {
         unsigned imgtexelsz = util_format_get_blocksize(fmt);

         cfg.internal.fixed_function.conversion.memory_format =
            panvk_meta_copy_img_bifrost_raw_format(imgtexelsz);
         cfg.internal.fixed_function.conversion.register_format =
            (imgtexelsz & 2) ? MALI_REGISTER_FILE_FORMAT_U16
                             : MALI_REGISTER_FILE_FORMAT_U32;
      }
   }

   return rsd_ptr.gpu;
}

static mali_ptr
panvk_meta_copy_to_buf_emit_rsd(struct pan_pool *desc_pool, mali_ptr shader,
                                const struct pan_shader_info *shader_info,
                                bool from_img)
{
   struct panfrost_ptr rsd_ptr =
      pan_pool_alloc_desc_aggregate(desc_pool, PAN_DESC(RENDERER_STATE));

   pan_pack(rsd_ptr.cpu, RENDERER_STATE, cfg) {
      pan_shader_prepare_rsd(shader_info, shader, &cfg);
      if (from_img) {
         cfg.shader.texture_count = 1;
         cfg.shader.sampler_count = 1;
      }
   }

   return rsd_ptr.gpu;
}

static mali_ptr
panvk_meta_copy_img2img_shader(struct panvk_device *dev,
                               enum pipe_format srcfmt, enum pipe_format dstfmt,
                               unsigned dstmask, unsigned texdim,
                               bool texisarray, bool is_ms,
                               struct pan_shader_info *shader_info)
{
   struct pan_pool *bin_pool = &dev->meta.bin_pool.base;

   nir_builder b = nir_builder_init_simple_shader(
      MESA_SHADER_FRAGMENT, GENX(pan_shader_get_compiler_options)(),
      "panvk_meta_copy_img2img(srcfmt=%s,dstfmt=%s,%dD%s%s)",
      util_format_name(srcfmt), util_format_name(dstfmt), texdim,
      texisarray ? "[]" : "", is_ms ? ",ms" : "");

   nir_variable *coord_var = nir_variable_create(
      b.shader, nir_var_shader_in,
      glsl_vector_type(GLSL_TYPE_FLOAT, texdim + texisarray), "coord");
   coord_var->data.location = VARYING_SLOT_VAR0;
   nir_def *coord = nir_f2u32(&b, nir_load_var(&b, coord_var));

   nir_tex_instr *tex = nir_tex_instr_create(b.shader, is_ms ? 2 : 1);
   tex->op = is_ms ? nir_texop_txf_ms : nir_texop_txf;
   tex->texture_index = 0;
   tex->is_array = texisarray;
   tex->dest_type =
      util_format_is_unorm(srcfmt) ? nir_type_float32 : nir_type_uint32;

   switch (texdim) {
   case 1:
      assert(!is_ms);
      tex->sampler_dim = GLSL_SAMPLER_DIM_1D;
      break;
   case 2:
      tex->sampler_dim = is_ms ? GLSL_SAMPLER_DIM_MS : GLSL_SAMPLER_DIM_2D;
      break;
   case 3:
      assert(!is_ms);
      tex->sampler_dim = GLSL_SAMPLER_DIM_3D;
      break;
   default:
      unreachable("Invalid texture dimension");
   }

   tex->src[0] = nir_tex_src_for_ssa(nir_tex_src_coord, coord);
   tex->coord_components = texdim + texisarray;

   if (is_ms) {
      tex->src[1] =
         nir_tex_src_for_ssa(nir_tex_src_ms_index, nir_load_sample_id(&b));
   }

   nir_def_init(&tex->instr, &tex->def, 4,
                nir_alu_type_get_type_size(tex->dest_type));
   nir_builder_instr_insert(&b, &tex->instr);

   nir_def *texel = &tex->def;

   unsigned dstcompsz =
      util_format_get_component_bits(dstfmt, UTIL_FORMAT_COLORSPACE_RGB, 0);
   unsigned ndstcomps = util_format_get_nr_components(dstfmt);
   const struct glsl_type *outtype = NULL;

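   /* RGB565 and RG88 share a 16-bit texel size, so copying between them is
    * a bit-for-bit repack of the texel:
    *    r8 = r5 | (g6 << 5)          (low byte)
    *    g8 = (g6 >> 3) | (b5 << 3)   (high byte)
    * and the reverse split for the RG88 -> RGB565 direction.
    */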
   if (srcfmt == PIPE_FORMAT_R5G6B5_UNORM && dstfmt == PIPE_FORMAT_R8G8_UNORM) {
      nir_def *rgb = nir_f2u32(
         &b, nir_fmul(&b, texel,
                      nir_vec3(&b, nir_imm_float(&b, 31), nir_imm_float(&b, 63),
                               nir_imm_float(&b, 31))));
      nir_def *rg = nir_vec2(
         &b,
         nir_ior(&b, nir_channel(&b, rgb, 0),
                 nir_ishl(&b, nir_channel(&b, rgb, 1), nir_imm_int(&b, 5))),
         nir_ior(&b, nir_ushr_imm(&b, nir_channel(&b, rgb, 1), 3),
                 nir_ishl(&b, nir_channel(&b, rgb, 2), nir_imm_int(&b, 3))));
      rg = nir_iand_imm(&b, rg, 255);
      texel = nir_fmul_imm(&b, nir_u2f32(&b, rg), 1.0 / 255);
      outtype = glsl_vector_type(GLSL_TYPE_FLOAT, 2);
   } else if (srcfmt == PIPE_FORMAT_R8G8_UNORM &&
              dstfmt == PIPE_FORMAT_R5G6B5_UNORM) {
      nir_def *rg = nir_f2u32(&b, nir_fmul_imm(&b, texel, 255));
      nir_def *rgb = nir_vec3(
         &b, nir_channel(&b, rg, 0),
         nir_ior(&b, nir_ushr_imm(&b, nir_channel(&b, rg, 0), 5),
                 nir_ishl(&b, nir_channel(&b, rg, 1), nir_imm_int(&b, 3))),
         nir_ushr_imm(&b, nir_channel(&b, rg, 1), 3));
      rgb = nir_iand(&b, rgb,
                     nir_vec3(&b, nir_imm_int(&b, 31), nir_imm_int(&b, 63),
                              nir_imm_int(&b, 31)));
      texel = nir_fmul(
         &b, nir_u2f32(&b, rgb),
         nir_vec3(&b, nir_imm_float(&b, 1.0 / 31), nir_imm_float(&b, 1.0 / 63),
                  nir_imm_float(&b, 1.0 / 31)));
      outtype = glsl_vector_type(GLSL_TYPE_FLOAT, 3);
   } else {
      assert(srcfmt == dstfmt);
      enum glsl_base_type basetype;
      if (util_format_is_unorm(dstfmt)) {
         basetype = GLSL_TYPE_FLOAT;
      } else if (dstcompsz == 16) {
         basetype = GLSL_TYPE_UINT16;
      } else {
         assert(dstcompsz == 32);
         basetype = GLSL_TYPE_UINT;
      }

      if (dstcompsz == 16)
         texel = nir_u2u16(&b, texel);

      texel = nir_trim_vector(&b, texel, ndstcomps);
      outtype = glsl_vector_type(basetype, ndstcomps);
   }

   nir_variable *out =
      nir_variable_create(b.shader, nir_var_shader_out, outtype, "out");
   out->data.location = FRAG_RESULT_DATA0;

   unsigned fullmask = (1 << ndstcomps) - 1;
   if (dstcompsz > 8 && dstmask != fullmask) {
      nir_def *oldtexel = nir_load_var(&b, out);
      nir_def *dstcomps[4];

      for (unsigned i = 0; i < ndstcomps; i++) {
         if (dstmask & BITFIELD_BIT(i))
            dstcomps[i] = nir_channel(&b, texel, i);
         else
            dstcomps[i] = nir_channel(&b, oldtexel, i);
      }

      texel = nir_vec(&b, dstcomps, ndstcomps);
   }

   nir_store_var(&b, out, texel, 0xff);

   struct panfrost_compile_inputs inputs = {
      .gpu_id = dev->physical_device->kmod.props.gpu_prod_id,
      .is_blit = true,
      .no_ubo_to_push = true,
   };

   struct util_dynarray binary;

   util_dynarray_init(&binary, NULL);
   pan_shader_preprocess(b.shader, inputs.gpu_id);
   NIR_PASS_V(b.shader, GENX(pan_inline_rt_conversion), &dstfmt);
   GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info);

   shader_info->fs.sample_shading = is_ms;

   mali_ptr shader =
      pan_pool_upload_aligned(bin_pool, binary.data, binary.size, 128);

   util_dynarray_fini(&binary);
   ralloc_free(b.shader);

   return shader;
}

static enum pipe_format
panvk_meta_copy_img_format(enum pipe_format fmt)
{
   /* We can't use a non-compressed format when handling a tiled/AFBC
    * compressed format because the tile sizes differ (4x4 blocks for
    * compressed formats and 16x16 texels for non-compressed ones).
    */
   assert(!util_format_is_compressed(fmt));

   /* Pick blendable formats when we can, otherwise pick the UINT variant
    * matching the texel size.
    */
   switch (util_format_get_blocksize(fmt)) {
   case 16:
      return PIPE_FORMAT_R32G32B32A32_UINT;
   case 12:
      return PIPE_FORMAT_R32G32B32_UINT;
   case 8:
      return PIPE_FORMAT_R32G32_UINT;
   case 6:
      return PIPE_FORMAT_R16G16B16_UINT;
   case 4:
      return PIPE_FORMAT_R8G8B8A8_UNORM;
   case 2:
      return (fmt == PIPE_FORMAT_R5G6B5_UNORM ||
              fmt == PIPE_FORMAT_B5G6R5_UNORM)
                ? PIPE_FORMAT_R5G6B5_UNORM
                : PIPE_FORMAT_R8G8_UNORM;
   case 1:
      return PIPE_FORMAT_R8_UNORM;
   default:
      unreachable("Unsupported format\n");
   }
}

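/* Lookup keys are compared with memcmp() in the *_format_idx() helpers,
 * so the key structs are PACKED to keep padding bytes out of the
 * comparison.
 */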
struct panvk_meta_copy_img2img_format_info {
   enum pipe_format srcfmt;
   enum pipe_format dstfmt;
   unsigned dstmask;
} PACKED;

static const struct panvk_meta_copy_img2img_format_info
   panvk_meta_copy_img2img_fmts[] = {
      {PIPE_FORMAT_R8_UNORM, PIPE_FORMAT_R8_UNORM, 0x1},
      {PIPE_FORMAT_R5G6B5_UNORM, PIPE_FORMAT_R5G6B5_UNORM, 0x7},
      {PIPE_FORMAT_R5G6B5_UNORM, PIPE_FORMAT_R8G8_UNORM, 0x3},
      {PIPE_FORMAT_R8G8_UNORM, PIPE_FORMAT_R5G6B5_UNORM, 0x7},
      {PIPE_FORMAT_R8G8_UNORM, PIPE_FORMAT_R8G8_UNORM, 0x3},
      /* Z24S8(depth) */
      {PIPE_FORMAT_R8G8B8A8_UNORM, PIPE_FORMAT_R8G8B8A8_UNORM, 0x7},
      /* Z24S8(stencil) */
      {PIPE_FORMAT_R8G8B8A8_UNORM, PIPE_FORMAT_R8G8B8A8_UNORM, 0x8},
      {PIPE_FORMAT_R8G8B8A8_UNORM, PIPE_FORMAT_R8G8B8A8_UNORM, 0xf},
      {PIPE_FORMAT_R16G16B16_UINT, PIPE_FORMAT_R16G16B16_UINT, 0x7},
      {PIPE_FORMAT_R32G32_UINT, PIPE_FORMAT_R32G32_UINT, 0x3},
      /* Z32S8X24(depth) */
      {PIPE_FORMAT_R32G32_UINT, PIPE_FORMAT_R32G32_UINT, 0x1},
      /* Z32S8X24(stencil) */
      {PIPE_FORMAT_R32G32_UINT, PIPE_FORMAT_R32G32_UINT, 0x2},
      {PIPE_FORMAT_R32G32B32_UINT, PIPE_FORMAT_R32G32B32_UINT, 0x7},
      {PIPE_FORMAT_R32G32B32A32_UINT, PIPE_FORMAT_R32G32B32A32_UINT, 0xf},
};

static unsigned
panvk_meta_copy_img2img_format_idx(
   struct panvk_meta_copy_img2img_format_info key)
{
   STATIC_ASSERT(ARRAY_SIZE(panvk_meta_copy_img2img_fmts) ==
                 PANVK_META_COPY_IMG2IMG_NUM_FORMATS);

   for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_img2img_fmts); i++) {
      if (!memcmp(&key, &panvk_meta_copy_img2img_fmts[i], sizeof(key)))
         return i;
   }

   unreachable("Invalid image format\n");
}

static unsigned
panvk_meta_copy_img_mask(enum pipe_format imgfmt, VkImageAspectFlags aspectMask)
{
   if (aspectMask != VK_IMAGE_ASPECT_DEPTH_BIT &&
       aspectMask != VK_IMAGE_ASPECT_STENCIL_BIT) {
      enum pipe_format outfmt = panvk_meta_copy_img_format(imgfmt);

      return (1 << util_format_get_nr_components(outfmt)) - 1;
   }

   switch (imgfmt) {
   case PIPE_FORMAT_S8_UINT:
      return 1;
   case PIPE_FORMAT_Z16_UNORM:
      return 3;
   case PIPE_FORMAT_Z16_UNORM_S8_UINT:
      return aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT ? 3 : 8;
   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
      return aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT ? 7 : 8;
   case PIPE_FORMAT_Z24X8_UNORM:
      assert(aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT);
      return 7;
   case PIPE_FORMAT_Z32_FLOAT:
      return 0xf;
   case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
      return aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT ? 1 : 2;
   default:
      unreachable("Invalid depth format\n");
   }
}

static void
panvk_meta_copy_img2img(struct panvk_cmd_buffer *cmdbuf,
                        const struct panvk_image *src,
                        const struct panvk_image *dst,
                        const VkImageCopy2 *region)
{
   struct pan_fb_info *fbinfo = &cmdbuf->state.fb.info;
   struct panvk_meta_copy_img2img_format_info key = {
      .srcfmt = panvk_meta_copy_img_format(src->pimage.layout.format),
      .dstfmt = panvk_meta_copy_img_format(dst->pimage.layout.format),
      .dstmask = panvk_meta_copy_img_mask(dst->pimage.layout.format,
                                          region->dstSubresource.aspectMask),
   };

   assert(src->pimage.layout.nr_samples == dst->pimage.layout.nr_samples);

   unsigned texdimidx = panvk_meta_copy_tex_type(
      src->pimage.layout.dim, src->pimage.layout.array_size > 1);
   unsigned fmtidx = panvk_meta_copy_img2img_format_idx(key);
   unsigned ms = dst->pimage.layout.nr_samples > 1 ? 1 : 0;

   mali_ptr rsd =
      cmdbuf->device->meta.copy.img2img[ms][texdimidx][fmtidx].rsd;

   struct pan_image_view srcview = {
      .format = key.srcfmt,
      .dim = src->pimage.layout.dim == MALI_TEXTURE_DIMENSION_CUBE
                ? MALI_TEXTURE_DIMENSION_2D
                : src->pimage.layout.dim,
      .planes[0] = &src->pimage,
      .nr_samples = src->pimage.layout.nr_samples,
      .first_level = region->srcSubresource.mipLevel,
      .last_level = region->srcSubresource.mipLevel,
      .first_layer = region->srcSubresource.baseArrayLayer,
      .last_layer = region->srcSubresource.baseArrayLayer +
                    region->srcSubresource.layerCount - 1,
      .swizzle = {PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z,
                  PIPE_SWIZZLE_W},
   };

   struct pan_image_view dstview = {
      .format = key.dstfmt,
      .dim = MALI_TEXTURE_DIMENSION_2D,
      .planes[0] = &dst->pimage,
      .nr_samples = dst->pimage.layout.nr_samples,
      .first_level = region->dstSubresource.mipLevel,
      .last_level = region->dstSubresource.mipLevel,
      .swizzle = {PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z,
                  PIPE_SWIZZLE_W},
   };

   unsigned minx = MAX2(region->dstOffset.x, 0);
   unsigned miny = MAX2(region->dstOffset.y, 0);
   unsigned maxx = MAX2(region->dstOffset.x + region->extent.width - 1, 0);
   unsigned maxy = MAX2(region->dstOffset.y + region->extent.height - 1, 0);

   mali_ptr vpd = panvk_per_arch(meta_emit_viewport)(&cmdbuf->desc_pool.base,
                                                     minx, miny, maxx, maxy);

   float dst_rect[] = {
      minx, miny, 0.0, 1.0, maxx + 1, miny, 0.0, 1.0,
      minx, maxy + 1, 0.0, 1.0, maxx + 1, maxy + 1, 0.0, 1.0,
   };

   mali_ptr dst_coords = pan_pool_upload_aligned(
      &cmdbuf->desc_pool.base, dst_rect, sizeof(dst_rect), 64);

   /* TODO: don't force preloads of dst resources if unneeded */

   unsigned width =
      u_minify(dst->pimage.layout.width, region->dstSubresource.mipLevel);
   unsigned height =
      u_minify(dst->pimage.layout.height, region->dstSubresource.mipLevel);
   cmdbuf->state.fb.crc_valid[0] = false;
   *fbinfo = (struct pan_fb_info){
      .tile_buf_budget = panfrost_query_optimal_tib_size(
         cmdbuf->device->physical_device->model),
      .width = width,
      .height = height,
      .extent.minx = minx & ~31,
      .extent.miny = miny & ~31,
      .extent.maxx = MIN2(ALIGN_POT(maxx + 1, 32), width) - 1,
      .extent.maxy = MIN2(ALIGN_POT(maxy + 1, 32), height) - 1,
      .nr_samples = dst->pimage.layout.nr_samples,
      .rt_count = 1,
      .rts[0].view = &dstview,
      .rts[0].preload = true,
      .rts[0].crc_valid = &cmdbuf->state.fb.crc_valid[0],
   };

   mali_ptr texture =
      panvk_meta_copy_img_emit_texture(&cmdbuf->desc_pool.base, &srcview);
   mali_ptr sampler = panvk_meta_copy_img_emit_sampler(&cmdbuf->desc_pool.base);

   panvk_per_arch(cmd_close_batch)(cmdbuf);

   minx = MAX2(region->srcOffset.x, 0);
   miny = MAX2(region->srcOffset.y, 0);
   maxx = MAX2(region->srcOffset.x + region->extent.width - 1, 0);
   maxy = MAX2(region->srcOffset.y + region->extent.height - 1, 0);
   assert(region->dstOffset.z >= 0);

   unsigned first_src_layer = MAX2(0, region->srcOffset.z);
   unsigned first_dst_layer =
      MAX2(region->dstSubresource.baseArrayLayer, region->dstOffset.z);
   unsigned nlayers =
      MAX2(region->dstSubresource.layerCount, region->extent.depth);
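   /* One render pass per layer: the render target view addresses a single
    * layer at a time, so dstview is retargeted on each iteration.
    */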
   for (unsigned l = 0; l < nlayers; l++) {
      unsigned src_l = l + first_src_layer;
      float src_rect[] = {
         minx, miny, src_l, 1.0, maxx + 1, miny, src_l, 1.0,
         minx, maxy + 1, src_l, 1.0, maxx + 1, maxy + 1, src_l, 1.0,
      };

      mali_ptr src_coords = pan_pool_upload_aligned(
         &cmdbuf->desc_pool.base, src_rect, sizeof(src_rect), 64);

      struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf);

      dstview.first_layer = dstview.last_layer = l + first_dst_layer;
      batch->blit.src = src->bo;
      batch->blit.dst = dst->bo;
      panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, true);
      panvk_per_arch(cmd_alloc_fb_desc)(cmdbuf);
      panvk_per_arch(cmd_prepare_tiler_context)(cmdbuf);

      mali_ptr tsd, tiler;

      tsd = batch->tls.gpu;
      tiler = batch->tiler.descs.gpu;

      struct panfrost_ptr job;

      job = panvk_meta_copy_emit_tiler_job(&cmdbuf->desc_pool.base, &batch->jc,
                                           src_coords, dst_coords, texture,
                                           sampler, 0, vpd, rsd, tsd, tiler);

      util_dynarray_append(&batch->jobs, void *, job.cpu);
      panvk_per_arch(cmd_close_batch)(cmdbuf);
   }
}

static void
panvk_meta_copy_img2img_init(struct panvk_device *dev, bool is_ms)
{
   STATIC_ASSERT(ARRAY_SIZE(panvk_meta_copy_img2img_fmts) ==
                 PANVK_META_COPY_IMG2IMG_NUM_FORMATS);

   for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_img2img_fmts); i++) {
      for (unsigned texdim = 1; texdim <= 3; texdim++) {
         unsigned texdimidx = panvk_meta_copy_tex_type(texdim, false);
         assert(texdimidx < ARRAY_SIZE(dev->meta.copy.img2img[0]));

         /* No MSAA on 1D/3D textures */
         if (texdim != 2 && is_ms)
            continue;

         struct pan_shader_info shader_info;
         mali_ptr shader = panvk_meta_copy_img2img_shader(
            dev, panvk_meta_copy_img2img_fmts[i].srcfmt,
            panvk_meta_copy_img2img_fmts[i].dstfmt,
            panvk_meta_copy_img2img_fmts[i].dstmask, texdim, false, is_ms,
            &shader_info);
         dev->meta.copy.img2img[is_ms][texdimidx][i].rsd =
            panvk_meta_copy_to_img_emit_rsd(
               &dev->meta.desc_pool.base, shader, &shader_info,
               panvk_meta_copy_img2img_fmts[i].dstfmt,
               panvk_meta_copy_img2img_fmts[i].dstmask, true);
         if (texdim == 3)
            continue;

         memset(&shader_info, 0, sizeof(shader_info));
         texdimidx = panvk_meta_copy_tex_type(texdim, true);
         assert(texdimidx < ARRAY_SIZE(dev->meta.copy.img2img[0]));
         shader = panvk_meta_copy_img2img_shader(
            dev, panvk_meta_copy_img2img_fmts[i].srcfmt,
            panvk_meta_copy_img2img_fmts[i].dstfmt,
            panvk_meta_copy_img2img_fmts[i].dstmask, texdim, true, is_ms,
            &shader_info);
         dev->meta.copy.img2img[is_ms][texdimidx][i].rsd =
            panvk_meta_copy_to_img_emit_rsd(
               &dev->meta.desc_pool.base, shader, &shader_info,
               panvk_meta_copy_img2img_fmts[i].dstfmt,
               panvk_meta_copy_img2img_fmts[i].dstmask, true);
      }
   }
}

void
panvk_per_arch(CmdCopyImage2)(VkCommandBuffer commandBuffer,
                              const VkCopyImageInfo2 *pCopyImageInfo)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(panvk_image, dst, pCopyImageInfo->dstImage);
   VK_FROM_HANDLE(panvk_image, src, pCopyImageInfo->srcImage);

   for (unsigned i = 0; i < pCopyImageInfo->regionCount; i++) {
      panvk_meta_copy_img2img(cmdbuf, src, dst, &pCopyImageInfo->pRegions[i]);
   }
}

static unsigned
panvk_meta_copy_buf_texelsize(enum pipe_format imgfmt, unsigned mask)
{
   unsigned imgtexelsz = util_format_get_blocksize(imgfmt);
   unsigned nbufcomps = util_bitcount(mask);

   if (nbufcomps == util_format_get_nr_components(imgfmt))
      return imgtexelsz;

   /* Special case for Z24 buffers which are not tightly packed */
   if (mask == 7 && imgtexelsz == 4)
      return 4;

   /* Special case for S8 extraction from Z32_S8X24 */
   if (mask == 2 && imgtexelsz == 8)
      return 1;

   unsigned compsz =
      util_format_get_component_bits(imgfmt, UTIL_FORMAT_COLORSPACE_RGB, 0);

   assert(!(compsz % 8));

   return nbufcomps * compsz / 8;
}

static enum pipe_format
panvk_meta_copy_buf2img_format(enum pipe_format imgfmt)
{
   /* Pick blendable formats when we can, and the UINT variant matching the
    * texel size otherwise.
    */
   switch (util_format_get_blocksize(imgfmt)) {
   case 1:
      return PIPE_FORMAT_R8_UNORM;
   /* AFBC stores things differently for RGB565,
    * we can't simply map to R8G8 in that case */
   case 2:
      return (imgfmt == PIPE_FORMAT_R5G6B5_UNORM ||
              imgfmt == PIPE_FORMAT_B5G6R5_UNORM)
                ? PIPE_FORMAT_R5G6B5_UNORM
                : PIPE_FORMAT_R8G8_UNORM;
   case 4:
      return PIPE_FORMAT_R8G8B8A8_UNORM;
   case 6:
      return PIPE_FORMAT_R16G16B16_UINT;
   case 8:
      return PIPE_FORMAT_R32G32_UINT;
   case 12:
      return PIPE_FORMAT_R32G32B32_UINT;
   case 16:
      return PIPE_FORMAT_R32G32B32A32_UINT;
   default:
      unreachable("Invalid format\n");
   }
}

struct panvk_meta_copy_format_info {
   enum pipe_format imgfmt;
   unsigned mask;
} PACKED;

static const struct panvk_meta_copy_format_info panvk_meta_copy_buf2img_fmts[] =
   {
      {PIPE_FORMAT_R8_UNORM, 0x1},
      {PIPE_FORMAT_R8G8_UNORM, 0x3},
      {PIPE_FORMAT_R5G6B5_UNORM, 0x7},
      {PIPE_FORMAT_R8G8B8A8_UNORM, 0xf},
      {PIPE_FORMAT_R16G16B16_UINT, 0x7},
      {PIPE_FORMAT_R32G32_UINT, 0x3},
      {PIPE_FORMAT_R32G32B32_UINT, 0x7},
      {PIPE_FORMAT_R32G32B32A32_UINT, 0xf},
      /* S8 -> Z24S8 */
      {PIPE_FORMAT_R8G8B8A8_UNORM, 0x8},
      /* S8 -> Z32_S8X24 */
      {PIPE_FORMAT_R32G32_UINT, 0x2},
      /* Z24X8 -> Z24S8 */
      {PIPE_FORMAT_R8G8B8A8_UNORM, 0x7},
      /* Z32 -> Z32_S8X24 */
      {PIPE_FORMAT_R32G32_UINT, 0x1},
};

struct panvk_meta_copy_buf2img_info {
   struct {
      mali_ptr ptr;
      struct {
         unsigned line;
         unsigned surf;
      } stride;
   } buf;
} PACKED;

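/* Loads one field of a panvk_meta_copy_buf2img_info struct that was
 * uploaded as push constants; the load size and offset are derived from
 * the struct layout at compile time.
 */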
#define panvk_meta_copy_buf2img_get_info_field(b, field)                      \
   nir_load_push_constant(                                                    \
      (b), 1, sizeof(((struct panvk_meta_copy_buf2img_info *)0)->field) * 8,  \
      nir_imm_int(b, 0),                                                      \
      .base = offsetof(struct panvk_meta_copy_buf2img_info, field),           \
      .range = ~0)

static mali_ptr
panvk_meta_copy_buf2img_shader(struct panvk_device *dev,
                               struct panvk_meta_copy_format_info key,
                               struct pan_shader_info *shader_info)
{
   struct pan_pool *bin_pool = &dev->meta.bin_pool.base;

   nir_builder b = nir_builder_init_simple_shader(
      MESA_SHADER_FRAGMENT, GENX(pan_shader_get_compiler_options)(),
      "panvk_meta_copy_buf2img(imgfmt=%s,mask=%x)",
      util_format_name(key.imgfmt), key.mask);

   nir_variable *coord_var =
      nir_variable_create(b.shader, nir_var_shader_in,
                          glsl_vector_type(GLSL_TYPE_FLOAT, 3), "coord");
   coord_var->data.location = VARYING_SLOT_VAR0;
   nir_def *coord = nir_load_var(&b, coord_var);

   coord = nir_f2u32(&b, coord);

   nir_def *bufptr = panvk_meta_copy_buf2img_get_info_field(&b, buf.ptr);
   nir_def *buflinestride =
      panvk_meta_copy_buf2img_get_info_field(&b, buf.stride.line);
   nir_def *bufsurfstride =
      panvk_meta_copy_buf2img_get_info_field(&b, buf.stride.surf);

   unsigned imgtexelsz = util_format_get_blocksize(key.imgfmt);
   unsigned buftexelsz = panvk_meta_copy_buf_texelsize(key.imgfmt, key.mask);
   unsigned writemask = key.mask;

   nir_def *offset =
      nir_imul(&b, nir_channel(&b, coord, 0), nir_imm_int(&b, buftexelsz));
   offset = nir_iadd(&b, offset,
                     nir_imul(&b, nir_channel(&b, coord, 1), buflinestride));
   offset = nir_iadd(&b, offset,
                     nir_imul(&b, nir_channel(&b, coord, 2), bufsurfstride));
   bufptr = nir_iadd(&b, bufptr, nir_u2u64(&b, offset));

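   /* Component size of the render-target format: blendable (<= 32-bit,
    * non-RGB565) formats are handled as 8-bit unorm components; RGB565
    * keeps 16-bit components, and larger formats use the texel size's
    * lowest power-of-two factor, capped at 4 bytes.
    */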
   unsigned imgcompsz =
      (imgtexelsz <= 4 && key.imgfmt != PIPE_FORMAT_R5G6B5_UNORM)
         ? 1
         : MIN2(1 << (ffs(imgtexelsz) - 1), 4);

   unsigned nimgcomps = imgtexelsz / imgcompsz;
   unsigned bufcompsz = MIN2(buftexelsz, imgcompsz);
   unsigned nbufcomps = buftexelsz / bufcompsz;

   assert(bufcompsz == 1 || bufcompsz == 2 || bufcompsz == 4);
   assert(nbufcomps <= 4 && nimgcomps <= 4);

   nir_def *texel =
      nir_load_global(&b, bufptr, bufcompsz, nbufcomps, bufcompsz * 8);

   enum glsl_base_type basetype;
   if (key.imgfmt == PIPE_FORMAT_R5G6B5_UNORM) {
      texel = nir_vec3(
         &b, nir_iand_imm(&b, texel, BITFIELD_MASK(5)),
         nir_iand_imm(&b, nir_ushr_imm(&b, texel, 5), BITFIELD_MASK(6)),
         nir_iand_imm(&b, nir_ushr_imm(&b, texel, 11), BITFIELD_MASK(5)));
      texel = nir_fmul(
         &b, nir_u2f32(&b, texel),
         nir_vec3(&b, nir_imm_float(&b, 1.0f / 31),
                  nir_imm_float(&b, 1.0f / 63), nir_imm_float(&b, 1.0f / 31)));
      nimgcomps = 3;
      basetype = GLSL_TYPE_FLOAT;
   } else if (imgcompsz == 1) {
      assert(bufcompsz == 1);
      /* Blendable formats are unorm and the fixed-function blend unit
       * takes float values.
       */
      texel = nir_fmul_imm(&b, nir_u2f32(&b, texel), 1.0f / 255);
      basetype = GLSL_TYPE_FLOAT;
   } else {
      texel = nir_u2uN(&b, texel, imgcompsz * 8);
      basetype = imgcompsz == 2 ? GLSL_TYPE_UINT16 : GLSL_TYPE_UINT;
   }

   /* We always pass the texel using 32-bit regs for now */
   nir_variable *out =
      nir_variable_create(b.shader, nir_var_shader_out,
                          glsl_vector_type(basetype, nimgcomps), "out");
   out->data.location = FRAG_RESULT_DATA0;

   uint16_t fullmask = (1 << nimgcomps) - 1;

   assert(fullmask >= writemask);

   if (fullmask != writemask) {
      unsigned first_written_comp = ffs(writemask) - 1;
      nir_def *oldtexel = NULL;
      if (imgcompsz > 1)
         oldtexel = nir_load_var(&b, out);

      nir_def *texel_comps[4];
      for (unsigned i = 0; i < nimgcomps; i++) {
         if (writemask & BITFIELD_BIT(i))
            texel_comps[i] = nir_channel(&b, texel, i - first_written_comp);
         else if (imgcompsz > 1)
            texel_comps[i] = nir_channel(&b, oldtexel, i);
         else
            texel_comps[i] = nir_imm_intN_t(&b, 0, texel->bit_size);
      }

      texel = nir_vec(&b, texel_comps, nimgcomps);
   }

   nir_store_var(&b, out, texel, 0xff);

   struct panfrost_compile_inputs inputs = {
      .gpu_id = dev->physical_device->kmod.props.gpu_prod_id,
      .is_blit = true,
      .no_ubo_to_push = true,
   };

   struct util_dynarray binary;

   util_dynarray_init(&binary, NULL);
   pan_shader_preprocess(b.shader, inputs.gpu_id);

   enum pipe_format rt_formats[8] = {key.imgfmt};
   NIR_PASS_V(b.shader, GENX(pan_inline_rt_conversion), rt_formats);

   GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info);
   shader_info->push.count =
      DIV_ROUND_UP(sizeof(struct panvk_meta_copy_buf2img_info), 4);

   mali_ptr shader =
      pan_pool_upload_aligned(bin_pool, binary.data, binary.size, 128);

   util_dynarray_fini(&binary);
   ralloc_free(b.shader);

   return shader;
}

static unsigned
panvk_meta_copy_buf2img_format_idx(struct panvk_meta_copy_format_info key)
{
   for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_buf2img_fmts); i++) {
      if (!memcmp(&key, &panvk_meta_copy_buf2img_fmts[i], sizeof(key)))
         return i;
   }

   unreachable("Invalid image format\n");
}

static void
panvk_meta_copy_buf2img(struct panvk_cmd_buffer *cmdbuf,
                        const struct panvk_buffer *buf,
                        const struct panvk_image *img,
                        const VkBufferImageCopy2 *region)
{
   struct pan_fb_info *fbinfo = &cmdbuf->state.fb.info;
   unsigned minx = MAX2(region->imageOffset.x, 0);
   unsigned miny = MAX2(region->imageOffset.y, 0);
   unsigned maxx =
      MAX2(region->imageOffset.x + region->imageExtent.width - 1, 0);
   unsigned maxy =
      MAX2(region->imageOffset.y + region->imageExtent.height - 1, 0);

   mali_ptr vpd = panvk_per_arch(meta_emit_viewport)(&cmdbuf->desc_pool.base,
                                                     minx, miny, maxx, maxy);

   float dst_rect[] = {
      minx, miny, 0.0, 1.0, maxx + 1, miny, 0.0, 1.0,
      minx, maxy + 1, 0.0, 1.0, maxx + 1, maxy + 1, 0.0, 1.0,
   };
   mali_ptr dst_coords = pan_pool_upload_aligned(
      &cmdbuf->desc_pool.base, dst_rect, sizeof(dst_rect), 64);

   struct panvk_meta_copy_format_info key = {
      .imgfmt = panvk_meta_copy_buf2img_format(img->pimage.layout.format),
      .mask = panvk_meta_copy_img_mask(img->pimage.layout.format,
                                       region->imageSubresource.aspectMask),
   };

   unsigned fmtidx = panvk_meta_copy_buf2img_format_idx(key);

   mali_ptr rsd = cmdbuf->device->meta.copy.buf2img[fmtidx].rsd;

   const struct vk_image_buffer_layout buflayout =
      vk_image_buffer_copy_layout(&img->vk, region);
   struct panvk_meta_copy_buf2img_info info = {
      .buf.ptr = panvk_buffer_gpu_ptr(buf, region->bufferOffset),
      .buf.stride.line = buflayout.row_stride_B,
      .buf.stride.surf = buflayout.image_stride_B,
   };

   mali_ptr pushconsts =
      pan_pool_upload_aligned(&cmdbuf->desc_pool.base, &info, sizeof(info), 16);

   struct pan_image_view view = {
      .format = key.imgfmt,
      .dim = MALI_TEXTURE_DIMENSION_2D,
      .planes[0] = &img->pimage,
      .nr_samples = img->pimage.layout.nr_samples,
      .first_level = region->imageSubresource.mipLevel,
      .last_level = region->imageSubresource.mipLevel,
      .swizzle = {PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z,
                  PIPE_SWIZZLE_W},
   };

   /* TODO: don't force preloads of dst resources if unneeded */
   cmdbuf->state.fb.crc_valid[0] = false;
   *fbinfo = (struct pan_fb_info){
      .tile_buf_budget = panfrost_query_optimal_tib_size(
         cmdbuf->device->physical_device->model),
      .width =
         u_minify(img->pimage.layout.width, region->imageSubresource.mipLevel),
      .height =
         u_minify(img->pimage.layout.height, region->imageSubresource.mipLevel),
      .extent.minx = minx,
      .extent.maxx = maxx,
      .extent.miny = miny,
      .extent.maxy = maxy,
      .nr_samples = 1,
      .rt_count = 1,
      .rts[0].view = &view,
      .rts[0].preload = true,
      .rts[0].crc_valid = &cmdbuf->state.fb.crc_valid[0],
   };

   panvk_per_arch(cmd_close_batch)(cmdbuf);

   assert(region->imageSubresource.layerCount == 1 ||
          region->imageExtent.depth == 1);
   assert(region->imageOffset.z >= 0);
   unsigned first_layer =
      MAX2(region->imageSubresource.baseArrayLayer, region->imageOffset.z);
   unsigned nlayers =
      MAX2(region->imageSubresource.layerCount, region->imageExtent.depth);
   for (unsigned l = 0; l < nlayers; l++) {
      float src_rect[] = {
         0, 0, l, 1.0,
         region->imageExtent.width, 0, l, 1.0,
         0, region->imageExtent.height, l, 1.0,
         region->imageExtent.width, region->imageExtent.height, l, 1.0,
      };

      mali_ptr src_coords = pan_pool_upload_aligned(
         &cmdbuf->desc_pool.base, src_rect, sizeof(src_rect), 64);

      struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf);

      view.first_layer = view.last_layer = l + first_layer;
      batch->blit.src = buf->bo;
      batch->blit.dst = img->bo;
      panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, true);
      panvk_per_arch(cmd_alloc_fb_desc)(cmdbuf);
      panvk_per_arch(cmd_prepare_tiler_context)(cmdbuf);

      mali_ptr tsd, tiler;

      tsd = batch->tls.gpu;
      tiler = batch->tiler.descs.gpu;

      struct panfrost_ptr job;

      job = panvk_meta_copy_emit_tiler_job(&cmdbuf->desc_pool.base, &batch->jc,
                                           src_coords, dst_coords, 0, 0,
                                           pushconsts, vpd, rsd, tsd, tiler);

      util_dynarray_append(&batch->jobs, void *, job.cpu);
      panvk_per_arch(cmd_close_batch)(cmdbuf);
   }
}

static void
panvk_meta_copy_buf2img_init(struct panvk_device *dev)
{
   STATIC_ASSERT(ARRAY_SIZE(panvk_meta_copy_buf2img_fmts) ==
                 PANVK_META_COPY_BUF2IMG_NUM_FORMATS);

   for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_buf2img_fmts); i++) {
      struct pan_shader_info shader_info;
      mali_ptr shader = panvk_meta_copy_buf2img_shader(
         dev, panvk_meta_copy_buf2img_fmts[i], &shader_info);
      dev->meta.copy.buf2img[i].rsd = panvk_meta_copy_to_img_emit_rsd(
         &dev->meta.desc_pool.base, shader, &shader_info,
         panvk_meta_copy_buf2img_fmts[i].imgfmt,
         panvk_meta_copy_buf2img_fmts[i].mask, false);
   }
}

void
panvk_per_arch(CmdCopyBufferToImage2)(
   VkCommandBuffer commandBuffer,
   const VkCopyBufferToImageInfo2 *pCopyBufferToImageInfo)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(panvk_buffer, buf, pCopyBufferToImageInfo->srcBuffer);
   VK_FROM_HANDLE(panvk_image, img, pCopyBufferToImageInfo->dstImage);

   for (unsigned i = 0; i < pCopyBufferToImageInfo->regionCount; i++) {
      panvk_meta_copy_buf2img(cmdbuf, buf, img,
                              &pCopyBufferToImageInfo->pRegions[i]);
   }
}

static const struct panvk_meta_copy_format_info panvk_meta_copy_img2buf_fmts[] =
   {
      {PIPE_FORMAT_R8_UINT, 0x1},
      {PIPE_FORMAT_R8G8_UINT, 0x3},
      {PIPE_FORMAT_R5G6B5_UNORM, 0x7},
      {PIPE_FORMAT_R8G8B8A8_UINT, 0xf},
      {PIPE_FORMAT_R16G16B16_UINT, 0x7},
      {PIPE_FORMAT_R32G32_UINT, 0x3},
      {PIPE_FORMAT_R32G32B32_UINT, 0x7},
      {PIPE_FORMAT_R32G32B32A32_UINT, 0xf},
      /* S8 -> Z24S8 */
      {PIPE_FORMAT_R8G8B8A8_UINT, 0x8},
      /* S8 -> Z32_S8X24 */
      {PIPE_FORMAT_R32G32_UINT, 0x2},
      /* Z24X8 -> Z24S8 */
      {PIPE_FORMAT_R8G8B8A8_UINT, 0x7},
      /* Z32 -> Z32_S8X24 */
      {PIPE_FORMAT_R32G32_UINT, 0x1},
};

static enum pipe_format
panvk_meta_copy_img2buf_format(enum pipe_format imgfmt)
{
   /* Pick the UINT variant matching the texel size; RGB565 is the one
    * exception, see below.
    */
   switch (util_format_get_blocksize(imgfmt)) {
   case 1:
      return PIPE_FORMAT_R8_UINT;
   /* AFBC stores things differently for RGB565,
    * we can't simply map to R8G8 in that case */
   case 2:
      return (imgfmt == PIPE_FORMAT_R5G6B5_UNORM ||
              imgfmt == PIPE_FORMAT_B5G6R5_UNORM)
                ? PIPE_FORMAT_R5G6B5_UNORM
                : PIPE_FORMAT_R8G8_UINT;
   case 4:
      return PIPE_FORMAT_R8G8B8A8_UINT;
   case 6:
      return PIPE_FORMAT_R16G16B16_UINT;
   case 8:
      return PIPE_FORMAT_R32G32_UINT;
   case 12:
      return PIPE_FORMAT_R32G32B32_UINT;
   case 16:
      return PIPE_FORMAT_R32G32B32A32_UINT;
   default:
      unreachable("Invalid format\n");
   }
}

struct panvk_meta_copy_img2buf_info {
   struct {
      mali_ptr ptr;
      struct {
         unsigned line;
         unsigned surf;
      } stride;
   } buf;
   struct {
      struct {
         unsigned x, y, z;
      } offset;
      struct {
         unsigned minx, miny, maxx, maxy;
      } extent;
   } img;
} PACKED;

#define panvk_meta_copy_img2buf_get_info_field(b, field)                      \
   nir_load_push_constant(                                                    \
      (b), 1, sizeof(((struct panvk_meta_copy_img2buf_info *)0)->field) * 8,  \
      nir_imm_int(b, 0),                                                      \
      .base = offsetof(struct panvk_meta_copy_img2buf_info, field),           \
      .range = ~0)

static mali_ptr
panvk_meta_copy_img2buf_shader(struct panvk_device *dev,
                               struct panvk_meta_copy_format_info key,
                               unsigned texdim, unsigned texisarray,
                               struct pan_shader_info *shader_info)
{
   unsigned imgtexelsz = util_format_get_blocksize(key.imgfmt);
   unsigned buftexelsz = panvk_meta_copy_buf_texelsize(key.imgfmt, key.mask);
   struct pan_pool *bin_pool = &dev->meta.bin_pool.base;

   /* FIXME: Won't work on compute queues, but we can't do that with
    * a compute shader if the destination is an AFBC surface.
    */
   nir_builder b = nir_builder_init_simple_shader(
      MESA_SHADER_COMPUTE, GENX(pan_shader_get_compiler_options)(),
      "panvk_meta_copy_img2buf(dim=%dD%s,imgfmt=%s,mask=%x)", texdim,
      texisarray ? "[]" : "", util_format_name(key.imgfmt), key.mask);

   nir_def *coord = nir_load_global_invocation_id(&b, 32);
   nir_def *bufptr = panvk_meta_copy_img2buf_get_info_field(&b, buf.ptr);
   nir_def *buflinestride =
      panvk_meta_copy_img2buf_get_info_field(&b, buf.stride.line);
   nir_def *bufsurfstride =
      panvk_meta_copy_img2buf_get_info_field(&b, buf.stride.surf);

   nir_def *imgminx =
      panvk_meta_copy_img2buf_get_info_field(&b, img.extent.minx);
   nir_def *imgminy =
      panvk_meta_copy_img2buf_get_info_field(&b, img.extent.miny);
   nir_def *imgmaxx =
      panvk_meta_copy_img2buf_get_info_field(&b, img.extent.maxx);
   nir_def *imgmaxy =
      panvk_meta_copy_img2buf_get_info_field(&b, img.extent.maxy);

   nir_def *imgcoords, *inbounds;

   switch (texdim + texisarray) {
   case 1:
      imgcoords =
         nir_iadd(&b, nir_channel(&b, coord, 0),
                  panvk_meta_copy_img2buf_get_info_field(&b, img.offset.x));
      inbounds =
         nir_iand(&b, nir_uge(&b, imgmaxx, nir_channel(&b, imgcoords, 0)),
                  nir_uge(&b, nir_channel(&b, imgcoords, 0), imgminx));
      break;
   case 2:
      imgcoords = nir_vec2(
         &b,
         nir_iadd(&b, nir_channel(&b, coord, 0),
                  panvk_meta_copy_img2buf_get_info_field(&b, img.offset.x)),
         nir_iadd(&b, nir_channel(&b, coord, 1),
                  panvk_meta_copy_img2buf_get_info_field(&b, img.offset.y)));
      inbounds = nir_iand(
         &b,
         nir_iand(&b, nir_uge(&b, imgmaxx, nir_channel(&b, imgcoords, 0)),
                  nir_uge(&b, imgmaxy, nir_channel(&b, imgcoords, 1))),
         nir_iand(&b, nir_uge(&b, nir_channel(&b, imgcoords, 0), imgminx),
                  nir_uge(&b, nir_channel(&b, imgcoords, 1), imgminy)));
      break;
   case 3:
      imgcoords = nir_vec3(
         &b,
         nir_iadd(&b, nir_channel(&b, coord, 0),
                  panvk_meta_copy_img2buf_get_info_field(&b, img.offset.x)),
         nir_iadd(&b, nir_channel(&b, coord, 1),
                  panvk_meta_copy_img2buf_get_info_field(&b, img.offset.y)),
         nir_iadd(&b, nir_channel(&b, coord, 2),
                  panvk_meta_copy_img2buf_get_info_field(&b, img.offset.z)));
      inbounds = nir_iand(
         &b,
         nir_iand(&b, nir_uge(&b, imgmaxx, nir_channel(&b, imgcoords, 0)),
                  nir_uge(&b, imgmaxy, nir_channel(&b, imgcoords, 1))),
         nir_iand(&b, nir_uge(&b, nir_channel(&b, imgcoords, 0), imgminx),
                  nir_uge(&b, nir_channel(&b, imgcoords, 1), imgminy)));
      break;
   default:
      unreachable("Invalid texture dimension\n");
   }

   nir_push_if(&b, inbounds);

   /* FIXME: doesn't work for tiled+compressed formats since blocks are 4x4
    * blocks instead of 16x16 texels in that case, and there's nothing we can
    * do to force the tile size to 4x4 in the render path.
    * This being said, compressed textures are not compatible with AFBC, so we
    * could use a compute shader arranging the blocks properly.
    */
   nir_def *offset =
      nir_imul(&b, nir_channel(&b, coord, 0), nir_imm_int(&b, buftexelsz));
   offset = nir_iadd(&b, offset,
                     nir_imul(&b, nir_channel(&b, coord, 1), buflinestride));
   offset = nir_iadd(&b, offset,
                     nir_imul(&b, nir_channel(&b, coord, 2), bufsurfstride));
   bufptr = nir_iadd(&b, bufptr, nir_u2u64(&b, offset));

   unsigned imgcompsz =
      imgtexelsz <= 4 ? 1 : MIN2(1 << (ffs(imgtexelsz) - 1), 4);
   unsigned nimgcomps = imgtexelsz / imgcompsz;
   assert(nimgcomps <= 4);

   nir_tex_instr *tex = nir_tex_instr_create(b.shader, 1);
   tex->op = nir_texop_txf;
   tex->texture_index = 0;
   tex->is_array = texisarray;
   tex->dest_type =
      util_format_is_unorm(key.imgfmt) ? nir_type_float32 : nir_type_uint32;

   switch (texdim) {
   case 1:
      tex->sampler_dim = GLSL_SAMPLER_DIM_1D;
      break;
   case 2:
      tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
      break;
   case 3:
      tex->sampler_dim = GLSL_SAMPLER_DIM_3D;
      break;
   default:
      unreachable("Invalid texture dimension");
   }

   tex->src[0] = nir_tex_src_for_ssa(nir_tex_src_coord, imgcoords);
   tex->coord_components = texdim + texisarray;
   nir_def_init(&tex->instr, &tex->def, 4,
                nir_alu_type_get_type_size(tex->dest_type));
   nir_builder_instr_insert(&b, &tex->instr);

   nir_def *texel = &tex->def;

   unsigned fullmask = (1 << util_format_get_nr_components(key.imgfmt)) - 1;
   unsigned nbufcomps = util_bitcount(fullmask);
   if (key.mask != fullmask) {
      nir_def *bufcomps[4];
      nbufcomps = 0;
      for (unsigned i = 0; i < nimgcomps; i++) {
         if (key.mask & BITFIELD_BIT(i))
            bufcomps[nbufcomps++] = nir_channel(&b, texel, i);
      }

      texel = nir_vec(&b, bufcomps, nbufcomps);
   }

   unsigned bufcompsz = buftexelsz / nbufcomps;

   if (key.imgfmt == PIPE_FORMAT_R5G6B5_UNORM) {
      texel = nir_fmul(&b, texel,
                       nir_vec3(&b, nir_imm_float(&b, 31),
                                nir_imm_float(&b, 63), nir_imm_float(&b, 31)));
      texel = nir_f2u16(&b, texel);
      texel = nir_ior(
         &b, nir_channel(&b, texel, 0),
         nir_ior(&b,
                 nir_ishl(&b, nir_channel(&b, texel, 1), nir_imm_int(&b, 5)),
                 nir_ishl(&b, nir_channel(&b, texel, 2), nir_imm_int(&b, 11))));
      imgcompsz = 2;
      bufcompsz = 2;
      nbufcomps = 1;
      nimgcomps = 1;
   } else if (imgcompsz == 1) {
      nir_def *packed = nir_channel(&b, texel, 0);
      for (unsigned i = 1; i < nbufcomps; i++) {
         packed = nir_ior(
            &b, packed,
            nir_ishl(&b, nir_iand_imm(&b, nir_channel(&b, texel, i), 0xff),
                     nir_imm_int(&b, i * 8)));
      }
      texel = packed;

      bufcompsz = nbufcomps == 3 ? 4 : nbufcomps;
      nbufcomps = 1;
   }

   assert(bufcompsz == 1 || bufcompsz == 2 || bufcompsz == 4);
   assert(nbufcomps <= 4 && nimgcomps <= 4);
   texel = nir_u2uN(&b, texel, bufcompsz * 8);

   nir_store_global(&b, bufptr, bufcompsz, texel, (1 << nbufcomps) - 1);
   nir_pop_if(&b, NULL);

   struct panfrost_compile_inputs inputs = {
      .gpu_id = dev->physical_device->kmod.props.gpu_prod_id,
      .is_blit = true,
      .no_ubo_to_push = true,
   };

   struct util_dynarray binary;

   util_dynarray_init(&binary, NULL);
   pan_shader_preprocess(b.shader, inputs.gpu_id);
   GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info);

   shader_info->push.count =
      DIV_ROUND_UP(sizeof(struct panvk_meta_copy_img2buf_info), 4);

   mali_ptr shader =
      pan_pool_upload_aligned(bin_pool, binary.data, binary.size, 128);

   util_dynarray_fini(&binary);
   ralloc_free(b.shader);

   return shader;
}

static unsigned
panvk_meta_copy_img2buf_format_idx(struct panvk_meta_copy_format_info key)
{
   for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_img2buf_fmts); i++) {
      if (!memcmp(&key, &panvk_meta_copy_img2buf_fmts[i], sizeof(key)))
         return i;
   }

   unreachable("Invalid image format\n");
}

static void
panvk_meta_copy_img2buf(struct panvk_cmd_buffer *cmdbuf,
                        const struct panvk_buffer *buf,
                        const struct panvk_image *img,
                        const VkBufferImageCopy2 *region)
{
   struct panvk_meta_copy_format_info key = {
      .imgfmt = panvk_meta_copy_img2buf_format(img->pimage.layout.format),
      .mask = panvk_meta_copy_img_mask(img->pimage.layout.format,
                                       region->imageSubresource.aspectMask),
   };
   unsigned buftexelsz = panvk_meta_copy_buf_texelsize(key.imgfmt, key.mask);
   unsigned texdimidx = panvk_meta_copy_tex_type(
      img->pimage.layout.dim, img->pimage.layout.array_size > 1);
   unsigned fmtidx = panvk_meta_copy_img2buf_format_idx(key);

   mali_ptr rsd = cmdbuf->device->meta.copy.img2buf[texdimidx][fmtidx].rsd;

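   /* The compute grid is dispatched in 16x16 tiles, so the image offset is
    * rounded down to a tile boundary here and the shader's bounds check
    * discards invocations that fall outside the copy region.
    */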
   struct panvk_meta_copy_img2buf_info info = {
      .buf.ptr = panvk_buffer_gpu_ptr(buf, region->bufferOffset),
      .buf.stride.line =
         (region->bufferRowLength ?: region->imageExtent.width) * buftexelsz,
      .img.offset.x = MAX2(region->imageOffset.x & ~15, 0),
      .img.extent.minx = MAX2(region->imageOffset.x, 0),
      .img.extent.maxx =
         MAX2(region->imageOffset.x + region->imageExtent.width - 1, 0),
   };

   if (img->pimage.layout.dim == MALI_TEXTURE_DIMENSION_1D) {
      info.img.extent.maxy = region->imageSubresource.layerCount - 1;
   } else {
      info.img.offset.y = MAX2(region->imageOffset.y & ~15, 0);
      info.img.offset.z = MAX2(region->imageOffset.z, 0);
      info.img.extent.miny = MAX2(region->imageOffset.y, 0);
      info.img.extent.maxy =
         MAX2(region->imageOffset.y + region->imageExtent.height - 1, 0);
   }

   info.buf.stride.surf =
      (region->bufferImageHeight ?: region->imageExtent.height) *
      info.buf.stride.line;

   mali_ptr pushconsts =
      pan_pool_upload_aligned(&cmdbuf->desc_pool.base, &info, sizeof(info), 16);

   struct pan_image_view view = {
      .format = key.imgfmt,
      .dim = img->pimage.layout.dim == MALI_TEXTURE_DIMENSION_CUBE
                ? MALI_TEXTURE_DIMENSION_2D
                : img->pimage.layout.dim,
      .planes[0] = &img->pimage,
      .nr_samples = img->pimage.layout.nr_samples,
      .first_level = region->imageSubresource.mipLevel,
      .last_level = region->imageSubresource.mipLevel,
      .first_layer = region->imageSubresource.baseArrayLayer,
      .last_layer = region->imageSubresource.baseArrayLayer +
                    region->imageSubresource.layerCount - 1,
      .swizzle = {PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z,
                  PIPE_SWIZZLE_W},
   };

   mali_ptr texture =
      panvk_meta_copy_img_emit_texture(&cmdbuf->desc_pool.base, &view);
   mali_ptr sampler = panvk_meta_copy_img_emit_sampler(&cmdbuf->desc_pool.base);

   panvk_per_arch(cmd_close_batch)(cmdbuf);

   struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf);

   struct pan_tls_info tlsinfo = {0};

   batch->blit.src = img->bo;
   batch->blit.dst = buf->bo;
   batch->tls = pan_pool_alloc_desc(&cmdbuf->desc_pool.base, LOCAL_STORAGE);
   GENX(pan_emit_tls)(&tlsinfo, batch->tls.cpu);

   mali_ptr tsd = batch->tls.gpu;

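   /* One invocation per texel: 16x16 workgroups for 2D/3D images, 16x1 for
    * 1D images (with layers mapped onto the y dimension of the grid).
    */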
1532 struct pan_compute_dim wg_sz = {
1533 16,
1534 img->pimage.layout.dim == MALI_TEXTURE_DIMENSION_1D ? 1 : 16,
1535 1,
1536 };
1537
   struct pan_compute_dim num_wg = {
      (ALIGN_POT(info.img.extent.maxx + 1, 16) - info.img.offset.x) / 16,
      img->pimage.layout.dim == MALI_TEXTURE_DIMENSION_1D
         ? region->imageSubresource.layerCount
         : (ALIGN_POT(info.img.extent.maxy + 1, 16) - info.img.offset.y) / 16,
      img->pimage.layout.dim != MALI_TEXTURE_DIMENSION_1D
         ? MAX2(region->imageSubresource.layerCount, region->imageExtent.depth)
         : 1,
   };

   struct panfrost_ptr job = panvk_meta_copy_emit_compute_job(
      &cmdbuf->desc_pool.base, &batch->jc, &num_wg, &wg_sz, texture, sampler,
      pushconsts, rsd, tsd);

   util_dynarray_append(&batch->jobs, void *, job.cpu);

   panvk_per_arch(cmd_close_batch)(cmdbuf);
}

static void
panvk_meta_copy_img2buf_init(struct panvk_device *dev)
{
   STATIC_ASSERT(ARRAY_SIZE(panvk_meta_copy_img2buf_fmts) ==
                 PANVK_META_COPY_IMG2BUF_NUM_FORMATS);

   for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_img2buf_fmts); i++) {
      for (unsigned texdim = 1; texdim <= 3; texdim++) {
         unsigned texdimidx = panvk_meta_copy_tex_type(texdim, false);
         assert(texdimidx < ARRAY_SIZE(dev->meta.copy.img2buf));

         struct pan_shader_info shader_info;
         mali_ptr shader = panvk_meta_copy_img2buf_shader(
            dev, panvk_meta_copy_img2buf_fmts[i], texdim, false, &shader_info);
         dev->meta.copy.img2buf[texdimidx][i].rsd =
            panvk_meta_copy_to_buf_emit_rsd(&dev->meta.desc_pool.base, shader,
                                            &shader_info, true);

         if (texdim == 3)
            continue;

         memset(&shader_info, 0, sizeof(shader_info));
         texdimidx = panvk_meta_copy_tex_type(texdim, true);
         assert(texdimidx < ARRAY_SIZE(dev->meta.copy.img2buf));
         shader = panvk_meta_copy_img2buf_shader(
            dev, panvk_meta_copy_img2buf_fmts[i], texdim, true, &shader_info);
         dev->meta.copy.img2buf[texdimidx][i].rsd =
            panvk_meta_copy_to_buf_emit_rsd(&dev->meta.desc_pool.base, shader,
                                            &shader_info, true);
      }
   }
}

void
panvk_per_arch(CmdCopyImageToBuffer2)(
   VkCommandBuffer commandBuffer,
   const VkCopyImageToBufferInfo2 *pCopyImageToBufferInfo)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(panvk_buffer, buf, pCopyImageToBufferInfo->dstBuffer);
   VK_FROM_HANDLE(panvk_image, img, pCopyImageToBufferInfo->srcImage);

   for (unsigned i = 0; i < pCopyImageToBufferInfo->regionCount; i++) {
      panvk_meta_copy_img2buf(cmdbuf, buf, img,
                              &pCopyImageToBufferInfo->pRegions[i]);
   }
}

struct panvk_meta_copy_buf2buf_info {
   mali_ptr src;
   mali_ptr dst;
} PACKED;

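/* Loads one field of panvk_meta_copy_buf2buf_info from the push-constant
 * buffer: the field size in bytes is scaled by 8 to get the NIR bit size,
 * and .base is the byte offset of the field within the struct.
 */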
#define panvk_meta_copy_buf2buf_get_info_field(b, field)                     \
   nir_load_push_constant(                                                   \
      (b), 1, sizeof(((struct panvk_meta_copy_buf2buf_info *)0)->field) * 8, \
      nir_imm_int(b, 0),                                                     \
      .base = offsetof(struct panvk_meta_copy_buf2buf_info, field),          \
      .range = ~0)

static mali_ptr
panvk_meta_copy_buf2buf_shader(struct panvk_device *dev,
                               unsigned blksz,
                               struct pan_shader_info *shader_info)
{
   struct pan_pool *bin_pool = &dev->meta.bin_pool.base;

   /* FIXME: Won't work on compute queues, but we can't do that with
    * a compute shader if the destination is an AFBC surface.
    */
   nir_builder b = nir_builder_init_simple_shader(
      MESA_SHADER_COMPUTE, GENX(pan_shader_get_compiler_options)(),
      "panvk_meta_copy_buf2buf(blksz=%d)", blksz);

   nir_def *coord = nir_load_global_invocation_id(&b, 32);

   nir_def *offset = nir_u2u64(
      &b, nir_imul(&b, nir_channel(&b, coord, 0), nir_imm_int(&b, blksz)));
   nir_def *srcptr =
      nir_iadd(&b, panvk_meta_copy_buf2buf_get_info_field(&b, src), offset);
   nir_def *dstptr =
      nir_iadd(&b, panvk_meta_copy_buf2buf_get_info_field(&b, dst), offset);

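   /* Split each block into at most 32-bit components: e.g. blksz = 8 gives
    * compsz = 4 and ncomps = 2, i.e. one 2x32-bit load/store pair per
    * invocation with writemask (1 << 2) - 1 = 0x3.
    */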
   unsigned compsz = blksz < 4 ? blksz : 4;
   unsigned ncomps = blksz / compsz;
   nir_store_global(&b, dstptr, blksz,
                    nir_load_global(&b, srcptr, blksz, ncomps, compsz * 8),
                    (1 << ncomps) - 1);

   struct panfrost_compile_inputs inputs = {
      .gpu_id = dev->physical_device->kmod.props.gpu_prod_id,
      .is_blit = true,
      .no_ubo_to_push = true,
   };

   struct util_dynarray binary;

   util_dynarray_init(&binary, NULL);
   pan_shader_preprocess(b.shader, inputs.gpu_id);
   GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info);

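   /* push.count is expressed in 32-bit words, hence the DIV_ROUND_UP by 4 of
    * the push-constant struct size.
    */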
   shader_info->push.count =
      DIV_ROUND_UP(sizeof(struct panvk_meta_copy_buf2buf_info), 4);

   mali_ptr shader =
      pan_pool_upload_aligned(bin_pool, binary.data, binary.size, 128);

   util_dynarray_fini(&binary);
   ralloc_free(b.shader);

   return shader;
}

static void
panvk_meta_copy_buf2buf_init(struct panvk_device *dev)
{
   for (unsigned i = 0; i < ARRAY_SIZE(dev->meta.copy.buf2buf); i++) {
      struct pan_shader_info shader_info;
      mali_ptr shader =
         panvk_meta_copy_buf2buf_shader(dev, 1 << i, &shader_info);
      dev->meta.copy.buf2buf[i].rsd = panvk_meta_copy_to_buf_emit_rsd(
         &dev->meta.desc_pool.base, shader, &shader_info, false);
   }
}

static void
panvk_meta_copy_buf2buf(struct panvk_cmd_buffer *cmdbuf,
                        const struct panvk_buffer *src,
                        const struct panvk_buffer *dst,
                        const VkBufferCopy2 *region)
{
   struct panvk_meta_copy_buf2buf_info info = {
      .src = panvk_buffer_gpu_ptr(src, region->srcOffset),
      .dst = panvk_buffer_gpu_ptr(dst, region->dstOffset),
   };

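   /* Pick the largest power-of-two block size (up to 16 bytes) dividing the
    * source pointer, destination pointer and copy size. Example:
    * src % 16 == 4, dst % 16 == 8, size = 100 -> OR & 15 = 0xc, ffs = 3,
    * log2blksz = 2, i.e. 4-byte blocks. If everything is 16-byte aligned,
    * ffs(0) = 0 and we use the maximum 16-byte block.
    */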
   unsigned alignment = ffs((info.src | info.dst | region->size) & 15);
   unsigned log2blksz = alignment ? alignment - 1 : 4;

   assert(log2blksz < ARRAY_SIZE(cmdbuf->device->meta.copy.buf2buf));
   mali_ptr rsd = cmdbuf->device->meta.copy.buf2buf[log2blksz].rsd;

   mali_ptr pushconsts =
      pan_pool_upload_aligned(&cmdbuf->desc_pool.base, &info, sizeof(info), 16);

   panvk_per_arch(cmd_close_batch)(cmdbuf);

   struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf);

   panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false);

   mali_ptr tsd = batch->tls.gpu;

   unsigned nblocks = region->size >> log2blksz;
   struct pan_compute_dim num_wg = {nblocks, 1, 1};
   struct pan_compute_dim wg_sz = {1, 1, 1};
   struct panfrost_ptr job = panvk_meta_copy_emit_compute_job(
      &cmdbuf->desc_pool.base, &batch->jc, &num_wg, &wg_sz, 0, 0, pushconsts,
      rsd, tsd);

   util_dynarray_append(&batch->jobs, void *, job.cpu);

   batch->blit.src = src->bo;
   batch->blit.dst = dst->bo;
   panvk_per_arch(cmd_close_batch)(cmdbuf);
}

void
panvk_per_arch(CmdCopyBuffer2)(VkCommandBuffer commandBuffer,
                               const VkCopyBufferInfo2 *pCopyBufferInfo)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(panvk_buffer, src, pCopyBufferInfo->srcBuffer);
   VK_FROM_HANDLE(panvk_buffer, dst, pCopyBufferInfo->dstBuffer);

   for (unsigned i = 0; i < pCopyBufferInfo->regionCount; i++) {
      panvk_meta_copy_buf2buf(cmdbuf, src, dst, &pCopyBufferInfo->pRegions[i]);
   }
}

struct panvk_meta_fill_buf_info {
   mali_ptr start;
   uint32_t val;
} PACKED;

#define panvk_meta_fill_buf_get_info_field(b, field)                         \
   nir_load_push_constant(                                                   \
      (b), 1, sizeof(((struct panvk_meta_fill_buf_info *)0)->field) * 8,     \
      nir_imm_int(b, 0),                                                     \
      .base = offsetof(struct panvk_meta_fill_buf_info, field), .range = ~0)

static mali_ptr
panvk_meta_fill_buf_shader(struct panvk_device *dev,
                           struct pan_shader_info *shader_info)
{
   struct pan_pool *bin_pool = &dev->meta.bin_pool.base;

   /* FIXME: Won't work on compute queues, but we can't do that with
    * a compute shader if the destination is an AFBC surface.
    */
   nir_builder b = nir_builder_init_simple_shader(
      MESA_SHADER_COMPUTE, GENX(pan_shader_get_compiler_options)(),
      "panvk_meta_fill_buf()");

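   /* Each invocation stores the 32-bit fill value at
    * start + 4 * gl_GlobalInvocationID.x; the caller dispatches one
    * invocation per word.
    */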
   nir_def *coord = nir_load_global_invocation_id(&b, 32);

   nir_def *offset = nir_u2u64(&b, nir_imul(&b, nir_channel(&b, coord, 0),
                                            nir_imm_int(&b, sizeof(uint32_t))));
   nir_def *ptr =
      nir_iadd(&b, panvk_meta_fill_buf_get_info_field(&b, start), offset);
   nir_def *val = panvk_meta_fill_buf_get_info_field(&b, val);

   nir_store_global(&b, ptr, sizeof(uint32_t), val, 1);

   struct panfrost_compile_inputs inputs = {
      .gpu_id = dev->physical_device->kmod.props.gpu_prod_id,
      .is_blit = true,
      .no_ubo_to_push = true,
   };

   struct util_dynarray binary;

   util_dynarray_init(&binary, NULL);
   pan_shader_preprocess(b.shader, inputs.gpu_id);
   GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info);

   shader_info->push.count =
      DIV_ROUND_UP(sizeof(struct panvk_meta_fill_buf_info), 4);

   mali_ptr shader =
      pan_pool_upload_aligned(bin_pool, binary.data, binary.size, 128);

   util_dynarray_fini(&binary);
   ralloc_free(b.shader);

   return shader;
}

static mali_ptr
panvk_meta_fill_buf_emit_rsd(struct panvk_device *dev)
{
   struct pan_pool *desc_pool = &dev->meta.desc_pool.base;
   struct pan_shader_info shader_info;

   mali_ptr shader = panvk_meta_fill_buf_shader(dev, &shader_info);

   struct panfrost_ptr rsd_ptr =
      pan_pool_alloc_desc_aggregate(desc_pool, PAN_DESC(RENDERER_STATE));

   pan_pack(rsd_ptr.cpu, RENDERER_STATE, cfg) {
      pan_shader_prepare_rsd(&shader_info, shader, &cfg);
   }

   return rsd_ptr.gpu;
}

static void
panvk_meta_fill_buf_init(struct panvk_device *dev)
{
   dev->meta.copy.fillbuf.rsd = panvk_meta_fill_buf_emit_rsd(dev);
}

static void
panvk_meta_fill_buf(struct panvk_cmd_buffer *cmdbuf,
                    const struct panvk_buffer *dst, VkDeviceSize size,
                    VkDeviceSize offset, uint32_t val)
{
   struct panvk_meta_fill_buf_info info = {
      .start = panvk_buffer_gpu_ptr(dst, offset),
      .val = val,
   };
   size = panvk_buffer_range(dst, offset, size);

   /* From the Vulkan spec:
    *
    *    "size is the number of bytes to fill, and must be either a multiple
    *    of 4, or VK_WHOLE_SIZE to fill the range from offset to the end of
    *    the buffer. If VK_WHOLE_SIZE is used and the remaining size of the
    *    buffer is not a multiple of 4, then the nearest smaller multiple is
    *    used."
    */
   size &= ~3ull;

   assert(!(offset & 3) && !(size & 3));

   unsigned nwords = size / sizeof(uint32_t);
   mali_ptr rsd = cmdbuf->device->meta.copy.fillbuf.rsd;

   mali_ptr pushconsts =
      pan_pool_upload_aligned(&cmdbuf->desc_pool.base, &info, sizeof(info), 16);

   panvk_per_arch(cmd_close_batch)(cmdbuf);

   struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf);

   panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false);

   mali_ptr tsd = batch->tls.gpu;

   struct pan_compute_dim num_wg = {nwords, 1, 1};
   struct pan_compute_dim wg_sz = {1, 1, 1};
   struct panfrost_ptr job = panvk_meta_copy_emit_compute_job(
      &cmdbuf->desc_pool.base, &batch->jc, &num_wg, &wg_sz, 0, 0, pushconsts,
      rsd, tsd);

   util_dynarray_append(&batch->jobs, void *, job.cpu);

   batch->blit.dst = dst->bo;
   panvk_per_arch(cmd_close_batch)(cmdbuf);
}

void
panvk_per_arch(CmdFillBuffer)(VkCommandBuffer commandBuffer, VkBuffer dstBuffer,
                              VkDeviceSize dstOffset, VkDeviceSize fillSize,
                              uint32_t data)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(panvk_buffer, dst, dstBuffer);

   panvk_meta_fill_buf(cmdbuf, dst, fillSize, dstOffset, data);
}

static void
panvk_meta_update_buf(struct panvk_cmd_buffer *cmdbuf,
                      const struct panvk_buffer *dst, VkDeviceSize offset,
                      VkDeviceSize size, const void *data)
{
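   /* vkCmdUpdateBuffer is implemented by staging the host data in the
    * command buffer's descriptor pool, then running the regular buf2buf
    * copy pipeline on the staged copy.
    */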
   struct panvk_meta_copy_buf2buf_info info = {
      .src = pan_pool_upload_aligned(&cmdbuf->desc_pool.base, data, size, 4),
      .dst = panvk_buffer_gpu_ptr(dst, offset),
   };

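   /* 4-byte blocks are always valid here: the Vulkan spec requires both
    * dstOffset and dataSize to be multiples of 4 for vkCmdUpdateBuffer.
    */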
   unsigned log2blksz = ffs(sizeof(uint32_t)) - 1;

   mali_ptr rsd = cmdbuf->device->meta.copy.buf2buf[log2blksz].rsd;

   mali_ptr pushconsts =
      pan_pool_upload_aligned(&cmdbuf->desc_pool.base, &info, sizeof(info), 16);

   panvk_per_arch(cmd_close_batch)(cmdbuf);

   struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf);

   panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false);

   mali_ptr tsd = batch->tls.gpu;

   unsigned nblocks = size >> log2blksz;
   struct pan_compute_dim num_wg = {nblocks, 1, 1};
   struct pan_compute_dim wg_sz = {1, 1, 1};
   struct panfrost_ptr job = panvk_meta_copy_emit_compute_job(
      &cmdbuf->desc_pool.base, &batch->jc, &num_wg, &wg_sz, 0, 0, pushconsts,
      rsd, tsd);

   util_dynarray_append(&batch->jobs, void *, job.cpu);

   batch->blit.dst = dst->bo;
   panvk_per_arch(cmd_close_batch)(cmdbuf);
}

void
panvk_per_arch(CmdUpdateBuffer)(VkCommandBuffer commandBuffer,
                                VkBuffer dstBuffer, VkDeviceSize dstOffset,
                                VkDeviceSize dataSize, const void *pData)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(panvk_buffer, dst, dstBuffer);

   panvk_meta_update_buf(cmdbuf, dst, dstOffset, dataSize, pData);
}

void
panvk_per_arch(meta_copy_init)(struct panvk_device *dev)
{
   panvk_meta_copy_img2img_init(dev, false);
   panvk_meta_copy_img2img_init(dev, true);
   panvk_meta_copy_buf2img_init(dev);
   panvk_meta_copy_img2buf_init(dev);
   panvk_meta_copy_buf2buf_init(dev);
   panvk_meta_fill_buf_init(dev);
}