/*
 * Copyright © 2021 Collabora Ltd.
 * SPDX-License-Identifier: MIT
 */

#include "panvk_cmd_alloc.h"
#include "panvk_cmd_fb_preload.h"
#include "panvk_image_view.h"
#include "panvk_meta.h"
#include "panvk_shader.h"

#include "nir_builder.h"

#include "pan_shader.h"

struct panvk_fb_preload_shader_key {
   enum panvk_meta_object_key_type type;
   VkImageViewType view_type;
   VkSampleCountFlagBits samples;
   VkImageAspectFlags aspects;
   bool needs_layer_id;
   struct {
      nir_alu_type type;
   } color[8];
};

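/* Emit a txf/txf_ms instruction fetching one texel from the attachment
 * texture bound at tex_idx, at an explicit LOD of zero. On v9+, textures
 * and samplers live in a resource table, hence the pan_res_handle()
 * indexing. */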
static nir_def *
texel_fetch(nir_builder *b, VkImageViewType view_type,
            nir_alu_type reg_type, unsigned tex_idx,
            nir_def *sample_id, nir_def *coords)
{
   nir_tex_instr *tex = nir_tex_instr_create(b->shader, sample_id ? 3 : 2);

   tex->op = sample_id ? nir_texop_txf_ms : nir_texop_txf;
   tex->dest_type = reg_type;
   tex->is_array = vk_image_view_type_is_array(view_type);
   tex->sampler_dim = sample_id ? GLSL_SAMPLER_DIM_MS
                                : vk_image_view_type_to_sampler_dim(view_type);
   tex->coord_components = coords->num_components;
   tex->src[0] = nir_tex_src_for_ssa(nir_tex_src_coord, coords);
   tex->src[1] = nir_tex_src_for_ssa(nir_tex_src_lod, nir_imm_int(b, 0));

   if (sample_id)
      tex->src[2] = nir_tex_src_for_ssa(nir_tex_src_ms_index, sample_id);

#if PAN_ARCH <= 7
   tex->sampler_index = 0;
   tex->texture_index = tex_idx;
#else
   tex->sampler_index = pan_res_handle(0, 0);
   tex->texture_index = pan_res_handle(0, tex_idx + 1);
#endif

   nir_def_init(&tex->instr, &tex->def, 4, 32);
   nir_builder_instr_insert(b, &tex->instr);

   return &tex->def;
}

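/* Create a vec4 gl_FragData[rt] shader output variable whose base type
 * matches the register type of the render target being preloaded. */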
static nir_variable *
color_output_var(nir_builder *b, VkImageViewType view_type,
                 VkImageAspectFlags aspect, VkSampleCountFlagBits samples,
                 nir_alu_type fmt_type, unsigned rt)
{
   enum glsl_base_type base_type =
      nir_get_glsl_base_type_for_nir_type(fmt_type);
   const struct glsl_type *var_type = glsl_vector_type(base_type, 4);
   static const char *var_names[] = {
      "gl_FragData[0]", "gl_FragData[1]", "gl_FragData[2]", "gl_FragData[3]",
      "gl_FragData[4]", "gl_FragData[5]", "gl_FragData[6]", "gl_FragData[7]",
   };

   assert(rt < ARRAY_SIZE(var_names));

   nir_variable *var = nir_variable_create(b->shader, nir_var_shader_out,
                                           var_type, var_names[rt]);
   var->data.location = FRAG_RESULT_DATA0 + rt;

   return var;
}

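/* On v7 and earlier the layer index is fed to the shader through a push
 * constant (see the needs_layer_id handling in cmd_emit_dcd()); later
 * architectures can read it directly with nir_load_layer_id(). */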
static nir_def *
get_layer_id(nir_builder *b)
{
#if PAN_ARCH <= 7
   return nir_load_push_constant(b, 1, 32, nir_imm_int(b, 0));
#else
   return nir_load_layer_id(b);
#endif
}

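/* Build the preload fragment shader: fetch one texel per preloaded
 * attachment at the current pixel (and sample, when multisampled), and
 * store it to the matching color, depth or stencil output. */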
static nir_shader *
get_preload_nir_shader(const struct panvk_fb_preload_shader_key *key)
{
   nir_builder builder = nir_builder_init_simple_shader(
      MESA_SHADER_FRAGMENT, GENX(pan_shader_get_compiler_options)(),
      "panvk-meta-preload");
   nir_builder *b = &builder;
   nir_def *sample_id =
      key->samples != VK_SAMPLE_COUNT_1_BIT ? nir_load_sample_id(b) : NULL;
   nir_def *coords = nir_u2u32(b, nir_load_pixel_coord(b));

   if (key->view_type == VK_IMAGE_VIEW_TYPE_2D_ARRAY ||
       key->view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY ||
       key->view_type == VK_IMAGE_VIEW_TYPE_CUBE ||
       key->view_type == VK_IMAGE_VIEW_TYPE_3D) {
      coords =
         nir_vec3(b, nir_channel(b, coords, 0), nir_channel(b, coords, 1),
                  key->needs_layer_id ? get_layer_id(b) : nir_imm_int(b, 0));
   }

   if (key->aspects == VK_IMAGE_ASPECT_COLOR_BIT) {
      for (uint32_t i = 0; i < ARRAY_SIZE(key->color); i++) {
         if (key->color[i].type == nir_type_invalid)
            continue;

         nir_def *texel = texel_fetch(b, key->view_type, key->color[i].type, i,
                                      sample_id, coords);

         nir_store_output(
            b, texel, nir_imm_int(b, 0), .base = i,
            .src_type = key->color[i].type,
            .io_semantics.location = FRAG_RESULT_DATA0 + i,
            .io_semantics.num_slots = 1,
            .write_mask = nir_component_mask(texel->num_components));
      }
   }

   if (key->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
      nir_def *texel = texel_fetch(b, key->view_type, nir_type_float32, 0,
                                   sample_id, coords);

      nir_store_output(b, nir_channel(b, texel, 0), nir_imm_int(b, 0),
                       .base = 0, .src_type = nir_type_float32,
                       .io_semantics.location = FRAG_RESULT_DEPTH,
                       .io_semantics.num_slots = 1,
                       .write_mask = nir_component_mask(1));
   }

   if (key->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
      nir_def *texel = texel_fetch(
         b, key->view_type, nir_type_uint32,
         key->aspects & VK_IMAGE_ASPECT_DEPTH_BIT ? 1 : 0, sample_id, coords);

      nir_store_output(b, nir_channel(b, texel, 0), nir_imm_int(b, 0),
                       .base = 0, .src_type = nir_type_uint32,
                       .io_semantics.location = FRAG_RESULT_STENCIL,
                       .io_semantics.num_slots = 1,
                       .write_mask = nir_component_mask(1));
   }

   return b->shader;
}

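/* Look up (or compile and cache) the preload shader matching the key. The
 * compiled shader is stored in the vk_meta object cache keyed on the full
 * shader key, so later passes with the same key reuse it. */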
static VkResult
get_preload_shader(struct panvk_device *dev,
                   const struct panvk_fb_preload_shader_key *key,
                   struct panvk_internal_shader **shader_out)
{
   struct panvk_physical_device *phys_dev =
      to_panvk_physical_device(dev->vk.physical);
   struct panvk_internal_shader *shader;
   VkShaderEXT shader_handle = (VkShaderEXT)vk_meta_lookup_object(
      &dev->meta, VK_OBJECT_TYPE_SHADER_EXT, key, sizeof(*key));
   if (shader_handle != VK_NULL_HANDLE)
      goto out;

   nir_shader *nir = get_preload_nir_shader(key);

   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));

   struct panfrost_compile_inputs inputs = {
      .gpu_id = phys_dev->kmod.props.gpu_prod_id,
      .no_ubo_to_push = true,
      .is_blit = true,
   };

   pan_shader_preprocess(nir, inputs.gpu_id);

   VkResult result =
      panvk_per_arch(create_internal_shader)(dev, nir, &inputs, &shader);
   ralloc_free(nir);

   if (result != VK_SUCCESS)
      return result;

#if PAN_ARCH >= 9
   shader->spd = panvk_pool_alloc_desc(&dev->mempools.rw, SHADER_PROGRAM);
   if (!panvk_priv_mem_host_addr(shader->spd)) {
      vk_shader_destroy(&dev->vk, &shader->vk, NULL);
      return panvk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY);
   }

   pan_cast_and_pack(panvk_priv_mem_host_addr(shader->spd), SHADER_PROGRAM,
                     cfg) {
      cfg.stage = MALI_SHADER_STAGE_FRAGMENT;
      cfg.fragment_coverage_bitmask_type = MALI_COVERAGE_BITMASK_TYPE_GL;
      cfg.register_allocation = MALI_SHADER_REGISTER_ALLOCATION_32_PER_THREAD;
      cfg.binary = panvk_priv_mem_dev_addr(shader->code_mem);
      cfg.preload.r48_r63 = shader->info.preload >> 48;
   }
#endif

   shader_handle = (VkShaderEXT)vk_meta_cache_object(
      &dev->vk, &dev->meta, key, sizeof(*key), VK_OBJECT_TYPE_SHADER_EXT,
      (uint64_t)panvk_internal_shader_to_handle(shader));

out:
   shader = panvk_internal_shader_from_handle(shader_handle);
   *shader_out = shader;
   return VK_SUCCESS;
}

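/* Allocate the pre/post-frame DCD array if it hasn't been allocated yet.
 * There are three frame-shader slots; on v7 and earlier each layer needs
 * its own set of DCDs because the layer ID is passed through per-draw
 * push constants. */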
static VkResult
alloc_pre_post_dcds(struct panvk_cmd_buffer *cmdbuf, struct pan_fb_info *fbinfo)
{
   if (fbinfo->bifrost.pre_post.dcds.gpu)
      return VK_SUCCESS;

   uint32_t dcd_count =
      3 * (PAN_ARCH <= 7 ? cmdbuf->state.gfx.render.layer_count : 1);

   fbinfo->bifrost.pre_post.dcds =
      panvk_cmd_alloc_desc_array(cmdbuf, dcd_count, DRAW);
   if (!fbinfo->bifrost.pre_post.dcds.cpu)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   return VK_SUCCESS;
}

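/* Translate the NIR register type used by the preload shader into the
 * matching fixed-function blend register file format. */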
static enum mali_register_file_format
get_reg_fmt(nir_alu_type type)
{
   switch (type) {
   case nir_type_float32:
      return MALI_REGISTER_FILE_FORMAT_F32;
   case nir_type_uint32:
      return MALI_REGISTER_FILE_FORMAT_U32;
   case nir_type_int32:
      return MALI_REGISTER_FILE_FORMAT_I32;
   default:
      assert(!"Invalid reg type");
      return MALI_REGISTER_FILE_FORMAT_F32;
   }
}

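/* Populate the texture descriptors read by the preload shader: one per
 * color attachment, or one per preloaded Z/S aspect. For combined ZS
 * attachments, pick the view descriptor matching the aspect being
 * preloaded. */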
static void
fill_textures(struct panvk_cmd_buffer *cmdbuf, struct pan_fb_info *fbinfo,
              const struct panvk_fb_preload_shader_key *key,
              struct mali_texture_packed *textures)
{
   if (key->aspects == VK_IMAGE_ASPECT_COLOR_BIT) {
      for (unsigned i = 0; i < fbinfo->rt_count; i++) {
         struct panvk_image_view *iview =
            cmdbuf->state.gfx.render.color_attachments.iviews[i];

         if (iview)
            textures[i] = iview->descs.tex;
         else
            textures[i] = (struct mali_texture_packed){0};
      }
      return;
   }

   uint32_t idx = 0;
   if (key->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
      struct panvk_image_view *iview =
         cmdbuf->state.gfx.render.z_attachment.iview
            ?: cmdbuf->state.gfx.render.s_attachment.iview;

      textures[idx++] = vk_format_has_depth(iview->vk.view_format)
                           ? iview->descs.tex
                           : iview->descs.other_aspect_tex;
   }

   if (key->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
      struct panvk_image_view *iview =
         cmdbuf->state.gfx.render.s_attachment.iview
            ?: cmdbuf->state.gfx.render.z_attachment.iview;

      textures[idx++] = vk_format_has_depth(iview->vk.view_format)
                           ? iview->descs.other_aspect_tex
                           : iview->descs.tex;
   }
}

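/* Fill the blend descriptors for a color preload: opaque fixed-function
 * blending that writes the fetched texel straight to the tile buffer.
 * Blending is disabled for render targets that aren't preloaded and for
 * Z/S preload jobs. */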
static void
fill_bds(struct pan_fb_info *fbinfo,
         const struct panvk_fb_preload_shader_key *key,
         struct mali_blend_packed *bds)
{
   uint32_t bd_count = MAX2(fbinfo->rt_count, 1);

   for (unsigned i = 0; i < bd_count; i++) {
      const struct pan_image_view *pview =
         fbinfo->rts[i].preload ? fbinfo->rts[i].view : NULL;

      pan_pack(&bds[i], BLEND, cfg) {
         if (key->aspects != VK_IMAGE_ASPECT_COLOR_BIT || !pview) {
            cfg.enable = false;
            cfg.internal.mode = MALI_BLEND_MODE_OFF;
            continue;
         }

         cfg.round_to_fb_precision = true;
         cfg.srgb = util_format_is_srgb(pview->format);
         cfg.internal.mode = MALI_BLEND_MODE_OPAQUE;
         cfg.equation.rgb.a = MALI_BLEND_OPERAND_A_SRC;
         cfg.equation.rgb.b = MALI_BLEND_OPERAND_B_SRC;
         cfg.equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO;
         cfg.equation.alpha.a = MALI_BLEND_OPERAND_A_SRC;
         cfg.equation.alpha.b = MALI_BLEND_OPERAND_B_SRC;
         cfg.equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO;
         cfg.equation.color_mask = 0xf;

         cfg.internal.fixed_function.num_comps = 4;
         cfg.internal.fixed_function.conversion.memory_format = GENX(
            panfrost_dithered_format_from_pipe_format)(pview->format, false);
         cfg.internal.fixed_function.rt = i;
#if PAN_ARCH <= 7
         cfg.internal.fixed_function.conversion.register_format =
            get_reg_fmt(key->color[i].type);
#endif
      }
   }
}

#if PAN_ARCH <= 7
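/* v6/v7 path: the preload is expressed as a renderer state descriptor
 * (with trailing blend descriptors) referenced from a DRAW DCD, emitted
 * once per frame-shader slot and, when a layer ID is needed, once per
 * layer, since the layer index comes from push constants. */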
static VkResult
cmd_emit_dcd(struct panvk_cmd_buffer *cmdbuf, struct pan_fb_info *fbinfo,
             const struct panvk_fb_preload_shader_key *key)
{
   struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
   struct panvk_internal_shader *shader = NULL;

   VkResult result = get_preload_shader(dev, key, &shader);
   if (result != VK_SUCCESS)
      return result;

   uint32_t tex_count = key->aspects == VK_IMAGE_ASPECT_COLOR_BIT
                           ? fbinfo->rt_count
                           : util_bitcount(key->aspects);
   uint32_t bd_count = MAX2(fbinfo->rt_count, 1);

   struct panfrost_ptr rsd = panvk_cmd_alloc_desc_aggregate(
      cmdbuf, PAN_DESC(RENDERER_STATE), PAN_DESC_ARRAY(bd_count, BLEND));
   if (!rsd.cpu)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   pan_cast_and_pack(rsd.cpu, RENDERER_STATE, cfg) {
      pan_shader_prepare_rsd(&shader->info,
                             panvk_priv_mem_dev_addr(shader->code_mem), &cfg);

      cfg.shader.texture_count = tex_count;
      cfg.shader.sampler_count = 1;

      cfg.multisample_misc.sample_mask = 0xFFFF;
      cfg.multisample_misc.multisample_enable = key->samples > 1;
      cfg.multisample_misc.evaluate_per_sample = key->samples > 1;

      cfg.multisample_misc.depth_function = MALI_FUNC_ALWAYS;
      cfg.multisample_misc.depth_write_mask =
         (key->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) != 0;

      cfg.stencil_mask_misc.stencil_enable =
         (key->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) != 0;
      cfg.stencil_mask_misc.stencil_mask_front = 0xFF;
      cfg.stencil_mask_misc.stencil_mask_back = 0xFF;
      cfg.stencil_front.compare_function = MALI_FUNC_ALWAYS;
      cfg.stencil_front.stencil_fail = MALI_STENCIL_OP_REPLACE;
      cfg.stencil_front.depth_fail = MALI_STENCIL_OP_REPLACE;
      cfg.stencil_front.depth_pass = MALI_STENCIL_OP_REPLACE;
      cfg.stencil_front.mask = 0xFF;

      cfg.stencil_back = cfg.stencil_front;

      if (key->aspects == VK_IMAGE_ASPECT_COLOR_BIT) {
         /* Skipping ATEST requires forcing Z/S */
         cfg.properties.zs_update_operation = MALI_PIXEL_KILL_STRONG_EARLY;
         cfg.properties.pixel_kill_operation = MALI_PIXEL_KILL_FORCE_EARLY;
      } else {
         /* Writing Z/S requires late updates */
         cfg.properties.zs_update_operation = MALI_PIXEL_KILL_FORCE_LATE;
         cfg.properties.pixel_kill_operation = MALI_PIXEL_KILL_FORCE_LATE;
      }

      /* However, while shaders writing Z/S can normally be killed, on v6
       * this can cause GPU timeouts for frame shaders, so only allow colour
       * blit shaders to be killed. */
      cfg.properties.allow_forward_pixel_to_kill =
         key->aspects == VK_IMAGE_ASPECT_COLOR_BIT;

      if (PAN_ARCH == 6)
         cfg.properties.allow_forward_pixel_to_be_killed =
            key->aspects == VK_IMAGE_ASPECT_COLOR_BIT;
   }

   fill_bds(fbinfo, key, rsd.cpu + pan_size(RENDERER_STATE));

   struct panvk_batch *batch = cmdbuf->cur_batch;
   uint16_t minx = 0, miny = 0, maxx, maxy;

   /* Align on 32x32 tiles */
   minx = fbinfo->extent.minx & ~31;
   miny = fbinfo->extent.miny & ~31;
   maxx = MIN2(ALIGN_POT(fbinfo->extent.maxx + 1, 32), fbinfo->width) - 1;
   maxy = MIN2(ALIGN_POT(fbinfo->extent.maxy + 1, 32), fbinfo->height) - 1;

   struct panfrost_ptr vpd = panvk_cmd_alloc_desc(cmdbuf, VIEWPORT);
   if (!vpd.cpu)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   pan_cast_and_pack(vpd.cpu, VIEWPORT, cfg) {
      cfg.scissor_minimum_x = minx;
      cfg.scissor_minimum_y = miny;
      cfg.scissor_maximum_x = maxx;
      cfg.scissor_maximum_y = maxy;
   }

   struct panfrost_ptr sampler = panvk_cmd_alloc_desc(cmdbuf, SAMPLER);
   if (!sampler.cpu)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   pan_cast_and_pack(sampler.cpu, SAMPLER, cfg) {
      cfg.seamless_cube_map = false;
      cfg.normalized_coordinates = false;
      cfg.clamp_integer_array_indices = false;
      cfg.minify_nearest = true;
      cfg.magnify_nearest = true;
   }

   struct panfrost_ptr textures =
      panvk_cmd_alloc_desc_array(cmdbuf, tex_count, TEXTURE);
   if (!textures.cpu)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   fill_textures(cmdbuf, fbinfo, key, textures.cpu);

   result = alloc_pre_post_dcds(cmdbuf, fbinfo);
   if (result != VK_SUCCESS)
      return result;

   struct mali_draw_packed dcd_base;

   pan_pack(&dcd_base, DRAW, cfg) {
      cfg.thread_storage = batch->tls.gpu;
      cfg.state = rsd.gpu;

      cfg.viewport = vpd.gpu;

      cfg.textures = textures.gpu;
      cfg.samplers = sampler.gpu;

#if PAN_ARCH >= 6
      /* Until we decide to support FB CRC, we can consider that untouched
       * tiles should never be written back. */
      cfg.clean_fragment_write = true;
#endif
   }

   struct mali_draw_packed *dcds = fbinfo->bifrost.pre_post.dcds.cpu;
   uint32_t dcd_idx = key->aspects == VK_IMAGE_ASPECT_COLOR_BIT ? 0 : 1;

   if (key->needs_layer_id) {
      struct panfrost_ptr layer_ids = panvk_cmd_alloc_dev_mem(
         cmdbuf, desc, cmdbuf->state.gfx.render.layer_count * sizeof(uint64_t),
         sizeof(uint64_t));
      uint32_t *layer_id = layer_ids.cpu;

      for (uint32_t l = 0; l < cmdbuf->state.gfx.render.layer_count; l++) {
         struct mali_draw_packed dcd_layer;

         /* The push uniform pointer has to be 8-byte aligned, so we have to
          * skip odd layer_id entries. */
         layer_id[2 * l] = l;
         pan_pack(&dcd_layer, DRAW, cfg) {
            cfg.push_uniforms = layer_ids.gpu + (sizeof(uint64_t) * l);
         };

         pan_merge(dcd_layer, dcd_base, DRAW);
         dcds[(l * 3) + dcd_idx] = dcd_layer;
      }
   } else {
      dcds[dcd_idx] = dcd_base;
   }

   if (key->aspects == VK_IMAGE_ASPECT_COLOR_BIT) {
      fbinfo->bifrost.pre_post.modes[dcd_idx] =
         MALI_PRE_POST_FRAME_SHADER_MODE_INTERSECT;
   } else {
      const struct pan_image *plane =
         fbinfo->zs.view.zs ? pan_image_view_get_zs_plane(fbinfo->zs.view.zs)
                            : pan_image_view_get_s_plane(fbinfo->zs.view.s);
      enum pipe_format fmt = plane->layout.format;
      bool always = false;

      /* If we're dealing with a combined ZS resource and only one
       * component is cleared, we need to reload the whole surface
       * because the zs_clean_pixel_write_enable flag is set in that
       * case.
       */
      if (util_format_is_depth_and_stencil(fmt) &&
          fbinfo->zs.clear.z != fbinfo->zs.clear.s)
         always = true;

      /* We could use INTERSECT on Bifrost v7 too, but
       * EARLY_ZS_ALWAYS has the advantage of reloading the ZS tile
       * buffer one or more tiles ahead, making ZS data immediately
       * available for any ZS tests taking place in other shaders.
       * Things haven't been benchmarked to determine what's
       * preferable (saving bandwidth vs having ZS preloaded
       * earlier), so let's leave it like that for now.
       */
      fbinfo->bifrost.pre_post.modes[dcd_idx] =
         PAN_ARCH > 6 ? MALI_PRE_POST_FRAME_SHADER_MODE_EARLY_ZS_ALWAYS
         : always     ? MALI_PRE_POST_FRAME_SHADER_MODE_ALWAYS
                      : MALI_PRE_POST_FRAME_SHADER_MODE_INTERSECT;
   }

   return VK_SUCCESS;
}
#else
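/* v9+ (Valhall) path: there is no renderer state descriptor; the shader
 * program, resource table, blend and depth/stencil state are referenced
 * directly from the DRAW DCD, and the layer ID comes from
 * nir_load_layer_id() rather than push constants, so a single DCD per
 * frame-shader slot suffices. */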
static VkResult
cmd_emit_dcd(struct panvk_cmd_buffer *cmdbuf, struct pan_fb_info *fbinfo,
             struct panvk_fb_preload_shader_key *key)
{
   struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
   struct panvk_internal_shader *shader = NULL;

   VkResult result = get_preload_shader(dev, key, &shader);
   if (result != VK_SUCCESS)
      return result;

   uint32_t bd_count =
      key->aspects == VK_IMAGE_ASPECT_COLOR_BIT ? fbinfo->rt_count : 0;
   struct panfrost_ptr bds =
      panvk_cmd_alloc_desc_array(cmdbuf, bd_count, BLEND);
   if (bd_count > 0 && !bds.cpu)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   uint32_t tex_count = key->aspects == VK_IMAGE_ASPECT_COLOR_BIT
                           ? fbinfo->rt_count
                           : util_bitcount(key->aspects);
   uint32_t desc_count = tex_count + 1;

   struct panfrost_ptr descs = panvk_cmd_alloc_dev_mem(
      cmdbuf, desc, desc_count * PANVK_DESCRIPTOR_SIZE, PANVK_DESCRIPTOR_SIZE);
   if (!descs.cpu)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   struct mali_sampler_packed *sampler = descs.cpu;

   pan_pack(sampler, SAMPLER, cfg) {
      cfg.seamless_cube_map = false;
      cfg.normalized_coordinates = false;
      cfg.clamp_integer_array_indices = false;
      cfg.minify_nearest = true;
      cfg.magnify_nearest = true;
   }

   fill_textures(cmdbuf, fbinfo, key, descs.cpu + PANVK_DESCRIPTOR_SIZE);

   if (key->aspects == VK_IMAGE_ASPECT_COLOR_BIT)
      fill_bds(fbinfo, key, bds.cpu);

   struct panfrost_ptr res_table = panvk_cmd_alloc_desc(cmdbuf, RESOURCE);
   if (!res_table.cpu)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   pan_cast_and_pack(res_table.cpu, RESOURCE, cfg) {
      cfg.address = descs.gpu;
      cfg.size = desc_count * PANVK_DESCRIPTOR_SIZE;
   }

   struct panfrost_ptr zsd = panvk_cmd_alloc_desc(cmdbuf, DEPTH_STENCIL);
   if (!zsd.cpu)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   bool preload_z =
      key->aspects != VK_IMAGE_ASPECT_COLOR_BIT && fbinfo->zs.preload.z;
   bool preload_s =
      key->aspects != VK_IMAGE_ASPECT_COLOR_BIT && fbinfo->zs.preload.s;

   pan_cast_and_pack(zsd.cpu, DEPTH_STENCIL, cfg) {
      cfg.depth_function = MALI_FUNC_ALWAYS;
      cfg.depth_write_enable = preload_z;

      if (preload_z)
         cfg.depth_source = MALI_DEPTH_SOURCE_SHADER;

      cfg.stencil_test_enable = preload_s;
      cfg.stencil_from_shader = preload_s;

      cfg.front_compare_function = MALI_FUNC_ALWAYS;
      cfg.front_stencil_fail = MALI_STENCIL_OP_REPLACE;
      cfg.front_depth_fail = MALI_STENCIL_OP_REPLACE;
      cfg.front_depth_pass = MALI_STENCIL_OP_REPLACE;
      cfg.front_write_mask = 0xFF;
      cfg.front_value_mask = 0xFF;

      cfg.back_compare_function = MALI_FUNC_ALWAYS;
      cfg.back_stencil_fail = MALI_STENCIL_OP_REPLACE;
      cfg.back_depth_fail = MALI_STENCIL_OP_REPLACE;
      cfg.back_depth_pass = MALI_STENCIL_OP_REPLACE;
      cfg.back_write_mask = 0xFF;
      cfg.back_value_mask = 0xFF;

      cfg.depth_cull_enable = false;
   }

   result = alloc_pre_post_dcds(cmdbuf, fbinfo);
   if (result != VK_SUCCESS)
      return result;

   struct mali_draw_packed *dcds = fbinfo->bifrost.pre_post.dcds.cpu;
   uint32_t dcd_idx = key->aspects == VK_IMAGE_ASPECT_COLOR_BIT ? 0 : 1;

   pan_pack(&dcds[dcd_idx], DRAW, cfg) {
      if (key->aspects == VK_IMAGE_ASPECT_COLOR_BIT) {
         /* Skipping ATEST requires forcing Z/S */
         cfg.zs_update_operation = MALI_PIXEL_KILL_STRONG_EARLY;
         cfg.pixel_kill_operation = MALI_PIXEL_KILL_FORCE_EARLY;

         cfg.blend = bds.gpu;
         cfg.blend_count = bd_count;
         cfg.render_target_mask = cmdbuf->state.gfx.render.bound_attachments &
                                  MESA_VK_RP_ATTACHMENT_ANY_COLOR_BITS;
      } else {
         /* ZS_EMIT requires late update/kill */
         cfg.zs_update_operation = MALI_PIXEL_KILL_FORCE_LATE;
         cfg.pixel_kill_operation = MALI_PIXEL_KILL_FORCE_LATE;
         cfg.blend_count = 0;
      }

      cfg.allow_forward_pixel_to_kill =
         key->aspects == VK_IMAGE_ASPECT_COLOR_BIT;
      cfg.allow_forward_pixel_to_be_killed = true;
      cfg.depth_stencil = zsd.gpu;
      cfg.sample_mask = 0xFFFF;
      cfg.multisample_enable = key->samples > 1;
      cfg.evaluate_per_sample = key->samples > 1;
      cfg.maximum_z = 1.0;
      cfg.clean_fragment_write = true;
      cfg.shader.resources = res_table.gpu | 1;
      cfg.shader.shader = panvk_priv_mem_dev_addr(shader->spd);
      cfg.shader.thread_storage = cmdbuf->state.gfx.tsd;
   }

   if (key->aspects == VK_IMAGE_ASPECT_COLOR_BIT) {
      fbinfo->bifrost.pre_post.modes[dcd_idx] =
         MALI_PRE_POST_FRAME_SHADER_MODE_INTERSECT;
   } else {
      /* We could use INTERSECT on Valhall too, but
       * EARLY_ZS_ALWAYS has the advantage of reloading the ZS tile
       * buffer one or more tiles ahead, making ZS data immediately
       * available for any ZS tests taking place in other shaders.
       * Things haven't been benchmarked to determine what's
       * preferable (saving bandwidth vs having ZS preloaded
       * earlier), so let's leave it like that for now.
       */
      fbinfo->bifrost.pre_post.modes[dcd_idx] =
         MALI_PRE_POST_FRAME_SHADER_MODE_EARLY_ZS_ALWAYS;
   }

   return VK_SUCCESS;
}
#endif

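/* Emit the Z/S preload job when any aspect of the depth/stencil attachment
 * needs preloading. Depth and stencil share a single shader key (and thus
 * a single DCD), so their view types must match. */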
static VkResult
cmd_preload_zs_attachments(struct panvk_cmd_buffer *cmdbuf,
                           struct pan_fb_info *fbinfo)
{
   if (!fbinfo->zs.preload.s && !fbinfo->zs.preload.z)
      return VK_SUCCESS;

   struct panvk_fb_preload_shader_key key = {
      .type = PANVK_META_OBJECT_KEY_FB_PRELOAD_SHADER,
      .samples = fbinfo->nr_samples,
      .needs_layer_id = cmdbuf->state.gfx.render.layer_count > 1,
   };

   if (fbinfo->zs.preload.z) {
      key.aspects = VK_IMAGE_ASPECT_DEPTH_BIT;
      key.view_type =
         cmdbuf->state.gfx.render.z_attachment.iview
            ? cmdbuf->state.gfx.render.z_attachment.iview->vk.view_type
            : cmdbuf->state.gfx.render.s_attachment.iview->vk.view_type;
   }

   if (fbinfo->zs.preload.s) {
      VkImageViewType view_type =
         cmdbuf->state.gfx.render.s_attachment.iview
            ? cmdbuf->state.gfx.render.s_attachment.iview->vk.view_type
            : cmdbuf->state.gfx.render.z_attachment.iview->vk.view_type;

      key.aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
      if (!fbinfo->zs.preload.z)
         key.view_type = view_type;

      assert(key.view_type == view_type);
   }

   return cmd_emit_dcd(cmdbuf, fbinfo, &key);
}

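/* Emit the color preload job covering every render target flagged for
 * preload, recording each target's register type in the shader key. All
 * preloaded color attachments must share the same view type. */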
static VkResult
cmd_preload_color_attachments(struct panvk_cmd_buffer *cmdbuf,
                              struct pan_fb_info *fbinfo)
{
   struct panvk_fb_preload_shader_key key = {
      .type = PANVK_META_OBJECT_KEY_FB_PRELOAD_SHADER,
      .samples = fbinfo->nr_samples,
      .needs_layer_id = cmdbuf->state.gfx.render.layer_count > 1,
      .aspects = VK_IMAGE_ASPECT_COLOR_BIT,
   };
   bool needs_preload = false;

   for (uint32_t i = 0; i < fbinfo->rt_count; i++) {
      if (!fbinfo->rts[i].preload)
         continue;

      enum pipe_format pfmt = fbinfo->rts[i].view->format;
      struct panvk_image_view *iview =
         cmdbuf->state.gfx.render.color_attachments.iviews[i];

      key.color[i].type = util_format_is_pure_uint(pfmt)   ? nir_type_uint32
                          : util_format_is_pure_sint(pfmt) ? nir_type_int32
                                                           : nir_type_float32;

      if (!needs_preload) {
         key.view_type = iview->vk.view_type;
         needs_preload = true;
      }

      assert(key.view_type == iview->vk.view_type);
   }

   if (!needs_preload)
      return VK_SUCCESS;

   return cmd_emit_dcd(cmdbuf, fbinfo, &key);
}

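/* Entry point: emit the pre-frame shaders reloading the tile buffer from
 * memory for every attachment whose contents must be preserved when the
 * render pass starts (e.g. VK_ATTACHMENT_LOAD_OP_LOAD). */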
VkResult
panvk_per_arch(cmd_fb_preload)(struct panvk_cmd_buffer *cmdbuf,
                               struct pan_fb_info *fbinfo)
{
   VkResult result = cmd_preload_color_attachments(cmdbuf, fbinfo);
   if (result != VK_SUCCESS)
      return result;

   return cmd_preload_zs_attachments(cmdbuf, fbinfo);
}