• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2021 Collabora Ltd.
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #include "panvk_cmd_alloc.h"
7 #include "panvk_cmd_fb_preload.h"
8 #include "panvk_image_view.h"
9 #include "panvk_meta.h"
10 #include "panvk_shader.h"
11 
12 #include "nir_builder.h"
13 
14 #include "pan_shader.h"
15 
/* Key identifying one FB-preload shader variant. The whole struct is
 * hashed into the vk_meta object cache (see get_preload_shader()), so
 * every field that changes the generated code must live here. */
struct panvk_fb_preload_shader_key {
   /* Meta-object discriminator, always PANVK_META_OBJECT_KEY_FB_PRELOAD_SHADER
    * for this key type (see cmd_preload_*_attachments()). */
   enum panvk_meta_object_key_type type;
   /* View type of the attachment(s) being preloaded; selects the sampler
    * dimensionality and whether a layer coordinate is needed. */
   VkImageViewType view_type;
   /* Attachment sample count; anything above 1 switches to txf_ms with an
    * explicit sample index. */
   VkSampleCountFlagBits samples;
   /* Aspects preloaded by this shader: exactly COLOR, or any combination
    * of DEPTH and STENCIL. */
   VkImageAspectFlags aspects;
   /* True when rendering more than one layer, in which case the shader
    * must fetch from the layer it is invoked for. */
   bool needs_layer_id;
   struct {
      /* Register type for each color RT (nir_type_invalid when that RT
       * is not preloaded). */
      nir_alu_type type;
   } color[8];
};
26 
/* Emit a texel fetch (txf, or txf_ms when sample_id is non-NULL) from
 * texture unit tex_idx at integer coordinates coords, LOD 0. The result
 * is a 4-component, 32-bit vector of type reg_type. */
static nir_def *
texel_fetch(nir_builder *b, VkImageViewType view_type,
            nir_alu_type reg_type, unsigned tex_idx,
            nir_def *sample_id, nir_def *coords)
{
   /* Two sources (coord + lod), plus ms_index for multisampled fetches. */
   nir_tex_instr *tex = nir_tex_instr_create(b->shader, sample_id ? 3 : 2);

   tex->op = sample_id ? nir_texop_txf_ms : nir_texop_txf;
   tex->dest_type = reg_type;
   tex->is_array = vk_image_view_type_is_array(view_type);
   tex->sampler_dim = sample_id ? GLSL_SAMPLER_DIM_MS
                                : vk_image_view_type_to_sampler_dim(view_type);
   tex->coord_components = coords->num_components;
   tex->src[0] = nir_tex_src_for_ssa(nir_tex_src_coord, coords);
   tex->src[1] = nir_tex_src_for_ssa(nir_tex_src_lod, nir_imm_int(b, 0));

   if (sample_id)
      tex->src[2] = nir_tex_src_for_ssa(nir_tex_src_ms_index, sample_id);

#if PAN_ARCH <= 7
   tex->sampler_index = 0;
   tex->texture_index = tex_idx;
#else
   /* v9+ indexes resources through resource tables: slot 0 of table 0 is
    * the sampler, textures start at slot 1 (matches the descriptor layout
    * built in cmd_emit_dcd()). */
   tex->sampler_index = pan_res_handle(0, 0);
   tex->texture_index = pan_res_handle(0, tex_idx + 1);
#endif

   /* Fetches always produce a vec4 of 32-bit components. */
   nir_def_init(&tex->instr, &tex->def, 4, 32);
   nir_builder_instr_insert(b, &tex->instr);

   return &tex->def;
}
59 
/* Create a fragment-shader output variable for color render target rt,
 * typed as a 4-component vector matching fmt_type and bound to
 * FRAG_RESULT_DATA0 + rt.
 * NOTE(review): view_type, aspect and samples are unused here, and this
 * helper has no callers visible in this file (outputs are written with
 * nir_store_output() instead) — looks like dead code; confirm before
 * removing. */
static nir_variable *
color_output_var(nir_builder *b, VkImageViewType view_type,
                 VkImageAspectFlags aspect, VkSampleCountFlagBits samples,
                 nir_alu_type fmt_type, unsigned rt)
{
   enum glsl_base_type base_type =
      nir_get_glsl_base_type_for_nir_type(fmt_type);
   const struct glsl_type *var_type = glsl_vector_type(base_type, 4);
   static const char *var_names[] = {
      "gl_FragData[0]", "gl_FragData[1]", "gl_FragData[2]", "gl_FragData[3]",
      "gl_FragData[4]", "gl_FragData[5]", "gl_FragData[6]", "gl_FragData[7]",
   };

   assert(rt < ARRAY_SIZE(var_names));

   nir_variable *var = nir_variable_create(b->shader, nir_var_shader_out,
                                           var_type, var_names[rt]);
   var->data.location = FRAG_RESULT_DATA0 + rt;

   return var;
}
81 
/* Return the index of the layer being preloaded. On v7 and earlier the
 * layer index is fed through the first push constant (one draw per layer,
 * see the needs_layer_id path in cmd_emit_dcd()); on v9+ the hardware
 * provides it directly. */
static nir_def *
get_layer_id(nir_builder *b)
{
#if PAN_ARCH <= 7
   return nir_load_push_constant(b, 1, 32, nir_imm_int(b, 0));
#else
   return nir_load_layer_id(b);
#endif
}
91 
/* Build the NIR fragment shader described by key: for each preloaded
 * aspect, texel-fetch the current pixel from the matching texture and
 * write it to the corresponding framebuffer output (color RT, depth
 * and/or stencil). */
static nir_shader *
get_preload_nir_shader(const struct panvk_fb_preload_shader_key *key)
{
   nir_builder builder = nir_builder_init_simple_shader(
      MESA_SHADER_FRAGMENT, GENX(pan_shader_get_compiler_options)(),
      "panvk-meta-preload");
   nir_builder *b = &builder;
   /* Fetch per-sample only when the attachments are multisampled. */
   nir_def *sample_id =
      key->samples != VK_SAMPLE_COUNT_1_BIT ? nir_load_sample_id(b) : NULL;
   nir_def *coords = nir_u2u32(b, nir_load_pixel_coord(b));

   /* Arrayed/cube/3D views need a third coordinate selecting the layer
    * (or slice); single-layer rendering just uses layer 0. */
   if (key->view_type == VK_IMAGE_VIEW_TYPE_2D_ARRAY ||
       key->view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY ||
       key->view_type == VK_IMAGE_VIEW_TYPE_CUBE ||
       key->view_type == VK_IMAGE_VIEW_TYPE_3D) {
      coords =
         nir_vec3(b, nir_channel(b, coords, 0), nir_channel(b, coords, 1),
                  key->needs_layer_id ? get_layer_id(b) : nir_imm_int(b, 0));
   }

   if (key->aspects == VK_IMAGE_ASPECT_COLOR_BIT) {
      /* One fetch + store per preloaded RT; RT index doubles as the
       * texture index (matches fill_textures()). */
      for (uint32_t i = 0; i < ARRAY_SIZE(key->color); i++) {
         if (key->color[i].type == nir_type_invalid)
            continue;

         nir_def *texel = texel_fetch(b, key->view_type, key->color[i].type, i,
                                      sample_id, coords);

         nir_store_output(
            b, texel, nir_imm_int(b, 0), .base = i,
            .src_type = key->color[i].type,
            .io_semantics.location = FRAG_RESULT_DATA0 + i,
            .io_semantics.num_slots = 1,
            .write_mask = nir_component_mask(texel->num_components));
      }
   }

   if (key->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
      /* Depth always sits at texture index 0 (see fill_textures()). */
      nir_def *texel = texel_fetch(b, key->view_type, nir_type_float32, 0,
                                   sample_id, coords);

      nir_store_output(b, nir_channel(b, texel, 0), nir_imm_int(b, 0),
                       .base = 0, .src_type = nir_type_float32,
                       .io_semantics.location = FRAG_RESULT_DEPTH,
                       .io_semantics.num_slots = 1,
                       .write_mask = nir_component_mask(1));
   }

   if (key->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
      /* Stencil is at index 1 when depth is also preloaded, 0 otherwise. */
      nir_def *texel = texel_fetch(
         b, key->view_type, nir_type_uint32,
         key->aspects & VK_IMAGE_ASPECT_DEPTH_BIT ? 1 : 0, sample_id, coords);

      nir_store_output(b, nir_channel(b, texel, 0), nir_imm_int(b, 0),
                       .base = 0, .src_type = nir_type_uint32,
                       .io_semantics.location = FRAG_RESULT_STENCIL,
                       .io_semantics.num_slots = 1,
                       .write_mask = nir_component_mask(1));
   }

   return b->shader;
}
154 
/* Return (compiling and caching on first use) the internal preload shader
 * matching key. Shaders are cached in the device's vk_meta object cache
 * keyed on the whole key struct, so each variant is compiled only once
 * per device. On success *shader_out points at the cached shader, which
 * is owned by the cache (callers must not destroy it). */
static VkResult
get_preload_shader(struct panvk_device *dev,
                   const struct panvk_fb_preload_shader_key *key,
                   struct panvk_internal_shader **shader_out)
{
   struct panvk_physical_device *phys_dev =
      to_panvk_physical_device(dev->vk.physical);
   struct panvk_internal_shader *shader;
   VkShaderEXT shader_handle = (VkShaderEXT)vk_meta_lookup_object(
      &dev->meta, VK_OBJECT_TYPE_SHADER_EXT, key, sizeof(*key));
   /* Cache hit: skip compilation entirely. */
   if (shader_handle != VK_NULL_HANDLE)
      goto out;

   nir_shader *nir = get_preload_nir_shader(key);

   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));

   struct panfrost_compile_inputs inputs = {
      .gpu_id = phys_dev->kmod.props.gpu_prod_id,
      .no_ubo_to_push = true,
      .is_blit = true,
   };

   pan_shader_preprocess(nir, inputs.gpu_id);

   VkResult result = panvk_per_arch(create_internal_shader)(
      dev, nir, &inputs, &shader);
   ralloc_free(nir);

   if (result != VK_SUCCESS)
      return result;

#if PAN_ARCH >= 9
   /* v9+ also needs a SHADER_PROGRAM descriptor wrapping the binary;
    * it is referenced directly from the DCD (see cmd_emit_dcd()). */
   shader->spd = panvk_pool_alloc_desc(&dev->mempools.rw, SHADER_PROGRAM);
   if (!panvk_priv_mem_host_addr(shader->spd)) {
      vk_shader_destroy(&dev->vk, &shader->vk, NULL);
      return panvk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY);
   }

   pan_cast_and_pack(panvk_priv_mem_host_addr(shader->spd), SHADER_PROGRAM,
                     cfg) {
      cfg.stage = MALI_SHADER_STAGE_FRAGMENT;
      cfg.fragment_coverage_bitmask_type = MALI_COVERAGE_BITMASK_TYPE_GL;
      cfg.register_allocation = MALI_SHADER_REGISTER_ALLOCATION_32_PER_THREAD;
      cfg.binary = panvk_priv_mem_dev_addr(shader->code_mem);
      cfg.preload.r48_r63 = shader->info.preload >> 48;
   }
#endif

   /* Publish the shader in the meta cache. NOTE(review): presumably the
    * returned handle is the winning object if another thread cached the
    * same key first — confirm against vk_meta_cache_object() semantics. */
   shader_handle = (VkShaderEXT)vk_meta_cache_object(
      &dev->vk, &dev->meta, key, sizeof(*key), VK_OBJECT_TYPE_SHADER_EXT,
      (uint64_t)panvk_internal_shader_to_handle(shader));

out:
   shader = panvk_internal_shader_from_handle(shader_handle);
   *shader_out = shader;
   return VK_SUCCESS;
}
213 
214 static VkResult
alloc_pre_post_dcds(struct panvk_cmd_buffer * cmdbuf,struct pan_fb_info * fbinfo)215 alloc_pre_post_dcds(struct panvk_cmd_buffer *cmdbuf, struct pan_fb_info *fbinfo)
216 {
217    if (fbinfo->bifrost.pre_post.dcds.gpu)
218       return VK_SUCCESS;
219 
220    uint32_t dcd_count =
221       3 * (PAN_ARCH <= 7 ? cmdbuf->state.gfx.render.layer_count : 1);
222 
223    fbinfo->bifrost.pre_post.dcds = panvk_cmd_alloc_desc_array(cmdbuf, dcd_count, DRAW);
224    if (!fbinfo->bifrost.pre_post.dcds.cpu)
225       return VK_ERROR_OUT_OF_DEVICE_MEMORY;
226 
227    return VK_SUCCESS;
228 }
229 
230 static enum mali_register_file_format
get_reg_fmt(nir_alu_type type)231 get_reg_fmt(nir_alu_type type)
232 {
233    switch (type) {
234    case nir_type_float32:
235       return MALI_REGISTER_FILE_FORMAT_F32;
236    case nir_type_uint32:
237       return MALI_REGISTER_FILE_FORMAT_U32;
238    case nir_type_int32:
239       return MALI_REGISTER_FILE_FORMAT_I32;
240    default:
241       assert(!"Invalid reg type");
242       return MALI_REGISTER_FILE_FORMAT_F32;
243    }
244 }
245 
/* Fill the texture descriptors read by the preload shader. For color,
 * one texture per render target (zeroed when there is no attachment).
 * For ZS, one texture per preloaded aspect in depth-then-stencil order,
 * matching the tex_idx values chosen in get_preload_nir_shader(). */
static void
fill_textures(struct panvk_cmd_buffer *cmdbuf, struct pan_fb_info *fbinfo,
              const struct panvk_fb_preload_shader_key *key,
              struct mali_texture_packed *textures)
{
   if (key->aspects == VK_IMAGE_ASPECT_COLOR_BIT) {
      for (unsigned i = 0; i < fbinfo->rt_count; i++) {
         struct panvk_image_view *iview =
            cmdbuf->state.gfx.render.color_attachments.iviews[i];

         if (iview)
            textures[i] = iview->descs.tex;
         else
            textures[i] = (struct mali_texture_packed){0};
      }
      return;
   }

   uint32_t idx = 0;
   if (key->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
      /* Fall back to the stencil attachment view for combined ZS images
       * that were only bound through the stencil attachment. */
      struct panvk_image_view *iview =
         cmdbuf->state.gfx.render.z_attachment.iview
            ?: cmdbuf->state.gfx.render.s_attachment.iview;

      /* Pick whichever of the two per-view descriptors exposes the depth
       * aspect of the image. */
      textures[idx++] = vk_format_has_depth(iview->vk.view_format)
                           ? iview->descs.tex
                           : iview->descs.other_aspect_tex;
   }

   if (key->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
      struct panvk_image_view *iview =
         cmdbuf->state.gfx.render.s_attachment.iview
            ?: cmdbuf->state.gfx.render.z_attachment.iview;

      /* Inverse of the depth case: when the view format has depth, the
       * stencil aspect lives in the other_aspect descriptor. */
      textures[idx++] = vk_format_has_depth(iview->vk.view_format)
                           ? iview->descs.other_aspect_tex
                           : iview->descs.tex;
   }
}
285 
/* Fill one blend descriptor per render target. RTs being color-preloaded
 * get an opaque write-through blend (result = src) with a full color
 * mask; all other cases (non-preloaded RTs, or ZS preloads) get blending
 * disabled. */
static void
fill_bds(struct pan_fb_info *fbinfo,
         const struct panvk_fb_preload_shader_key *key,
         struct mali_blend_packed *bds)
{
   /* At least one blend descriptor even with zero RTs. */
   uint32_t bd_count = MAX2(fbinfo->rt_count, 1);

   for (unsigned i = 0; i < bd_count; i++) {
      const struct pan_image_view *pview =
         fbinfo->rts[i].preload ? fbinfo->rts[i].view : NULL;

      pan_pack(&bds[i], BLEND, cfg) {
         if (key->aspects != VK_IMAGE_ASPECT_COLOR_BIT || !pview) {
            cfg.enable = false;
            cfg.internal.mode = MALI_BLEND_MODE_OFF;
            continue;
         }

         cfg.round_to_fb_precision = true;
         cfg.srgb = util_format_is_srgb(pview->format);
         /* Opaque mode: the fetched texel simply replaces the tile data
          * (rgb/alpha = SRC + ZERO). */
         cfg.internal.mode = MALI_BLEND_MODE_OPAQUE;
         cfg.equation.rgb.a = MALI_BLEND_OPERAND_A_SRC;
         cfg.equation.rgb.b = MALI_BLEND_OPERAND_B_SRC;
         cfg.equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO;
         cfg.equation.alpha.a = MALI_BLEND_OPERAND_A_SRC;
         cfg.equation.alpha.b = MALI_BLEND_OPERAND_B_SRC;
         cfg.equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO;
         cfg.equation.color_mask = 0xf;

         cfg.internal.fixed_function.num_comps = 4;
         cfg.internal.fixed_function.conversion.memory_format = GENX(
            panfrost_dithered_format_from_pipe_format)(pview->format, false);
         cfg.internal.fixed_function.rt = i;
#if PAN_ARCH <= 7
         /* v7- needs the register file format of the shader output. */
         cfg.internal.fixed_function.conversion.register_format =
            get_reg_fmt(key->color[i].type);
#endif
      }
   }
}
326 
327 #if PAN_ARCH <= 7
328 static VkResult
cmd_emit_dcd(struct panvk_cmd_buffer * cmdbuf,struct pan_fb_info * fbinfo,const struct panvk_fb_preload_shader_key * key)329 cmd_emit_dcd(struct panvk_cmd_buffer *cmdbuf, struct pan_fb_info *fbinfo,
330              const struct panvk_fb_preload_shader_key *key)
331 {
332    struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
333    struct panvk_internal_shader *shader = NULL;
334 
335    VkResult result = get_preload_shader(dev, key, &shader);
336    if (result != VK_SUCCESS)
337       return result;
338 
339    uint32_t tex_count = key->aspects == VK_IMAGE_ASPECT_COLOR_BIT
340                            ? fbinfo->rt_count
341                            : util_bitcount(key->aspects);
342    uint32_t bd_count = MAX2(fbinfo->rt_count, 1);
343 
344    struct panfrost_ptr rsd = panvk_cmd_alloc_desc_aggregate(
345       cmdbuf, PAN_DESC(RENDERER_STATE),
346       PAN_DESC_ARRAY(bd_count, BLEND));
347    if (!rsd.cpu)
348       return VK_ERROR_OUT_OF_DEVICE_MEMORY;
349 
350    pan_cast_and_pack(rsd.cpu, RENDERER_STATE, cfg) {
351       pan_shader_prepare_rsd(&shader->info,
352                              panvk_priv_mem_dev_addr(shader->code_mem), &cfg);
353 
354       cfg.shader.texture_count = tex_count;
355       cfg.shader.sampler_count = 1;
356 
357       cfg.multisample_misc.sample_mask = 0xFFFF;
358       cfg.multisample_misc.multisample_enable = key->samples > 1;
359       cfg.multisample_misc.evaluate_per_sample = key->samples > 1;
360 
361       cfg.multisample_misc.depth_function = MALI_FUNC_ALWAYS;
362       cfg.multisample_misc.depth_write_mask =
363          (key->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) != 0;
364 
365       cfg.stencil_mask_misc.stencil_enable =
366          (key->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) != 0;
367       cfg.stencil_mask_misc.stencil_mask_front = 0xFF;
368       cfg.stencil_mask_misc.stencil_mask_back = 0xFF;
369       cfg.stencil_front.compare_function = MALI_FUNC_ALWAYS;
370       cfg.stencil_front.stencil_fail = MALI_STENCIL_OP_REPLACE;
371       cfg.stencil_front.depth_fail = MALI_STENCIL_OP_REPLACE;
372       cfg.stencil_front.depth_pass = MALI_STENCIL_OP_REPLACE;
373       cfg.stencil_front.mask = 0xFF;
374 
375       cfg.stencil_back = cfg.stencil_front;
376 
377       if (key->aspects == VK_IMAGE_ASPECT_COLOR_BIT) {
378          /* Skipping ATEST requires forcing Z/S */
379          cfg.properties.zs_update_operation = MALI_PIXEL_KILL_STRONG_EARLY;
380          cfg.properties.pixel_kill_operation = MALI_PIXEL_KILL_FORCE_EARLY;
381       } else {
382          /* Writing Z/S requires late updates */
383          cfg.properties.zs_update_operation = MALI_PIXEL_KILL_FORCE_LATE;
384          cfg.properties.pixel_kill_operation = MALI_PIXEL_KILL_FORCE_LATE;
385       }
386 
387       /* However, while shaders writing Z/S can normally be killed, on v6
388        * for frame shaders it can cause GPU timeouts, so only allow colour
389        * blit shaders to be killed. */
390       cfg.properties.allow_forward_pixel_to_kill =
391          key->aspects == VK_IMAGE_ASPECT_COLOR_BIT;
392 
393       if (PAN_ARCH == 6)
394          cfg.properties.allow_forward_pixel_to_be_killed =
395             key->aspects == VK_IMAGE_ASPECT_COLOR_BIT;
396    }
397 
398    fill_bds(fbinfo, key, rsd.cpu + pan_size(RENDERER_STATE));
399 
400    struct panvk_batch *batch = cmdbuf->cur_batch;
401    uint16_t minx = 0, miny = 0, maxx, maxy;
402 
403    /* Align on 32x32 tiles */
404    minx = fbinfo->extent.minx & ~31;
405    miny = fbinfo->extent.miny & ~31;
406    maxx = MIN2(ALIGN_POT(fbinfo->extent.maxx + 1, 32), fbinfo->width) - 1;
407    maxy = MIN2(ALIGN_POT(fbinfo->extent.maxy + 1, 32), fbinfo->height) - 1;
408 
409    struct panfrost_ptr vpd = panvk_cmd_alloc_desc(cmdbuf, VIEWPORT);
410    if (!vpd.cpu)
411       return VK_ERROR_OUT_OF_DEVICE_MEMORY;
412 
413    pan_cast_and_pack(vpd.cpu, VIEWPORT, cfg) {
414       cfg.scissor_minimum_x = minx;
415       cfg.scissor_minimum_y = miny;
416       cfg.scissor_maximum_x = maxx;
417       cfg.scissor_maximum_y = maxy;
418    }
419 
420    struct panfrost_ptr sampler = panvk_cmd_alloc_desc(cmdbuf, SAMPLER);
421    if (!sampler.cpu)
422       return VK_ERROR_OUT_OF_DEVICE_MEMORY;
423 
424    pan_cast_and_pack(sampler.cpu, SAMPLER, cfg) {
425       cfg.seamless_cube_map = false;
426       cfg.normalized_coordinates = false;
427       cfg.clamp_integer_array_indices = false;
428       cfg.minify_nearest = true;
429       cfg.magnify_nearest = true;
430    }
431 
432    struct panfrost_ptr textures =
433       panvk_cmd_alloc_desc_array(cmdbuf, tex_count, TEXTURE);
434    if (!textures.cpu)
435       return VK_ERROR_OUT_OF_DEVICE_MEMORY;
436 
437    fill_textures(cmdbuf, fbinfo, key, textures.cpu);
438 
439    result = alloc_pre_post_dcds(cmdbuf, fbinfo);
440    if (result != VK_SUCCESS)
441       return result;
442 
443    struct mali_draw_packed dcd_base;
444 
445    pan_pack(&dcd_base, DRAW, cfg) {
446       cfg.thread_storage = batch->tls.gpu;
447       cfg.state = rsd.gpu;
448 
449       cfg.viewport = vpd.gpu;
450 
451       cfg.textures = textures.gpu;
452       cfg.samplers = sampler.gpu;
453 
454 #if PAN_ARCH >= 6
455       /* Until we decide to support FB CRC, we can consider that untouched tiles
456        * should never be written back. */
457       cfg.clean_fragment_write = true;
458 #endif
459    }
460 
461    struct mali_draw_packed *dcds = fbinfo->bifrost.pre_post.dcds.cpu;
462    uint32_t dcd_idx = key->aspects == VK_IMAGE_ASPECT_COLOR_BIT ? 0 : 1;
463 
464    if (key->needs_layer_id) {
465       struct panfrost_ptr layer_ids = panvk_cmd_alloc_dev_mem(
466          cmdbuf, desc,
467          cmdbuf->state.gfx.render.layer_count * sizeof(uint64_t),
468          sizeof(uint64_t));
469       uint32_t *layer_id = layer_ids.cpu;
470 
471       for (uint32_t l = 0; l < cmdbuf->state.gfx.render.layer_count; l++) {
472          struct mali_draw_packed dcd_layer;
473 
474          /* Push uniform pointer has to be 8-byte aligned, so we have to skip
475           * odd layer_id entries. */
476          layer_id[2 * l] = l;
477          pan_pack(&dcd_layer, DRAW, cfg) {
478             cfg.push_uniforms = layer_ids.gpu + (sizeof(uint64_t) * l);
479          };
480 
481          pan_merge(dcd_layer, dcd_base, DRAW);
482 	 dcds[(l * 3) + dcd_idx] = dcd_layer;
483       }
484    } else {
485       dcds[dcd_idx] = dcd_base;
486    }
487 
488    if (key->aspects == VK_IMAGE_ASPECT_COLOR_BIT) {
489       fbinfo->bifrost.pre_post.modes[dcd_idx] =
490          MALI_PRE_POST_FRAME_SHADER_MODE_INTERSECT;
491    } else {
492       const struct pan_image *plane =
493          fbinfo->zs.view.zs ? pan_image_view_get_zs_plane(fbinfo->zs.view.zs)
494                             : pan_image_view_get_s_plane(fbinfo->zs.view.s);
495       enum pipe_format fmt = plane->layout.format;
496       bool always = false;
497 
498       /* If we're dealing with a combined ZS resource and only one
499        * component is cleared, we need to reload the whole surface
500        * because the zs_clean_pixel_write_enable flag is set in that
501        * case.
502        */
503       if (util_format_is_depth_and_stencil(fmt) &&
504           fbinfo->zs.clear.z != fbinfo->zs.clear.s)
505          always = true;
506 
507       /* We could use INTERSECT on Bifrost v7 too, but
508        * EARLY_ZS_ALWAYS has the advantage of reloading the ZS tile
509        * buffer one or more tiles ahead, making ZS data immediately
510        * available for any ZS tests taking place in other shaders.
511        * Thing's haven't been benchmarked to determine what's
512        * preferable (saving bandwidth vs having ZS preloaded
513        * earlier), so let's leave it like that for now.
514        */
515       fbinfo->bifrost.pre_post.modes[dcd_idx] =
516          PAN_ARCH > 6
517             ? MALI_PRE_POST_FRAME_SHADER_MODE_EARLY_ZS_ALWAYS
518          : always ? MALI_PRE_POST_FRAME_SHADER_MODE_ALWAYS
519                   : MALI_PRE_POST_FRAME_SHADER_MODE_INTERSECT;
520    }
521 
522    return VK_SUCCESS;
523 }
524 #else
/* Emit the pre-frame DCD that reloads FB content on v9+. Builds the
 * blend descriptors, a resource table holding the sampler + source
 * textures, and a depth/stencil descriptor, then packs a single DRAW
 * descriptor into the pre/post DCD array (the hardware supplies the
 * layer index, so one DCD covers all layers). Returns
 * VK_ERROR_OUT_OF_DEVICE_MEMORY when any allocation fails. */
static VkResult
cmd_emit_dcd(struct panvk_cmd_buffer *cmdbuf, struct pan_fb_info *fbinfo,
             struct panvk_fb_preload_shader_key *key)
{
   struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
   struct panvk_internal_shader *shader = NULL;

   VkResult result = get_preload_shader(dev, key, &shader);
   if (result != VK_SUCCESS)
      return result;

   /* Blend descriptors are only needed for color preloads. */
   uint32_t bd_count =
      key->aspects == VK_IMAGE_ASPECT_COLOR_BIT ? fbinfo->rt_count : 0;
   struct panfrost_ptr bds =
      panvk_cmd_alloc_desc_array(cmdbuf, bd_count, BLEND);
   if (bd_count > 0 && !bds.cpu)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   /* One texture per color RT, or one per preloaded ZS aspect. */
   uint32_t tex_count = key->aspects == VK_IMAGE_ASPECT_COLOR_BIT
                           ? fbinfo->rt_count
                           : util_bitcount(key->aspects);
   /* +1 for the sampler in slot 0 (matches texel_fetch()'s
    * pan_res_handle() indices). */
   uint32_t desc_count = tex_count + 1;

   struct panfrost_ptr descs = panvk_cmd_alloc_dev_mem(
      cmdbuf, desc, desc_count * PANVK_DESCRIPTOR_SIZE, PANVK_DESCRIPTOR_SIZE);
   if (!descs.cpu)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   struct mali_sampler_packed *sampler = descs.cpu;

   /* Nearest filtering with unnormalized coordinates: the shader fetches
    * the exact source texel for each pixel. */
   pan_pack(sampler, SAMPLER, cfg) {
      cfg.seamless_cube_map = false;
      cfg.normalized_coordinates = false;
      cfg.clamp_integer_array_indices = false;
      cfg.minify_nearest = true;
      cfg.magnify_nearest = true;
   }

   /* Textures start right after the sampler, at slot 1. */
   fill_textures(cmdbuf, fbinfo, key, descs.cpu + PANVK_DESCRIPTOR_SIZE);

   if (key->aspects == VK_IMAGE_ASPECT_COLOR_BIT)
      fill_bds(fbinfo, key, bds.cpu);

   struct panfrost_ptr res_table = panvk_cmd_alloc_desc(cmdbuf, RESOURCE);
   if (!res_table.cpu)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   pan_cast_and_pack(res_table.cpu, RESOURCE, cfg) {
      cfg.address = descs.gpu;
      cfg.size = desc_count * PANVK_DESCRIPTOR_SIZE;
   }

   struct panfrost_ptr zsd = panvk_cmd_alloc_desc(cmdbuf, DEPTH_STENCIL);
   if (!zsd.cpu)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   bool preload_z =
      key->aspects != VK_IMAGE_ASPECT_COLOR_BIT && fbinfo->zs.preload.z;
   bool preload_s =
      key->aspects != VK_IMAGE_ASPECT_COLOR_BIT && fbinfo->zs.preload.s;

   /* Tests always pass; writes come from the shader and are enabled only
    * for the preloaded aspects. */
   pan_cast_and_pack(zsd.cpu, DEPTH_STENCIL, cfg) {
      cfg.depth_function = MALI_FUNC_ALWAYS;
      cfg.depth_write_enable = preload_z;

      if (preload_z)
         cfg.depth_source = MALI_DEPTH_SOURCE_SHADER;

      cfg.stencil_test_enable = preload_s;
      cfg.stencil_from_shader = preload_s;

      cfg.front_compare_function = MALI_FUNC_ALWAYS;
      cfg.front_stencil_fail = MALI_STENCIL_OP_REPLACE;
      cfg.front_depth_fail = MALI_STENCIL_OP_REPLACE;
      cfg.front_depth_pass = MALI_STENCIL_OP_REPLACE;
      cfg.front_write_mask = 0xFF;
      cfg.front_value_mask = 0xFF;

      cfg.back_compare_function = MALI_FUNC_ALWAYS;
      cfg.back_stencil_fail = MALI_STENCIL_OP_REPLACE;
      cfg.back_depth_fail = MALI_STENCIL_OP_REPLACE;
      cfg.back_depth_pass = MALI_STENCIL_OP_REPLACE;
      cfg.back_write_mask = 0xFF;
      cfg.back_value_mask = 0xFF;

      cfg.depth_cull_enable = false;
   }

   result = alloc_pre_post_dcds(cmdbuf, fbinfo);
   if (result != VK_SUCCESS)
      return result;

   struct mali_draw_packed *dcds = fbinfo->bifrost.pre_post.dcds.cpu;
   /* Slot 0 is the color preload DCD, slot 1 the ZS preload DCD. */
   uint32_t dcd_idx = key->aspects == VK_IMAGE_ASPECT_COLOR_BIT ? 0 : 1;

   pan_pack(&dcds[dcd_idx], DRAW, cfg) {
      if (key->aspects == VK_IMAGE_ASPECT_COLOR_BIT) {
         /* Skipping ATEST requires forcing Z/S */
         cfg.zs_update_operation = MALI_PIXEL_KILL_STRONG_EARLY;
         cfg.pixel_kill_operation = MALI_PIXEL_KILL_FORCE_EARLY;

         cfg.blend = bds.gpu;
         cfg.blend_count = bd_count;
         cfg.render_target_mask = cmdbuf->state.gfx.render.bound_attachments &
                                  MESA_VK_RP_ATTACHMENT_ANY_COLOR_BITS;
      } else {
         /* ZS_EMIT requires late update/kill */
         cfg.zs_update_operation = MALI_PIXEL_KILL_FORCE_LATE;
         cfg.pixel_kill_operation = MALI_PIXEL_KILL_FORCE_LATE;
         cfg.blend_count = 0;
      }

      cfg.allow_forward_pixel_to_kill =
         key->aspects == VK_IMAGE_ASPECT_COLOR_BIT;
      cfg.allow_forward_pixel_to_be_killed = true;
      cfg.depth_stencil = zsd.gpu;
      cfg.sample_mask = 0xFFFF;
      cfg.multisample_enable = key->samples > 1;
      cfg.evaluate_per_sample = key->samples > 1;
      cfg.maximum_z = 1.0;
      cfg.clean_fragment_write = true;
      /* Low bit marks the resource table entry as valid/shared —
       * NOTE(review): presumed meaning of the |1; confirm against the
       * Valhall resource-table encoding. */
      cfg.shader.resources = res_table.gpu | 1;
      cfg.shader.shader = panvk_priv_mem_dev_addr(shader->spd);
      cfg.shader.thread_storage = cmdbuf->state.gfx.tsd;
   }

   if (key->aspects == VK_IMAGE_ASPECT_COLOR_BIT) {
      fbinfo->bifrost.pre_post.modes[dcd_idx] =
         MALI_PRE_POST_FRAME_SHADER_MODE_INTERSECT;
   } else {
      /* We could use INTERSECT on Valhall too, but
       * EARLY_ZS_ALWAYS has the advantage of reloading the ZS tile
       * buffer one or more tiles ahead, making ZS data immediately
       * available for any ZS tests taking place in other shaders.
       * Thing's haven't been benchmarked to determine what's
       * preferable (saving bandwidth vs having ZS preloaded
       * earlier), so let's leave it like that for now.
       */
      fbinfo->bifrost.pre_post.modes[dcd_idx] =
         MALI_PRE_POST_FRAME_SHADER_MODE_EARLY_ZS_ALWAYS;
   }

   return VK_SUCCESS;
}
669 #endif
670 
/* Emit the DCD preloading the depth and/or stencil attachment(s) when
 * fbinfo requests it. Both aspects share one shader key — and hence one
 * DCD — so their view types must agree (asserted below). */
static VkResult
cmd_preload_zs_attachments(struct panvk_cmd_buffer *cmdbuf,
                           struct pan_fb_info *fbinfo)
{
   if (!fbinfo->zs.preload.s && !fbinfo->zs.preload.z)
      return VK_SUCCESS;

   struct panvk_fb_preload_shader_key key = {
      .type = PANVK_META_OBJECT_KEY_FB_PRELOAD_SHADER,
      .samples = fbinfo->nr_samples,
      .needs_layer_id = cmdbuf->state.gfx.render.layer_count > 1,
   };

   if (fbinfo->zs.preload.z) {
      key.aspects = VK_IMAGE_ASPECT_DEPTH_BIT;
      /* Combined ZS images may only be bound through the stencil
       * attachment; fall back to its view type in that case. */
      key.view_type =
         cmdbuf->state.gfx.render.z_attachment.iview
            ? cmdbuf->state.gfx.render.z_attachment.iview->vk.view_type
            : cmdbuf->state.gfx.render.s_attachment.iview->vk.view_type;
   }

   if (fbinfo->zs.preload.s) {
      VkImageViewType view_type =
         cmdbuf->state.gfx.render.s_attachment.iview
            ? cmdbuf->state.gfx.render.s_attachment.iview->vk.view_type
            : cmdbuf->state.gfx.render.z_attachment.iview->vk.view_type;

      key.aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
      if (!fbinfo->zs.preload.z)
         key.view_type = view_type;

      /* Depth and stencil must come from views of the same type. */
      assert(key.view_type == view_type);
   }

   return cmd_emit_dcd(cmdbuf, fbinfo, &key);
}
707 
/* Emit the DCD preloading the color attachments when at least one RT
 * requests it. All preloaded RTs share one shader key/DCD, so they must
 * have the same view type (asserted below); per-RT register types are
 * derived from the RT format. */
static VkResult
cmd_preload_color_attachments(struct panvk_cmd_buffer *cmdbuf,
                              struct pan_fb_info *fbinfo)
{
   struct panvk_fb_preload_shader_key key = {
      .type = PANVK_META_OBJECT_KEY_FB_PRELOAD_SHADER,
      .samples = fbinfo->nr_samples,
      .needs_layer_id = cmdbuf->state.gfx.render.layer_count > 1,
      .aspects = VK_IMAGE_ASPECT_COLOR_BIT,
   };
   bool needs_preload = false;

   for (uint32_t i = 0; i < fbinfo->rt_count; i++) {
      if (!fbinfo->rts[i].preload)
         continue;

      enum pipe_format pfmt = fbinfo->rts[i].view->format;
      /* NOTE(review): iview is dereferenced below without a NULL check —
       * presumably an attachment view always exists when preload is set;
       * confirm against the render-state setup. */
      struct panvk_image_view *iview =
         cmdbuf->state.gfx.render.color_attachments.iviews[i];

      /* Pick the register type matching the RT's numeric format. */
      key.color[i].type = util_format_is_pure_uint(pfmt)   ? nir_type_uint32
                          : util_format_is_pure_sint(pfmt) ? nir_type_int32
                                                           : nir_type_float32;

      if (!needs_preload) {
         key.view_type = iview->vk.view_type;
         needs_preload = true;
      }

      /* All preloaded RTs must share the same view type. */
      assert(key.view_type == iview->vk.view_type);
   }

   if (!needs_preload)
      return VK_SUCCESS;

   return cmd_emit_dcd(cmdbuf, fbinfo, &key);
}
745 
746 VkResult
panvk_per_arch(cmd_fb_preload)747 panvk_per_arch(cmd_fb_preload)(struct panvk_cmd_buffer *cmdbuf,
748                                struct pan_fb_info *fbinfo)
749 {
750    VkResult result = cmd_preload_color_attachments(cmdbuf, fbinfo);
751    if (result != VK_SUCCESS)
752       return result;
753 
754    return cmd_preload_zs_attachments(cmdbuf, fbinfo);
755 }
756