/*
 * Copyright (C) 2020-2021 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *   Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
 *   Boris Brezillon <boris.brezillon@collabora.com>
 */

#include "pan_blitter.h"
#include <math.h>
#include <stdio.h>
#include "compiler/nir/nir_builder.h"
#include "util/u_math.h"
#include "pan_blend.h"
#include "pan_desc.h"
#include "pan_encoder.h"
#include "pan_jc.h"
#include "pan_pool.h"
#include "pan_shader.h"
#include "pan_texture.h"

#if PAN_ARCH >= 6
/* On Midgard, the native blit infrastructure (via MFBD preloads) is broken or
 * missing in many cases. We instead use software paths as fallbacks to
 * implement blits, which are done as TILER jobs. No vertex shader is
 * necessary since we can supply screen-space coordinates directly.
 *
 * This is primarily designed as a fallback for preloads but could be extended
 * for other clears/blits if needed in the future. */

static enum mali_register_file_format
blit_type_to_reg_fmt(nir_alu_type in)
{
   switch (in) {
   case nir_type_float32:
      return MALI_REGISTER_FILE_FORMAT_F32;
   case nir_type_int32:
      return MALI_REGISTER_FILE_FORMAT_I32;
   case nir_type_uint32:
      return MALI_REGISTER_FILE_FORMAT_U32;
   default:
      unreachable("Invalid blit type");
   }
}
#endif

struct pan_blit_surface {
   gl_frag_result loc              : 4;
   nir_alu_type type               : 8;
   enum mali_texture_dimension dim : 2;
   bool array                      : 1;
   unsigned src_samples            : 5;
   unsigned dst_samples            : 5;
};
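
/* Eight pan_blit_surface entries form a pan_blit_shader_key below, which is
 * used directly as a hash-table key. Unused entries stay zero-initialized
 * (nir_type_invalid), which keeps lookups deterministic, assuming the usual
 * byte-wise hash/compare these mesa caches use. */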

struct pan_blit_shader_key {
   struct pan_blit_surface surfaces[8];
};

struct pan_blit_shader_data {
   struct pan_blit_shader_key key;
   struct pan_shader_info info;
   mali_ptr address;
   unsigned blend_ret_offsets[8];
   nir_alu_type blend_types[8];
};

struct pan_blit_blend_shader_key {
   enum pipe_format format;
   nir_alu_type type;
   unsigned rt         : 3;
   unsigned nr_samples : 5;
   unsigned pad        : 24;
};

struct pan_blit_blend_shader_data {
   struct pan_blit_blend_shader_key key;
   mali_ptr address;
};

struct pan_blit_rsd_key {
   struct {
      enum pipe_format format;
      nir_alu_type type               : 8;
      unsigned src_samples            : 5;
      unsigned dst_samples            : 5;
      enum mali_texture_dimension dim : 2;
      bool array                      : 1;
   } rts[8], z, s;
};

struct pan_blit_rsd_data {
   struct pan_blit_rsd_key key;
   mali_ptr address;
};
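
/* Three caches back the blitter: blit fragment shaders (pan_blit_shader_key),
 * pre-v6 blend shaders (pan_blit_blend_shader_key), and fully-packed renderer
 * state descriptors (pan_blit_rsd_key). Each getter below follows the same
 * lock, search, insert-on-miss pattern. */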

#if PAN_ARCH >= 5
static void
pan_blitter_emit_blend(unsigned rt,
                       const struct pan_image_view *iview,
                       const struct pan_blit_shader_data *blit_shader,
                       mali_ptr blend_shader, void *out)
{
   assert(blend_shader == 0 || PAN_ARCH <= 5);

   pan_pack(out, BLEND, cfg) {
      if (!iview) {
         cfg.enable = false;
#if PAN_ARCH >= 6
         cfg.internal.mode = MALI_BLEND_MODE_OFF;
#endif
         continue;
      }

      cfg.round_to_fb_precision = true;
      cfg.srgb = util_format_is_srgb(iview->format);

#if PAN_ARCH >= 6
      cfg.internal.mode = MALI_BLEND_MODE_OPAQUE;
#endif

      if (!blend_shader) {
         cfg.equation.rgb.a = MALI_BLEND_OPERAND_A_SRC;
         cfg.equation.rgb.b = MALI_BLEND_OPERAND_B_SRC;
         cfg.equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO;
         cfg.equation.alpha.a = MALI_BLEND_OPERAND_A_SRC;
         cfg.equation.alpha.b = MALI_BLEND_OPERAND_B_SRC;
         cfg.equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO;
         cfg.equation.color_mask = 0xf;

#if PAN_ARCH >= 6
         nir_alu_type type = blit_shader->key.surfaces[rt].type;

         cfg.internal.fixed_function.num_comps = 4;
         cfg.internal.fixed_function.conversion.memory_format = GENX(
            panfrost_dithered_format_from_pipe_format)(iview->format, false);
         cfg.internal.fixed_function.conversion.register_format =
            blit_type_to_reg_fmt(type);

         cfg.internal.fixed_function.rt = rt;
#endif
      } else {
#if PAN_ARCH <= 5
         cfg.blend_shader = true;
         cfg.shader_pc = blend_shader;
#endif
      }
   }
}
#endif

struct pan_blitter_views {
   unsigned rt_count;
   const struct pan_image_view *src_rts[8];
   const struct pan_image_view *dst_rts[8];
   const struct pan_image_view *src_z;
   const struct pan_image_view *dst_z;
   const struct pan_image_view *src_s;
   const struct pan_image_view *dst_s;
};

static bool
pan_blitter_is_ms(struct pan_blitter_views *views)
{
   for (unsigned i = 0; i < views->rt_count; i++) {
      if (views->dst_rts[i]) {
         if (pan_image_view_get_nr_samples(views->dst_rts[i]) > 1)
            return true;
      }
   }

   if (views->dst_z && pan_image_view_get_nr_samples(views->dst_z) > 1)
      return true;

   if (views->dst_s && pan_image_view_get_nr_samples(views->dst_s) > 1)
      return true;

   return false;
}

#if PAN_ARCH >= 5
static void
pan_blitter_emit_blends(const struct pan_blit_shader_data *blit_shader,
                        struct pan_blitter_views *views,
                        mali_ptr *blend_shaders, void *out)
{
   for (unsigned i = 0; i < MAX2(views->rt_count, 1); ++i) {
      void *dest = out + pan_size(BLEND) * i;
      const struct pan_image_view *rt_view = views->dst_rts[i];
      mali_ptr blend_shader = blend_shaders ? blend_shaders[i] : 0;

      pan_blitter_emit_blend(i, rt_view, blit_shader, blend_shader, dest);
   }
}
#endif

#if PAN_ARCH <= 7
static void
pan_blitter_emit_rsd(const struct pan_blit_shader_data *blit_shader,
                     struct pan_blitter_views *views, mali_ptr *blend_shaders,
                     void *out)
{
   UNUSED bool zs = (views->dst_z || views->dst_s);
   bool ms = pan_blitter_is_ms(views);

   pan_pack(out, RENDERER_STATE, cfg) {
      assert(blit_shader->address);
      pan_shader_prepare_rsd(&blit_shader->info, blit_shader->address, &cfg);

      cfg.multisample_misc.sample_mask = 0xFFFF;
      cfg.multisample_misc.multisample_enable = ms;
      cfg.multisample_misc.evaluate_per_sample = ms;
      cfg.multisample_misc.depth_write_mask = views->dst_z != NULL;
      cfg.multisample_misc.depth_function = MALI_FUNC_ALWAYS;

      cfg.stencil_mask_misc.stencil_enable = views->dst_s != NULL;
      cfg.stencil_mask_misc.stencil_mask_front = 0xFF;
      cfg.stencil_mask_misc.stencil_mask_back = 0xFF;
      cfg.stencil_front.compare_function = MALI_FUNC_ALWAYS;
      cfg.stencil_front.stencil_fail = MALI_STENCIL_OP_REPLACE;
      cfg.stencil_front.depth_fail = MALI_STENCIL_OP_REPLACE;
      cfg.stencil_front.depth_pass = MALI_STENCIL_OP_REPLACE;
      cfg.stencil_front.mask = 0xFF;
      cfg.stencil_back = cfg.stencil_front;

#if PAN_ARCH >= 6
      if (zs) {
         /* Writing Z/S requires late updates */
         cfg.properties.zs_update_operation = MALI_PIXEL_KILL_FORCE_LATE;
         cfg.properties.pixel_kill_operation = MALI_PIXEL_KILL_FORCE_LATE;
      } else {
         /* Skipping ATEST requires forcing Z/S */
         cfg.properties.zs_update_operation = MALI_PIXEL_KILL_STRONG_EARLY;
         cfg.properties.pixel_kill_operation = MALI_PIXEL_KILL_FORCE_EARLY;
      }

      /* However, while shaders writing Z/S can normally be killed, on v6
       * for frame shaders it can cause GPU timeouts, so only allow colour
       * blit shaders to be killed. */
      cfg.properties.allow_forward_pixel_to_kill = !zs;

      if (PAN_ARCH == 6)
         cfg.properties.allow_forward_pixel_to_be_killed = !zs;
#else

      mali_ptr blend_shader =
         blend_shaders
            ? panfrost_last_nonnull(blend_shaders, MAX2(views->rt_count, 1))
            : 0;

      cfg.properties.work_register_count = 4;
      cfg.properties.force_early_z = !zs;
      cfg.stencil_mask_misc.alpha_test_compare_function = MALI_FUNC_ALWAYS;

      /* Set even on v5 for erratum workaround */
#if PAN_ARCH == 5
      cfg.legacy_blend_shader = blend_shader;
#else
      cfg.blend_shader = blend_shader;
      cfg.stencil_mask_misc.write_enable = true;
      cfg.stencil_mask_misc.dither_disable = true;
      cfg.multisample_misc.blend_shader = !!blend_shader;
      if (!cfg.multisample_misc.blend_shader) {
         cfg.blend_equation.rgb.a = MALI_BLEND_OPERAND_A_SRC;
         cfg.blend_equation.rgb.b = MALI_BLEND_OPERAND_B_SRC;
         cfg.blend_equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO;
         cfg.blend_equation.alpha.a = MALI_BLEND_OPERAND_A_SRC;
         cfg.blend_equation.alpha.b = MALI_BLEND_OPERAND_B_SRC;
         cfg.blend_equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO;
         cfg.blend_constant = 0;

         if (views->dst_rts[0] != NULL) {
            cfg.stencil_mask_misc.srgb =
               util_format_is_srgb(views->dst_rts[0]->format);
            cfg.blend_equation.color_mask = 0xf;
         }
      }
#endif
#endif
   }

#if PAN_ARCH >= 5
   pan_blitter_emit_blends(blit_shader, views, blend_shaders,
                           out + pan_size(RENDERER_STATE));
#endif
}
#endif
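
/* The RSD is laid out in GPU memory as one RENDERER_STATE descriptor
 * immediately followed by the BLEND descriptor array, which is why the blends
 * are written at out + pan_size(RENDERER_STATE) here and why
 * pan_blitter_get_rsd() below allocates the two as a single aggregate. */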

#if PAN_ARCH <= 5
static void
pan_blitter_get_blend_shaders(struct pan_blitter_cache *cache,
                              unsigned rt_count,
                              const struct pan_image_view **rts,
                              const struct pan_blit_shader_data *blit_shader,
                              mali_ptr *blend_shaders)
{
   if (!rt_count)
      return;

   struct pan_blend_state blend_state = {
      .rt_count = rt_count,
   };

   for (unsigned i = 0; i < rt_count; i++) {
      if (!rts[i] || panfrost_blendable_formats_v7[rts[i]->format].internal)
         continue;

      struct pan_blit_blend_shader_key key = {
         .format = rts[i]->format,
         .rt = i,
         .nr_samples = pan_image_view_get_nr_samples(rts[i]),
         .type = blit_shader->blend_types[i],
      };

      pthread_mutex_lock(&cache->shaders.lock);
      struct hash_entry *he =
         _mesa_hash_table_search(cache->shaders.blend, &key);
      struct pan_blit_blend_shader_data *blend_shader = he ? he->data : NULL;
      if (blend_shader) {
         blend_shaders[i] = blend_shader->address;
         pthread_mutex_unlock(&cache->shaders.lock);
         continue;
      }

      blend_shader =
         rzalloc(cache->shaders.blend, struct pan_blit_blend_shader_data);
      blend_shader->key = key;

      blend_state.rts[i] = (struct pan_blend_rt_state){
         .format = rts[i]->format,
         .nr_samples = pan_image_view_get_nr_samples(rts[i]),
         .equation =
            {
               .blend_enable = false,
               .color_mask = 0xf,
            },
      };

      pthread_mutex_lock(&cache->blend_shader_cache->lock);
      struct pan_blend_shader_variant *b = GENX(pan_blend_get_shader_locked)(
         cache->blend_shader_cache, &blend_state, blit_shader->blend_types[i],
         nir_type_float32, /* unused */
         i);

      assert(b->work_reg_count <= 4);
      struct panfrost_ptr bin =
         pan_pool_alloc_aligned(cache->shaders.pool, b->binary.size, 64);
      memcpy(bin.cpu, b->binary.data, b->binary.size);

      blend_shader->address = bin.gpu | b->first_tag;
      pthread_mutex_unlock(&cache->blend_shader_cache->lock);
      _mesa_hash_table_insert(cache->shaders.blend, &blend_shader->key,
                              blend_shader);
      pthread_mutex_unlock(&cache->shaders.lock);
      blend_shaders[i] = blend_shader->address;
   }
}
#endif
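
/* Lock ordering note: shaders.lock is taken before blend_shader_cache->lock
 * and released after it. Any other path touching both caches should acquire
 * them in the same order to avoid deadlock. */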

/*
 * Early Mali GPUs did not respect sampler LOD clamps or bias, so the Midgard
 * compiler inserts lowering code with a load_sampler_lod_parameters_pan sysval
 * that we need to lower. Our samplers do not use LOD clamps or bias, so we
 * lower to the identity settings and let constant folding get rid of the
 * unnecessary lowering.
 */
static bool
lower_sampler_parameters(nir_builder *b, nir_intrinsic_instr *intr,
                         UNUSED void *data)
{
   if (intr->intrinsic != nir_intrinsic_load_sampler_lod_parameters_pan)
      return false;

   const nir_const_value constants[4] = {
      nir_const_value_for_float(0.0f, 32),     /* min_lod */
      nir_const_value_for_float(INFINITY, 32), /* max_lod */
      nir_const_value_for_float(0.0f, 32),     /* lod_bias */
   };

   b->cursor = nir_after_instr(&intr->instr);
   nir_def_rewrite_uses(&intr->def, nir_build_imm(b, 3, 32, constants));
   return true;
}
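
/* This pass only runs on v4 (see the NIR_PASS_V call in
 * pan_blitter_get_blit_shader() below); the identity parameters chosen here
 * (min_lod = 0, max_lod = INFINITY, lod_bias = 0) let the inserted
 * clamp/bias code constant-fold away. */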

static const struct pan_blit_shader_data *
pan_blitter_get_blit_shader(struct pan_blitter_cache *cache,
                            const struct pan_blit_shader_key *key)
{
   pthread_mutex_lock(&cache->shaders.lock);
   struct hash_entry *he =
      _mesa_hash_table_search(cache->shaders.blit, key);
   struct pan_blit_shader_data *shader = he ? he->data : NULL;

   if (shader)
      goto out;

   unsigned coord_comps = 0;
   unsigned sig_offset = 0;
   char sig[256];
   bool first = true;
   for (unsigned i = 0; i < ARRAY_SIZE(key->surfaces); i++) {
      const char *type_str, *dim_str;
      if (key->surfaces[i].type == nir_type_invalid)
         continue;

      switch (key->surfaces[i].type) {
      case nir_type_float32:
         type_str = "float";
         break;
      case nir_type_uint32:
         type_str = "uint";
         break;
      case nir_type_int32:
         type_str = "int";
         break;
      default:
         unreachable("Invalid type\n");
      }

      switch (key->surfaces[i].dim) {
      case MALI_TEXTURE_DIMENSION_CUBE:
         dim_str = "cube";
         break;
      case MALI_TEXTURE_DIMENSION_1D:
         dim_str = "1D";
         break;
      case MALI_TEXTURE_DIMENSION_2D:
         dim_str = "2D";
         break;
      case MALI_TEXTURE_DIMENSION_3D:
         dim_str = "3D";
         break;
      default:
         unreachable("Invalid dim\n");
      }

      coord_comps = MAX2(coord_comps, (key->surfaces[i].dim ?: 3) +
                                         (key->surfaces[i].array ? 1 : 0));

      if (sig_offset >= sizeof(sig))
         continue;

      sig_offset +=
         snprintf(sig + sig_offset, sizeof(sig) - sig_offset,
                  "%s[%s;%s;%s%s;src_samples=%d,dst_samples=%d]",
                  first ? "" : ",", gl_frag_result_name(key->surfaces[i].loc),
                  type_str, dim_str, key->surfaces[i].array ? "[]" : "",
                  key->surfaces[i].src_samples, key->surfaces[i].dst_samples);
      /* Clear the first-entry flag only after the separator has been
       * emitted, so the signature does not start with a stray comma. */
      first = false;
   }

   nir_builder b = nir_builder_init_simple_shader(
      MESA_SHADER_FRAGMENT, GENX(pan_shader_get_compiler_options)(),
      "pan_blit(%s)", sig);
   nir_variable *coord_var = nir_variable_create(
      b.shader, nir_var_shader_in,
      glsl_vector_type(GLSL_TYPE_FLOAT, coord_comps), "coord");
   coord_var->data.location = VARYING_SLOT_VAR0;

   nir_def *coord = nir_load_var(&b, coord_var);

   unsigned active_count = 0;
   for (unsigned i = 0; i < ARRAY_SIZE(key->surfaces); i++) {
      if (key->surfaces[i].type == nir_type_invalid)
         continue;

      /* Resolve operations only work for N -> 1 samples. */
      assert(key->surfaces[i].dst_samples == 1 ||
             key->surfaces[i].src_samples == key->surfaces[i].dst_samples);

      static const char *out_names[] = {
         "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7",
      };

      unsigned ncomps = key->surfaces[i].loc >= FRAG_RESULT_DATA0 ? 4 : 1;
      enum glsl_base_type type =
         nir_get_glsl_base_type_for_nir_type(key->surfaces[i].type);
      nir_variable *out = nir_variable_create(b.shader, nir_var_shader_out,
                                              glsl_vector_type(type, ncomps),
                                              out_names[active_count]);
      out->data.location = key->surfaces[i].loc;
      out->data.driver_location = active_count;

      bool resolve =
         key->surfaces[i].src_samples > key->surfaces[i].dst_samples;
      bool ms = key->surfaces[i].src_samples > 1;
      enum glsl_sampler_dim sampler_dim;

      switch (key->surfaces[i].dim) {
      case MALI_TEXTURE_DIMENSION_1D:
         sampler_dim = GLSL_SAMPLER_DIM_1D;
         break;
      case MALI_TEXTURE_DIMENSION_2D:
         sampler_dim = ms ? GLSL_SAMPLER_DIM_MS : GLSL_SAMPLER_DIM_2D;
         break;
      case MALI_TEXTURE_DIMENSION_3D:
         sampler_dim = GLSL_SAMPLER_DIM_3D;
         break;
      case MALI_TEXTURE_DIMENSION_CUBE:
         sampler_dim = GLSL_SAMPLER_DIM_CUBE;
         break;
      }

      nir_def *res = NULL;

      if (resolve) {
         /* When resolving a float type, we need to calculate
          * the average of all samples. For integer resolve, GL
          * and Vulkan say that one sample should be chosen
          * without telling which. Let's just pick the first one
          * in that case.
          */
         nir_alu_type base_type =
            nir_alu_type_get_base_type(key->surfaces[i].type);
         unsigned nsamples =
            base_type == nir_type_float ? key->surfaces[i].src_samples : 1;

         for (unsigned s = 0; s < nsamples; s++) {
            nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);

            tex->op = nir_texop_txf_ms;
            tex->dest_type = key->surfaces[i].type;
            tex->texture_index = active_count;
            tex->is_array = key->surfaces[i].array;
            tex->sampler_dim = sampler_dim;

            tex->src[0] =
               nir_tex_src_for_ssa(nir_tex_src_coord, nir_f2i32(&b, coord));
            tex->coord_components = coord_comps;

            tex->src[1] =
               nir_tex_src_for_ssa(nir_tex_src_ms_index, nir_imm_int(&b, s));

            tex->src[2] =
               nir_tex_src_for_ssa(nir_tex_src_lod, nir_imm_int(&b, 0));
            nir_def_init(&tex->instr, &tex->def, 4, 32);
            nir_builder_instr_insert(&b, &tex->instr);

            res = res ? nir_fadd(&b, res, &tex->def) : &tex->def;
         }

         if (base_type == nir_type_float)
            res = nir_fmul_imm(&b, res, 1.0f / nsamples);
      } else {
         nir_tex_instr *tex = nir_tex_instr_create(b.shader, ms ? 3 : 1);

         tex->dest_type = key->surfaces[i].type;
         tex->texture_index = active_count;
         tex->is_array = key->surfaces[i].array;
         tex->sampler_dim = sampler_dim;

         if (ms) {
            tex->op = nir_texop_txf_ms;

            tex->src[0] =
               nir_tex_src_for_ssa(nir_tex_src_coord, nir_f2i32(&b, coord));
            tex->coord_components = coord_comps;

            tex->src[1] = nir_tex_src_for_ssa(nir_tex_src_ms_index,
                                              nir_load_sample_id(&b));

            tex->src[2] =
               nir_tex_src_for_ssa(nir_tex_src_lod, nir_imm_int(&b, 0));
         } else {
            tex->op = nir_texop_txl;

            tex->src[0] = nir_tex_src_for_ssa(nir_tex_src_coord, coord);
            tex->coord_components = coord_comps;
         }

         nir_def_init(&tex->instr, &tex->def, 4, 32);
         nir_builder_instr_insert(&b, &tex->instr);
         res = &tex->def;
      }

      assert(res);

      if (key->surfaces[i].loc >= FRAG_RESULT_DATA0) {
         nir_store_var(&b, out, res, 0xFF);
      } else {
         unsigned c = key->surfaces[i].loc == FRAG_RESULT_STENCIL ? 1 : 0;
         nir_store_var(&b, out, nir_channel(&b, res, c), 0xFF);
      }
      active_count++;
   }

   struct panfrost_compile_inputs inputs = {
      .gpu_id = cache->gpu_id,
      .is_blit = true,
      .no_idvs = true,
   };
   struct util_dynarray binary;

   util_dynarray_init(&binary, NULL);

   shader = rzalloc(cache->shaders.blit, struct pan_blit_shader_data);

   nir_shader_gather_info(b.shader, nir_shader_get_entrypoint(b.shader));

   for (unsigned i = 0; i < active_count; ++i)
      BITSET_SET(b.shader->info.textures_used, i);

   pan_shader_preprocess(b.shader, inputs.gpu_id);

   if (PAN_ARCH == 4) {
      NIR_PASS_V(b.shader, nir_shader_intrinsics_pass, lower_sampler_parameters,
                 nir_metadata_block_index | nir_metadata_dominance, NULL);
   }

   GENX(pan_shader_compile)(b.shader, &inputs, &binary, &shader->info);

   shader->key = *key;
   shader->address =
      pan_pool_upload_aligned(cache->shaders.pool, binary.data,
                              binary.size, PAN_ARCH >= 6 ? 128 : 64);

   util_dynarray_fini(&binary);
   ralloc_free(b.shader);

#if PAN_ARCH >= 6
   for (unsigned i = 0; i < ARRAY_SIZE(shader->blend_ret_offsets); i++) {
      shader->blend_ret_offsets[i] =
         shader->info.bifrost.blend[i].return_offset;
      shader->blend_types[i] = shader->info.bifrost.blend[i].type;
   }
#endif

   _mesa_hash_table_insert(cache->shaders.blit, &shader->key, shader);

out:
   pthread_mutex_unlock(&cache->shaders.lock);
   return shader;
}
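
/* For reference, a single-sampled 2D float colour blit yields a shader named
 * roughly:
 *
 *    pan_blit(FRAG_RESULT_DATA0[float;2D;src_samples=1,dst_samples=1])
 *
 * (an illustrative rendering of the sig format above, not output captured
 * from a real run). */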

static struct pan_blit_shader_key
pan_blitter_get_key(struct pan_blitter_views *views)
{
   struct pan_blit_shader_key key = {0};

   if (views->src_z) {
      assert(views->dst_z);
      key.surfaces[0].loc = FRAG_RESULT_DEPTH;
      key.surfaces[0].type = nir_type_float32;
      key.surfaces[0].src_samples = pan_image_view_get_nr_samples(views->src_z);
      key.surfaces[0].dst_samples = pan_image_view_get_nr_samples(views->dst_z);
      key.surfaces[0].dim = views->src_z->dim;
      key.surfaces[0].array =
         views->src_z->first_layer != views->src_z->last_layer;
   }

   if (views->src_s) {
      assert(views->dst_s);
      key.surfaces[1].loc = FRAG_RESULT_STENCIL;
      key.surfaces[1].type = nir_type_uint32;
      key.surfaces[1].src_samples = pan_image_view_get_nr_samples(views->src_s);
      key.surfaces[1].dst_samples = pan_image_view_get_nr_samples(views->dst_s);
      key.surfaces[1].dim = views->src_s->dim;
      key.surfaces[1].array =
         views->src_s->first_layer != views->src_s->last_layer;
   }

   for (unsigned i = 0; i < views->rt_count; i++) {
      if (!views->src_rts[i])
         continue;

      assert(views->dst_rts[i]);
      key.surfaces[i].loc = FRAG_RESULT_DATA0 + i;
      key.surfaces[i].type =
         util_format_is_pure_uint(views->src_rts[i]->format) ? nir_type_uint32
         : util_format_is_pure_sint(views->src_rts[i]->format)
            ? nir_type_int32
            : nir_type_float32;
      key.surfaces[i].src_samples =
         pan_image_view_get_nr_samples(views->src_rts[i]);
      key.surfaces[i].dst_samples =
         pan_image_view_get_nr_samples(views->dst_rts[i]);
      key.surfaces[i].dim = views->src_rts[i]->dim;
      key.surfaces[i].array =
         views->src_rts[i]->first_layer != views->src_rts[i]->last_layer;
   }

   return key;
}
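
/* Depth always lands in surfaces[0] and stencil in surfaces[1]; colour
 * targets use surfaces[i] per render target. Callers never mix Z/S and
 * colour in one key (see the assert in pan_blitter_get_rsd() below), so
 * the slots cannot collide. */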

#if PAN_ARCH <= 7
static mali_ptr
pan_blitter_get_rsd(struct pan_blitter_cache *cache,
                    struct pan_blitter_views *views)
{
   struct pan_blit_rsd_key rsd_key = {0};

   assert(!views->rt_count || (!views->src_z && !views->src_s));

   struct pan_blit_shader_key blit_key = pan_blitter_get_key(views);

   if (views->src_z) {
      assert(views->dst_z);
      rsd_key.z.format = views->dst_z->format;
      rsd_key.z.type = blit_key.surfaces[0].type;
      rsd_key.z.src_samples = blit_key.surfaces[0].src_samples;
      rsd_key.z.dst_samples = blit_key.surfaces[0].dst_samples;
      rsd_key.z.dim = blit_key.surfaces[0].dim;
      rsd_key.z.array = blit_key.surfaces[0].array;
   }

   if (views->src_s) {
      assert(views->dst_s);
      rsd_key.s.format = views->dst_s->format;
      rsd_key.s.type = blit_key.surfaces[1].type;
      rsd_key.s.src_samples = blit_key.surfaces[1].src_samples;
      rsd_key.s.dst_samples = blit_key.surfaces[1].dst_samples;
      rsd_key.s.dim = blit_key.surfaces[1].dim;
      rsd_key.s.array = blit_key.surfaces[1].array;
   }

   for (unsigned i = 0; i < views->rt_count; i++) {
      if (!views->src_rts[i])
         continue;

      assert(views->dst_rts[i]);
      rsd_key.rts[i].format = views->dst_rts[i]->format;
      rsd_key.rts[i].type = blit_key.surfaces[i].type;
      rsd_key.rts[i].src_samples = blit_key.surfaces[i].src_samples;
      rsd_key.rts[i].dst_samples = blit_key.surfaces[i].dst_samples;
      rsd_key.rts[i].dim = blit_key.surfaces[i].dim;
      rsd_key.rts[i].array = blit_key.surfaces[i].array;
   }

   pthread_mutex_lock(&cache->rsds.lock);
   struct hash_entry *he =
      _mesa_hash_table_search(cache->rsds.rsds, &rsd_key);
   struct pan_blit_rsd_data *rsd = he ? he->data : NULL;
   if (rsd)
      goto out;

   rsd = rzalloc(cache->rsds.rsds, struct pan_blit_rsd_data);
   rsd->key = rsd_key;

#if PAN_ARCH == 4
   struct panfrost_ptr rsd_ptr =
      pan_pool_alloc_desc(cache->rsds.pool, RENDERER_STATE);
#else
   unsigned bd_count = PAN_ARCH >= 5 ? MAX2(views->rt_count, 1) : 0;
   struct panfrost_ptr rsd_ptr = pan_pool_alloc_desc_aggregate(
      cache->rsds.pool, PAN_DESC(RENDERER_STATE),
      PAN_DESC_ARRAY(bd_count, BLEND));
#endif

   mali_ptr blend_shaders[8] = {0};

   const struct pan_blit_shader_data *blit_shader =
      pan_blitter_get_blit_shader(cache, &blit_key);

#if PAN_ARCH <= 5
   pan_blitter_get_blend_shaders(cache,
                                 views->rt_count, views->dst_rts, blit_shader,
                                 blend_shaders);
#endif

   pan_blitter_emit_rsd(blit_shader, views, blend_shaders, rsd_ptr.cpu);
   rsd->address = rsd_ptr.gpu;
   _mesa_hash_table_insert(cache->rsds.rsds, &rsd->key, rsd);

out:
   pthread_mutex_unlock(&cache->rsds.lock);
   return rsd->address;
}

static mali_ptr
pan_blit_get_rsd(struct pan_blitter_cache *cache,
                 const struct pan_image_view *src_views,
                 const struct pan_image_view *dst_view)
{
   const struct util_format_description *desc =
      util_format_description(src_views[0].format);

   struct pan_blitter_views views = {};

   if (util_format_has_depth(desc)) {
      views.src_z = &src_views[0];
      views.dst_z = dst_view;
   }

   if (src_views[1].format) {
      views.src_s = &src_views[1];
      views.dst_s = dst_view;
   } else if (util_format_has_stencil(desc)) {
      views.src_s = &src_views[0];
      views.dst_s = dst_view;
   }

   if (!views.src_z && !views.src_s) {
      views.rt_count = 1;
      views.src_rts[0] = src_views;
      views.dst_rts[0] = dst_view;
   }

   return pan_blitter_get_rsd(cache, &views);
}
#endif

static struct pan_blitter_views
pan_preload_get_views(const struct pan_fb_info *fb, bool zs,
                      struct pan_image_view *patched_s)
{
   struct pan_blitter_views views = {0};

   if (zs) {
      if (fb->zs.preload.z)
         views.src_z = views.dst_z = fb->zs.view.zs;

      if (fb->zs.preload.s) {
         const struct pan_image_view *view = fb->zs.view.s ?: fb->zs.view.zs;
         enum pipe_format fmt = util_format_get_depth_only(view->format);

         switch (view->format) {
         case PIPE_FORMAT_Z24_UNORM_S8_UINT:
            fmt = PIPE_FORMAT_X24S8_UINT;
            break;
         case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
            fmt = PIPE_FORMAT_X32_S8X24_UINT;
            break;
         default:
            fmt = view->format;
            break;
         }

         if (fmt != view->format) {
            *patched_s = *view;
            patched_s->format = fmt;
            views.src_s = views.dst_s = patched_s;
         } else {
            views.src_s = views.dst_s = view;
         }
      }
   } else {
      for (unsigned i = 0; i < fb->rt_count; i++) {
         if (fb->rts[i].preload) {
            views.src_rts[i] = fb->rts[i].view;
            views.dst_rts[i] = fb->rts[i].view;
         }
      }

      views.rt_count = fb->rt_count;
   }

   return views;
}
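
/* The stencil-format patching above must stay in sync with the equivalent
 * switch in pan_preload_emit_textures() below: both substitute
 * X24S8/X32_S8X24 formats so a combined depth/stencil plane can be sampled
 * as stencil-only. */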

static bool
pan_preload_needed(const struct pan_fb_info *fb, bool zs)
{
   if (zs) {
      if (fb->zs.preload.z || fb->zs.preload.s)
         return true;
   } else {
      for (unsigned i = 0; i < fb->rt_count; i++) {
         if (fb->rts[i].preload)
            return true;
      }
   }

   return false;
}

static mali_ptr
pan_blitter_emit_varying(struct pan_pool *pool)
{
   struct panfrost_ptr varying = pan_pool_alloc_desc(pool, ATTRIBUTE);

   pan_pack(varying.cpu, ATTRIBUTE, cfg) {
      cfg.buffer_index = 0;
      cfg.offset_enable = PAN_ARCH <= 5;
      cfg.format =
         GENX(panfrost_format_from_pipe_format)(PIPE_FORMAT_R32G32B32_FLOAT)->hw;

#if PAN_ARCH >= 9
      cfg.attribute_type = MALI_ATTRIBUTE_TYPE_1D;
      cfg.table = PAN_TABLE_ATTRIBUTE_BUFFER;
      cfg.frequency = MALI_ATTRIBUTE_FREQUENCY_VERTEX;
      cfg.stride = 4 * sizeof(float);
#endif
   }

   return varying.gpu;
}

static mali_ptr
pan_blitter_emit_varying_buffer(struct pan_pool *pool, mali_ptr coordinates)
{
#if PAN_ARCH >= 9
   struct panfrost_ptr varying_buffer = pan_pool_alloc_desc(pool, BUFFER);

   pan_pack(varying_buffer.cpu, BUFFER, cfg) {
      cfg.address = coordinates;
      cfg.size = 4 * sizeof(float) * 4;
   }
#else
   /* Bifrost needs an empty desc to mark end of prefetching */
   bool padding_buffer = PAN_ARCH >= 6;

   struct panfrost_ptr varying_buffer = pan_pool_alloc_desc_array(
      pool, (padding_buffer ? 2 : 1), ATTRIBUTE_BUFFER);

   pan_pack(varying_buffer.cpu, ATTRIBUTE_BUFFER, cfg) {
      cfg.pointer = coordinates;
      cfg.stride = 4 * sizeof(float);
      cfg.size = cfg.stride * 4;
   }

   if (padding_buffer) {
      pan_pack(varying_buffer.cpu + pan_size(ATTRIBUTE_BUFFER),
               ATTRIBUTE_BUFFER, cfg)
         ;
   }
#endif

   return varying_buffer.gpu;
}
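
/* The coordinate stream described here is the 4-vertex rect uploaded by
 * GENX(pan_preload_fb)(): four (x, y, z, 1.0) positions at a 16-byte stride,
 * read in the blit shader as the vec3 "coord" varying. */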

static mali_ptr
pan_blitter_emit_sampler(struct pan_pool *pool, bool nearest_filter)
{
   struct panfrost_ptr sampler = pan_pool_alloc_desc(pool, SAMPLER);

   pan_pack(sampler.cpu, SAMPLER, cfg) {
      cfg.seamless_cube_map = false;
      cfg.normalized_coordinates = false;
      cfg.minify_nearest = nearest_filter;
      cfg.magnify_nearest = nearest_filter;
   }

   return sampler.gpu;
}
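
/* Unnormalized coordinates let the blit shader address the source in texel
 * space, matching the screen-space positions mentioned at the top of this
 * file; preloads always sample with nearest filtering (see
 * pan_preload_emit_dcd() passing nearest_filter = true). */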

static mali_ptr
pan_blitter_emit_textures(struct pan_pool *pool, unsigned tex_count,
                          const struct pan_image_view **views)
{
#if PAN_ARCH >= 6
   struct panfrost_ptr textures =
      pan_pool_alloc_desc_array(pool, tex_count, TEXTURE);

   for (unsigned i = 0; i < tex_count; i++) {
      void *texture = textures.cpu + (pan_size(TEXTURE) * i);
      size_t payload_size =
         GENX(panfrost_estimate_texture_payload_size)(views[i]);
      struct panfrost_ptr surfaces =
         pan_pool_alloc_aligned(pool, payload_size, 64);

      GENX(panfrost_new_texture)(views[i], texture, &surfaces);
   }

   return textures.gpu;
#else
   mali_ptr textures[8] = {0};

   for (unsigned i = 0; i < tex_count; i++) {
      size_t sz = pan_size(TEXTURE) +
                  GENX(panfrost_estimate_texture_payload_size)(views[i]);
      struct panfrost_ptr texture =
         pan_pool_alloc_aligned(pool, sz, pan_alignment(TEXTURE));
      struct panfrost_ptr surfaces = {
         .cpu = texture.cpu + pan_size(TEXTURE),
         .gpu = texture.gpu + pan_size(TEXTURE),
      };

      GENX(panfrost_new_texture)(views[i], texture.cpu, &surfaces);
      textures[i] = texture.gpu;
   }

   return pan_pool_upload_aligned(pool, textures, tex_count * sizeof(mali_ptr),
                                  sizeof(mali_ptr));
#endif
}
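
/* On v6+ the TEXTURE descriptors form one contiguous array that is
 * referenced directly; on v4/v5 each descriptor is allocated together with
 * its payload and an indirection table of GPU pointers is uploaded
 * instead. */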

static mali_ptr
pan_preload_emit_textures(struct pan_pool *pool, const struct pan_fb_info *fb,
                          bool zs, unsigned *tex_count_out)
{
   const struct pan_image_view *views[8];
   struct pan_image_view patched_s_view;
   unsigned tex_count = 0;

   if (zs) {
      if (fb->zs.preload.z)
         views[tex_count++] = fb->zs.view.zs;

      if (fb->zs.preload.s) {
         const struct pan_image_view *view = fb->zs.view.s ?: fb->zs.view.zs;
         enum pipe_format fmt = util_format_get_depth_only(view->format);

         switch (view->format) {
         case PIPE_FORMAT_Z24_UNORM_S8_UINT:
            fmt = PIPE_FORMAT_X24S8_UINT;
            break;
         case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
            fmt = PIPE_FORMAT_X32_S8X24_UINT;
            break;
         default:
            fmt = view->format;
            break;
         }

         if (fmt != view->format) {
            patched_s_view = *view;
            patched_s_view.format = fmt;
            view = &patched_s_view;
         }
         views[tex_count++] = view;
      }
   } else {
      for (unsigned i = 0; i < fb->rt_count; i++) {
         if (fb->rts[i].preload)
            views[tex_count++] = fb->rts[i].view;
      }
   }

   *tex_count_out = tex_count;

   return pan_blitter_emit_textures(pool, tex_count, views);
}

#if PAN_ARCH >= 8
/* TODO: cache */
static mali_ptr
pan_blitter_emit_zs(struct pan_pool *pool, bool z, bool s)
{
   struct panfrost_ptr zsd = pan_pool_alloc_desc(pool, DEPTH_STENCIL);

   pan_pack(zsd.cpu, DEPTH_STENCIL, cfg) {
      cfg.depth_function = MALI_FUNC_ALWAYS;
      cfg.depth_write_enable = z;

      if (z)
         cfg.depth_source = MALI_DEPTH_SOURCE_SHADER;

      cfg.stencil_test_enable = s;
      cfg.stencil_from_shader = s;

      cfg.front_compare_function = MALI_FUNC_ALWAYS;
      cfg.front_stencil_fail = MALI_STENCIL_OP_REPLACE;
      cfg.front_depth_fail = MALI_STENCIL_OP_REPLACE;
      cfg.front_depth_pass = MALI_STENCIL_OP_REPLACE;
      cfg.front_write_mask = 0xFF;
      cfg.front_value_mask = 0xFF;

      cfg.back_compare_function = MALI_FUNC_ALWAYS;
      cfg.back_stencil_fail = MALI_STENCIL_OP_REPLACE;
      cfg.back_depth_fail = MALI_STENCIL_OP_REPLACE;
      cfg.back_depth_pass = MALI_STENCIL_OP_REPLACE;
      cfg.back_write_mask = 0xFF;
      cfg.back_value_mask = 0xFF;

      cfg.depth_cull_enable = false;
   }

   return zsd.gpu;
}
#else
static mali_ptr
pan_blitter_emit_viewport(struct pan_pool *pool, uint16_t minx, uint16_t miny,
                          uint16_t maxx, uint16_t maxy)
{
   struct panfrost_ptr vp = pan_pool_alloc_desc(pool, VIEWPORT);

   pan_pack(vp.cpu, VIEWPORT, cfg) {
      cfg.scissor_minimum_x = minx;
      cfg.scissor_minimum_y = miny;
      cfg.scissor_maximum_x = maxx;
      cfg.scissor_maximum_y = maxy;
   }

   return vp.gpu;
}
#endif

static void
pan_preload_emit_dcd(struct pan_blitter_cache *cache,
                     struct pan_pool *pool, struct pan_fb_info *fb, bool zs,
                     mali_ptr coordinates, mali_ptr tsd, void *out,
                     bool always_write)
{
   unsigned tex_count = 0;
   mali_ptr textures = pan_preload_emit_textures(pool, fb, zs, &tex_count);
   mali_ptr samplers = pan_blitter_emit_sampler(pool, true);
   mali_ptr varyings = pan_blitter_emit_varying(pool);
   mali_ptr varying_buffers =
      pan_blitter_emit_varying_buffer(pool, coordinates);

   /* Tiles updated by blit shaders are still considered clean (separate
    * for colour and Z/S), allowing us to suppress unnecessary writeback
    */
   UNUSED bool clean_fragment_write = !always_write;

   /* Image view used when patching stencil formats for combined
    * depth/stencil preloads.
    */
   struct pan_image_view patched_s;

   struct pan_blitter_views views = pan_preload_get_views(fb, zs, &patched_s);

#if PAN_ARCH <= 7
   pan_pack(out, DRAW, cfg) {
      uint16_t minx = 0, miny = 0, maxx, maxy;

      if (PAN_ARCH == 4) {
         maxx = fb->width - 1;
         maxy = fb->height - 1;
      } else {
         /* Align on 32x32 tiles */
         minx = fb->extent.minx & ~31;
         miny = fb->extent.miny & ~31;
         maxx = MIN2(ALIGN_POT(fb->extent.maxx + 1, 32), fb->width) - 1;
         maxy = MIN2(ALIGN_POT(fb->extent.maxy + 1, 32), fb->height) - 1;
      }

      cfg.thread_storage = tsd;
      cfg.state = pan_blitter_get_rsd(cache, &views);

      cfg.position = coordinates;
      cfg.viewport = pan_blitter_emit_viewport(pool, minx, miny, maxx, maxy);

      cfg.varyings = varyings;
      cfg.varying_buffers = varying_buffers;
      cfg.textures = textures;
      cfg.samplers = samplers;

#if PAN_ARCH >= 6
      cfg.clean_fragment_write = clean_fragment_write;
#endif
   }
#else
   struct panfrost_ptr T;
   unsigned nr_tables = 12;

   /* Although individual resources need only 16 byte alignment, the
    * resource table as a whole must be 64-byte aligned.
    */
   T = pan_pool_alloc_aligned(pool, nr_tables * pan_size(RESOURCE), 64);
   memset(T.cpu, 0, nr_tables * pan_size(RESOURCE));

   panfrost_make_resource_table(T, PAN_TABLE_TEXTURE, textures, tex_count);
   panfrost_make_resource_table(T, PAN_TABLE_SAMPLER, samplers, 1);
   panfrost_make_resource_table(T, PAN_TABLE_ATTRIBUTE, varyings, 1);
   panfrost_make_resource_table(T, PAN_TABLE_ATTRIBUTE_BUFFER, varying_buffers,
                                1);

   struct pan_blit_shader_key key = pan_blitter_get_key(&views);
   const struct pan_blit_shader_data *blit_shader =
      pan_blitter_get_blit_shader(cache, &key);

   bool z = fb->zs.preload.z;
   bool s = fb->zs.preload.s;
   bool ms = pan_blitter_is_ms(&views);

   struct panfrost_ptr spd = pan_pool_alloc_desc(pool, SHADER_PROGRAM);
   pan_pack(spd.cpu, SHADER_PROGRAM, cfg) {
      cfg.stage = MALI_SHADER_STAGE_FRAGMENT;
      cfg.fragment_coverage_bitmask_type = MALI_COVERAGE_BITMASK_TYPE_GL;
      cfg.register_allocation = MALI_SHADER_REGISTER_ALLOCATION_32_PER_THREAD;
      cfg.binary = blit_shader->address;
      cfg.preload.r48_r63 = blit_shader->info.preload >> 48;
   }

   unsigned bd_count = views.rt_count;
   struct panfrost_ptr blend = pan_pool_alloc_desc_array(pool, bd_count, BLEND);

   if (!zs) {
      pan_blitter_emit_blends(blit_shader, &views, NULL, blend.cpu);
   }

   pan_pack(out, DRAW, cfg) {
      if (zs) {
         /* ZS_EMIT requires late update/kill */
         cfg.zs_update_operation = MALI_PIXEL_KILL_FORCE_LATE;
         cfg.pixel_kill_operation = MALI_PIXEL_KILL_FORCE_LATE;
         cfg.blend_count = 0;
      } else {
         /* Skipping ATEST requires forcing Z/S */
         cfg.zs_update_operation = MALI_PIXEL_KILL_STRONG_EARLY;
         cfg.pixel_kill_operation = MALI_PIXEL_KILL_FORCE_EARLY;

         cfg.blend = blend.gpu;
         cfg.blend_count = bd_count;
         cfg.render_target_mask = 0x1;
      }

      cfg.allow_forward_pixel_to_kill = !zs;
      cfg.allow_forward_pixel_to_be_killed = true;
      cfg.depth_stencil = pan_blitter_emit_zs(pool, z, s);
      cfg.sample_mask = 0xFFFF;
      cfg.multisample_enable = ms;
      cfg.evaluate_per_sample = ms;
      cfg.maximum_z = 1.0;
      cfg.clean_fragment_write = clean_fragment_write;
      cfg.shader.resources = T.gpu | nr_tables;
      cfg.shader.shader = spd.gpu;
      cfg.shader.thread_storage = tsd;
   }
#endif
}

#if PAN_ARCH <= 7
static void *
pan_blit_emit_tiler_job(struct pan_pool *pool, struct pan_jc *jc,
                        mali_ptr tiler, struct panfrost_ptr *job)
{
   *job = pan_pool_alloc_desc(pool, TILER_JOB);

   pan_section_pack(job->cpu, TILER_JOB, PRIMITIVE, cfg) {
      cfg.draw_mode = MALI_DRAW_MODE_TRIANGLE_STRIP;
      cfg.index_count = 4;
      cfg.job_task_split = 6;
   }

   pan_section_pack(job->cpu, TILER_JOB, PRIMITIVE_SIZE, cfg) {
      cfg.constant = 1.0f;
   }

   void *invoc = pan_section_ptr(job->cpu, TILER_JOB, INVOCATION);
   panfrost_pack_work_groups_compute(invoc, 1, 4, 1, 1, 1, 1, true, false);

#if PAN_ARCH >= 6
   pan_section_pack(job->cpu, TILER_JOB, PADDING, cfg)
      ;
   pan_section_pack(job->cpu, TILER_JOB, TILER, cfg) {
      cfg.address = tiler;
   }
#endif

   pan_jc_add_job(pool, jc, MALI_JOB_TYPE_TILER, false, false, 0, 0, job,
                  false);
   return pan_section_ptr(job->cpu, TILER_JOB, DRAW);
}
#endif

#if PAN_ARCH >= 6
static void
pan_preload_fb_alloc_pre_post_dcds(struct pan_pool *desc_pool,
                                   struct pan_fb_info *fb)
{
   if (fb->bifrost.pre_post.dcds.gpu)
      return;

   fb->bifrost.pre_post.dcds = pan_pool_alloc_desc_array(desc_pool, 3, DRAW);
}

static void
pan_preload_emit_pre_frame_dcd(struct pan_blitter_cache *cache,
                               struct pan_pool *desc_pool,
                               struct pan_fb_info *fb, bool zs, mali_ptr coords,
                               mali_ptr tsd)
{
   unsigned dcd_idx = zs ? 1 : 0;
   pan_preload_fb_alloc_pre_post_dcds(desc_pool, fb);
   assert(fb->bifrost.pre_post.dcds.cpu);
   void *dcd = fb->bifrost.pre_post.dcds.cpu + (dcd_idx * pan_size(DRAW));

   /* We only use crc_rt to determine whether to force writes for updating
    * the CRCs, so use a conservative tile size (16x16).
    */
   int crc_rt = GENX(pan_select_crc_rt)(fb, 16 * 16);

   bool always_write = false;

   /* If CRC data is currently invalid and this batch will make it valid,
    * write even clean tiles to make sure CRC data is updated. */
   if (crc_rt >= 0) {
      bool *valid = fb->rts[crc_rt].crc_valid;
      bool full = !fb->extent.minx && !fb->extent.miny &&
                  fb->extent.maxx == (fb->width - 1) &&
                  fb->extent.maxy == (fb->height - 1);

      if (full && !(*valid))
         always_write = true;
   }

   pan_preload_emit_dcd(cache, desc_pool, fb, zs, coords, tsd, dcd,
                        always_write);
   if (zs) {
      enum pipe_format fmt = fb->zs.view.zs
                                ? fb->zs.view.zs->planes[0]->layout.format
                                : fb->zs.view.s->planes[0]->layout.format;
      bool always = false;

      /* If we're dealing with a combined ZS resource and only one
       * component is cleared, we need to reload the whole surface
       * because the zs_clean_pixel_write_enable flag is set in that
       * case.
       */
      if (util_format_is_depth_and_stencil(fmt) &&
          fb->zs.clear.z != fb->zs.clear.s)
         always = true;

      /* We could use INTERSECT on Bifrost v7 too, but
       * EARLY_ZS_ALWAYS has the advantage of reloading the ZS tile
       * buffer one or more tiles ahead, making ZS data immediately
       * available for any ZS tests taking place in other shaders.
       * Things haven't been benchmarked to determine what's
       * preferable (saving bandwidth vs having ZS preloaded
       * earlier), so let's leave it like that for now.
       */
      fb->bifrost.pre_post.modes[dcd_idx] =
         PAN_ARCH > 6
            ? MALI_PRE_POST_FRAME_SHADER_MODE_EARLY_ZS_ALWAYS
         : always ? MALI_PRE_POST_FRAME_SHADER_MODE_ALWAYS
                  : MALI_PRE_POST_FRAME_SHADER_MODE_INTERSECT;
   } else {
      fb->bifrost.pre_post.modes[dcd_idx] =
         always_write ? MALI_PRE_POST_FRAME_SHADER_MODE_ALWAYS
                      : MALI_PRE_POST_FRAME_SHADER_MODE_INTERSECT;
   }
}
#else
static struct panfrost_ptr
pan_preload_emit_tiler_job(struct pan_blitter_cache *cache, struct pan_pool *desc_pool, struct pan_jc *jc,
                           struct pan_fb_info *fb, bool zs, mali_ptr coords,
                           mali_ptr tsd)
{
   struct panfrost_ptr job = pan_pool_alloc_desc(desc_pool, TILER_JOB);

   pan_preload_emit_dcd(cache, desc_pool, fb, zs, coords, tsd,
                        pan_section_ptr(job.cpu, TILER_JOB, DRAW), false);

   pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE, cfg) {
      cfg.draw_mode = MALI_DRAW_MODE_TRIANGLE_STRIP;
      cfg.index_count = 4;
      cfg.job_task_split = 6;
   }

   pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE_SIZE, cfg) {
      cfg.constant = 1.0f;
   }

   void *invoc = pan_section_ptr(job.cpu, TILER_JOB, INVOCATION);
   panfrost_pack_work_groups_compute(invoc, 1, 4, 1, 1, 1, 1, true, false);

   pan_jc_add_job(desc_pool, jc, MALI_JOB_TYPE_TILER, false, false, 0, 0, &job,
                  true);
   return job;
}
#endif

static struct panfrost_ptr
pan_preload_fb_part(struct pan_blitter_cache *cache, struct pan_pool *pool,
                    struct pan_jc *jc, struct pan_fb_info *fb, bool zs,
                    mali_ptr coords, mali_ptr tsd, mali_ptr tiler)
{
   struct panfrost_ptr job = {0};

#if PAN_ARCH >= 6
   pan_preload_emit_pre_frame_dcd(cache, pool, fb, zs, coords, tsd);
#else
   job = pan_preload_emit_tiler_job(cache, pool, jc, fb, zs, coords, tsd);
#endif
   return job;
}

unsigned
GENX(pan_preload_fb)(struct pan_blitter_cache *cache, struct pan_pool *pool,
                     struct pan_jc *jc, struct pan_fb_info *fb, mali_ptr tsd,
                     mali_ptr tiler, struct panfrost_ptr *jobs)
{
   bool preload_zs = pan_preload_needed(fb, true);
   bool preload_rts = pan_preload_needed(fb, false);
   mali_ptr coords;

   if (!preload_zs && !preload_rts)
      return 0;

   float rect[] = {
      0.0, 0.0,        0.0, 1.0, fb->width, 0.0,        0.0, 1.0,
      0.0, fb->height, 0.0, 1.0, fb->width, fb->height, 0.0, 1.0,
   };

   coords = pan_pool_upload_aligned(pool, rect, sizeof(rect), 64);

   unsigned njobs = 0;
   if (preload_zs) {
      struct panfrost_ptr job =
         pan_preload_fb_part(cache, pool, jc, fb, true, coords, tsd, tiler);
      if (jobs && job.cpu)
         jobs[njobs++] = job;
   }

   if (preload_rts) {
      struct panfrost_ptr job =
         pan_preload_fb_part(cache, pool, jc, fb, false, coords, tsd, tiler);
      if (jobs && job.cpu)
         jobs[njobs++] = job;
   }

   return njobs;
}
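
/* Returns the number of tiler jobs emitted: up to two on v4/v5 (one for Z/S,
 * one for colour). On v6+ preloads become pre-frame DCDs hanging off the
 * framebuffer descriptor rather than jobs, so the return value is 0 there
 * even when preloads were set up. */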
1416 
1417 #if PAN_ARCH <= 7
1418 void
GENX(pan_blit_ctx_init)1419 GENX(pan_blit_ctx_init)(struct pan_blitter_cache *cache,
1420                         const struct pan_blit_info *info,
1421                         struct pan_pool *blit_pool,
1422                         struct pan_blit_context *ctx)
1423 {
1424    memset(ctx, 0, sizeof(*ctx));
1425 
1426    struct pan_image_view sviews[2] = {
1427       {
1428          .format = info->src.planes[0].format,
1429          .planes =
1430             {
1431                info->src.planes[0].image,
1432                info->src.planes[1].image,
1433                info->src.planes[2].image,
1434             },
1435          .dim =
1436             info->src.planes[0].image->layout.dim == MALI_TEXTURE_DIMENSION_CUBE
1437                ? MALI_TEXTURE_DIMENSION_2D
1438                : info->src.planes[0].image->layout.dim,
1439          .first_level = info->src.level,
1440          .last_level = info->src.level,
1441          .first_layer = info->src.start.layer,
1442          .last_layer = info->src.end.layer,
1443          .swizzle =
1444             {
1445                PIPE_SWIZZLE_X,
1446                PIPE_SWIZZLE_Y,
1447                PIPE_SWIZZLE_Z,
1448                PIPE_SWIZZLE_W,
1449             },
1450       },
1451    };
1452 
   struct pan_image_view dview = {
      .format = info->dst.planes[0].format,
      .planes =
         {
            info->dst.planes[0].image,
            info->dst.planes[1].image,
            info->dst.planes[2].image,
         },
      .dim = info->dst.planes[0].image->layout.dim == MALI_TEXTURE_DIMENSION_1D
                ? MALI_TEXTURE_DIMENSION_1D
                : MALI_TEXTURE_DIMENSION_2D,
      .first_level = info->dst.level,
      .last_level = info->dst.level,
      .first_layer = info->dst.start.layer,
      .last_layer = info->dst.start.layer,
      .swizzle =
         {
            PIPE_SWIZZLE_X,
            PIPE_SWIZZLE_Y,
            PIPE_SWIZZLE_Z,
            PIPE_SWIZZLE_W,
         },
   };

   ctx->src.start.x = info->src.start.x;
   ctx->src.start.y = info->src.start.y;
   ctx->src.end.x = info->src.end.x;
   ctx->src.end.y = info->src.end.y;
   ctx->src.dim = sviews[0].dim;

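   /* For 3D destinations we iterate over depth slices rather than array
    * layers. z_scale maps one destination slice step to the matching source
    * depth range, and the iteration direction follows the sign of
    * dst.end.z - dst.start.z, so flipped blits walk backwards. The z range
    * is half-open, hence the -1 on the exclusive end. */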
   if (info->dst.planes[0].image->layout.dim == MALI_TEXTURE_DIMENSION_3D) {
      unsigned max_z =
         u_minify(info->dst.planes[0].image->layout.depth, info->dst.level) - 1;

      assert(info->dst.start.z != info->dst.end.z);
      ctx->z_scale = (float)(info->src.end.z - info->src.start.z) /
                     (info->dst.end.z - info->dst.start.z);
      if (info->dst.start.z > info->dst.end.z) {
         ctx->dst.cur_layer = info->dst.start.z - 1;
         ctx->dst.last_layer = info->dst.end.z;
      } else {
         ctx->dst.cur_layer = info->dst.start.z;
         ctx->dst.last_layer = info->dst.end.z - 1;
      }
      ctx->dst.cur_layer = MIN2(MAX2(ctx->dst.cur_layer, 0), max_z);
      ctx->dst.last_layer = MIN2(MAX2(ctx->dst.last_layer, 0), max_z);
      ctx->dst.layer_offset = ctx->dst.cur_layer;
   } else {
      unsigned max_layer = info->dst.planes[0].image->layout.array_size - 1;
      ctx->dst.layer_offset = info->dst.start.layer;
      ctx->dst.cur_layer = info->dst.start.layer;
      ctx->dst.last_layer = MIN2(info->dst.end.layer, max_layer);
      ctx->z_scale = 1;
   }

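   /* Bias the starting coordinate by half a destination step so the first
    * emitted layer samples the center of the source span it maps to; the
    * sign flips when the source range is reversed. */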
   if (sviews[0].dim == MALI_TEXTURE_DIMENSION_3D) {
      if (info->src.start.z < info->src.end.z)
         ctx->src.z_offset = info->src.start.z + fabsf(ctx->z_scale * 0.5f);
      else
         ctx->src.z_offset = info->src.start.z - fabsf(ctx->z_scale * 0.5f);
   } else {
      ctx->src.layer_offset = info->src.start.layer;
   }

   /* Split depth and stencil */
   if (util_format_is_depth_and_stencil(sviews[0].format)) {
      sviews[1] = sviews[0];
      sviews[0].format = util_format_get_depth_only(sviews[0].format);
      sviews[1].format = util_format_stencil_only(sviews[1].format);
   } else if (info->src.planes[1].format) {
      sviews[1] = sviews[0];
      sviews[1].format = info->src.planes[1].format;
      sviews[1].planes[0] = info->src.planes[1].image;
   }

   ctx->rsd = pan_blit_get_rsd(cache, sviews, &dview);

   ASSERTED unsigned nlayers = info->src.end.layer - info->src.start.layer + 1;

   assert(nlayers == (info->dst.end.layer - info->dst.start.layer + 1));

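   /* Clamp the destination rectangle to the blitted miplevel. start/end may
    * arrive in either order (mirrored blits), hence the MIN3/MAX2 pairs. */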
   unsigned dst_w =
      u_minify(info->dst.planes[0].image->layout.width, info->dst.level);
   unsigned dst_h =
      u_minify(info->dst.planes[0].image->layout.height, info->dst.level);
   unsigned maxx = MIN2(MAX2(info->dst.start.x, info->dst.end.x), dst_w - 1);
   unsigned maxy = MIN2(MAX2(info->dst.start.y, info->dst.end.y), dst_h - 1);
   unsigned minx = MAX2(MIN3(info->dst.start.x, info->dst.end.x, maxx), 0);
   unsigned miny = MAX2(MIN3(info->dst.start.y, info->dst.end.y, maxy), 0);

   if (info->scissor.enable) {
      minx = MAX2(minx, info->scissor.minx);
      miny = MAX2(miny, info->scissor.miny);
      maxx = MIN2(maxx, info->scissor.maxx);
      maxy = MIN2(maxy, info->scissor.maxy);
   }

   const struct pan_image_view *sview_ptrs[] = {&sviews[0], &sviews[1]};
   unsigned nviews = sviews[1].format ? 2 : 1;

   ctx->textures = pan_blitter_emit_textures(blit_pool, nviews, sview_ptrs);
   ctx->samplers = pan_blitter_emit_sampler(blit_pool, info->nearest);

   ctx->vpd = pan_blitter_emit_viewport(blit_pool, minx, miny, maxx, maxy);

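   /* Destination quad in framebuffer pixels. The matching source rectangle
    * depends on the current layer, so it is uploaded per draw in
    * GENX(pan_blit) instead. */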
   float dst_rect[] = {
      info->dst.start.x, info->dst.start.y, 0.0, 1.0,
      info->dst.end.x,   info->dst.start.y, 0.0, 1.0,
      info->dst.start.x, info->dst.end.y,   0.0, 1.0,
      info->dst.end.x,   info->dst.end.y,   0.0, 1.0,
   };

   ctx->position =
      pan_pool_upload_aligned(blit_pool, dst_rect, sizeof(dst_rect), 64);
}

struct panfrost_ptr
GENX(pan_blit)(struct pan_blit_context *ctx, struct pan_pool *pool,
               struct pan_jc *jc, mali_ptr tsd, mali_ptr tiler)
{
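   /* Return a null job once the layer range is exhausted; cur_layer steps
    * toward last_layer in either direction, so both orderings are checked. */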
   if (ctx->dst.cur_layer < 0 ||
       (ctx->dst.last_layer >= ctx->dst.layer_offset &&
        ctx->dst.cur_layer > ctx->dst.last_layer) ||
       (ctx->dst.last_layer < ctx->dst.layer_offset &&
        ctx->dst.cur_layer < ctx->dst.last_layer))
      return (struct panfrost_ptr){0};

   int32_t layer = ctx->dst.cur_layer - ctx->dst.layer_offset;
   float src_z;
   if (ctx->src.dim == MALI_TEXTURE_DIMENSION_3D)
      src_z = (ctx->z_scale * layer) + ctx->src.z_offset;
   else
      src_z = ctx->src.layer_offset + layer;

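   /* Per-layer source rectangle: src_z selects either a depth slice (3D,
    * via z_scale) or an array layer, and travels in the varying's z
    * component. */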
   float src_rect[] = {
      ctx->src.start.x, ctx->src.start.y, src_z, 1.0,
      ctx->src.end.x,   ctx->src.start.y, src_z, 1.0,
      ctx->src.start.x, ctx->src.end.y,   src_z, 1.0,
      ctx->src.end.x,   ctx->src.end.y,   src_z, 1.0,
   };

   mali_ptr src_coords =
      pan_pool_upload_aligned(pool, src_rect, sizeof(src_rect), 64);

   struct panfrost_ptr job = {0};
   void *dcd = pan_blit_emit_tiler_job(pool, jc, tiler, &job);

   pan_pack(dcd, DRAW, cfg) {
      cfg.thread_storage = tsd;
      cfg.state = ctx->rsd;

      cfg.position = ctx->position;
      cfg.varyings = pan_blitter_emit_varying(pool);
      cfg.varying_buffers = pan_blitter_emit_varying_buffer(pool, src_coords);
      cfg.viewport = ctx->vpd;
      cfg.textures = ctx->textures;
      cfg.samplers = ctx->samplers;
   }

   return job;
}
#endif

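/* Cache keys are hashed and compared as raw bytes, so every key struct must
 * be fully zero-initialized (padding included) before lookup or insertion;
 * see the explicit `pad` field in pan_blit_blend_shader_key. */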
static uint32_t
pan_blit_shader_key_hash(const void *key)
{
   return _mesa_hash_data(key, sizeof(struct pan_blit_shader_key));
}

static bool
pan_blit_shader_key_equal(const void *a, const void *b)
{
   return !memcmp(a, b, sizeof(struct pan_blit_shader_key));
}

static uint32_t
pan_blit_blend_shader_key_hash(const void *key)
{
   return _mesa_hash_data(key, sizeof(struct pan_blit_blend_shader_key));
}

static bool
pan_blit_blend_shader_key_equal(const void *a, const void *b)
{
   return !memcmp(a, b, sizeof(struct pan_blit_blend_shader_key));
}

static uint32_t
pan_blit_rsd_key_hash(const void *key)
{
   return _mesa_hash_data(key, sizeof(struct pan_blit_rsd_key));
}

static bool
pan_blit_rsd_key_equal(const void *a, const void *b)
{
   return !memcmp(a, b, sizeof(struct pan_blit_rsd_key));
}

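/* Warm the cache with the most common preload variants (single-sampled 2D
 * depth, stencil and colour) so typical framebuffer preloads don't have to
 * hit the shader compiler at job-emission time. */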
static void
pan_blitter_prefill_blit_shader_cache(struct pan_blitter_cache *cache)
{
   static const struct pan_blit_shader_key prefill[] = {
      {
         .surfaces[0] =
            {
               .loc = FRAG_RESULT_DEPTH,
               .type = nir_type_float32,
               .dim = MALI_TEXTURE_DIMENSION_2D,
               .src_samples = 1,
               .dst_samples = 1,
            },
      },
      {
         .surfaces[1] =
            {
               .loc = FRAG_RESULT_STENCIL,
               .type = nir_type_uint32,
               .dim = MALI_TEXTURE_DIMENSION_2D,
               .src_samples = 1,
               .dst_samples = 1,
            },
      },
      {
         .surfaces[0] =
            {
               .loc = FRAG_RESULT_DATA0,
               .type = nir_type_float32,
               .dim = MALI_TEXTURE_DIMENSION_2D,
               .src_samples = 1,
               .dst_samples = 1,
            },
      },
   };

   for (unsigned i = 0; i < ARRAY_SIZE(prefill); i++)
      pan_blitter_get_blit_shader(cache, &prefill[i]);
}

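/* One-time cache setup: bin_pool backs the compiled shader binaries and
 * desc_pool the baked renderer-state descriptors, so both are expected to
 * outlive the cache. A minimal setup/teardown sketch (creation of the pools
 * and the blend-shader cache elided):
 *
 *    struct pan_blitter_cache cache;
 *    GENX(pan_blitter_cache_init)(&cache, gpu_id, &blend_shader_cache,
 *                                 &bin_pool, &desc_pool);
 *    ...
 *    GENX(pan_blitter_cache_cleanup)(&cache);
 */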
void
GENX(pan_blitter_cache_init)(struct pan_blitter_cache *cache,
                             unsigned gpu_id,
                             struct pan_blend_shader_cache *blend_shader_cache,
                             struct pan_pool *bin_pool,
                             struct pan_pool *desc_pool)
{
   cache->gpu_id = gpu_id;
   cache->shaders.blit = _mesa_hash_table_create(NULL, pan_blit_shader_key_hash,
                                                 pan_blit_shader_key_equal);
   cache->shaders.blend = _mesa_hash_table_create(
      NULL, pan_blit_blend_shader_key_hash, pan_blit_blend_shader_key_equal);
   cache->shaders.pool = bin_pool;
   pthread_mutex_init(&cache->shaders.lock, NULL);
   pan_blitter_prefill_blit_shader_cache(cache);

   cache->rsds.pool = desc_pool;
   cache->rsds.rsds = _mesa_hash_table_create(NULL, pan_blit_rsd_key_hash,
                                              pan_blit_rsd_key_equal);
   pthread_mutex_init(&cache->rsds.lock, NULL);
   cache->blend_shader_cache = blend_shader_cache;
}

void
GENX(pan_blitter_cache_cleanup)(struct pan_blitter_cache *cache)
{
   _mesa_hash_table_destroy(cache->shaders.blit, NULL);
   _mesa_hash_table_destroy(cache->shaders.blend, NULL);
   pthread_mutex_destroy(&cache->shaders.lock);
   _mesa_hash_table_destroy(cache->rsds.rsds, NULL);
   pthread_mutex_destroy(&cache->rsds.lock);
}