• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2020-2021 Collabora, Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors:
24  *   Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
25  *   Boris Brezillon <boris.brezillon@collabora.com>
26  */
27 
28 #include <math.h>
29 #include <stdio.h>
30 #include "pan_blend.h"
31 #include "pan_blitter.h"
32 #include "pan_cs.h"
33 #include "pan_encoder.h"
34 #include "pan_pool.h"
35 #include "pan_shader.h"
36 #include "pan_scoreboard.h"
37 #include "pan_texture.h"
38 #include "compiler/nir/nir_builder.h"
39 #include "util/u_math.h"
40 
41 #if PAN_ARCH >= 6
42 /* On Midgard, the native blit infrastructure (via MFBD preloads) is broken or
43  * missing in many cases. We instead use software paths as fallbacks to
44  * implement blits, which are done as TILER jobs. No vertex shader is
45  * necessary since we can supply screen-space coordinates directly.
46  *
47  * This is primarily designed as a fallback for preloads but could be extended
48  * for other clears/blits if needed in the future. */
49 
50 static enum mali_register_file_format
blit_type_to_reg_fmt(nir_alu_type in)51 blit_type_to_reg_fmt(nir_alu_type in)
52 {
53         switch (in) {
54         case nir_type_float32:
55                 return MALI_REGISTER_FILE_FORMAT_F32;
56         case nir_type_int32:
57                 return MALI_REGISTER_FILE_FORMAT_I32;
58         case nir_type_uint32:
59                 return MALI_REGISTER_FILE_FORMAT_U32;
60         default:
61                 unreachable("Invalid blit type");
62         }
63 }
64 #endif
65 
/* Describes one surface handled by a blit shader. An array of these makes up
 * the blit shader key, so the packed bitfield layout is part of the key. */
66 struct pan_blit_surface {
        /* Fragment result location the shader output is bound to. */
67         gl_frag_result loc : 4;
        /* Type used for the texture fetch; nir_type_invalid marks an
         * unused slot. */
68         nir_alu_type type : 8;
        /* Texture dimension, taken from the source view. */
69         enum mali_texture_dimension dim : 2;
        /* Set when the source view covers more than one layer. */
70         bool array : 1;
        /* Sample counts of the source and destination images. */
71         unsigned src_samples: 5;
72         unsigned dst_samples: 5;
73 };
74 
/* Key used to look blit shaders up in the dev->blitter.shaders.blit hash
 * table: one pan_blit_surface entry per slot. */
75 struct pan_blit_shader_key {
76         struct pan_blit_surface surfaces[8];
77 };
78 
/* Cached blit shader, stored in the blit shader hash table. */
79 struct pan_blit_shader_data {
        /* Copy of the key the shader was built for (also the hash key). */
80         struct pan_blit_shader_key key;
        /* Shader info filled in by the compiler. */
81         struct pan_shader_info info;
        /* GPU address of the uploaded shader binary. */
82         mali_ptr address;
        /* Per-render-target blend return offsets and blend types, copied from
         * the compiled shader info when PAN_ARCH >= 6. */
83         unsigned blend_ret_offsets[8];
84         nir_alu_type blend_types[8];
85 };
86 
/* Key used to look blend shaders up in the dev->blitter.shaders.blend hash
 * table. */
87 struct pan_blit_blend_shader_key {
        /* Format of the render target view. */
88         enum pipe_format format;
        /* Blend type reported by the blit shader for this render target. */
89         nir_alu_type type;
        /* Render target index. */
90         unsigned rt : 3;
        /* Sample count of the render target image. */
91         unsigned nr_samples : 5;
        /* Explicit padding: rt + nr_samples + pad add up to 32 bits.
         * NOTE(review): presumably here so the key has no undefined bits
         * when hashed/compared -- confirm against the hash functions. */
92         unsigned pad : 24;
93 };
93 };
94 
/* Cached blend shader, stored in the blend shader hash table. */
95 struct pan_blit_blend_shader_data {
        /* Key the shader was built for (also the hash key). */
96         struct pan_blit_blend_shader_key key;
        /* GPU address of the uploaded binary, OR'ed with the shader's
         * first tag. */
97         mali_ptr address;
98 };
99 
/* Key describing the surfaces involved in a blit: one entry per colour
 * render target plus one each for depth (z) and stencil (s). Each entry
 * combines a format with the same per-surface properties stored in
 * pan_blit_surface. */
100 struct pan_blit_rsd_key {
101         struct {
                /* Format of the destination view. */
102                 enum pipe_format format;
                /* The remaining fields mirror the blit shader key surface. */
103                 nir_alu_type type : 8;
104                 unsigned src_samples : 5;
105                 unsigned dst_samples : 5;
106                 enum mali_texture_dimension dim : 2;
107                 bool array : 1;
108         } rts[8], z, s;
109 };
110 
/* Pairs a pan_blit_rsd_key with a GPU address.
 * NOTE(review): presumably the address of a cached renderer state
 * descriptor built for that key -- confirm against the users of this
 * struct. */
111 struct pan_blit_rsd_data {
112         struct pan_blit_rsd_key key;
113         mali_ptr address;
114 };
115 
116 #if PAN_ARCH >= 5
/* Pack one BLEND descriptor into `out` for render target `rt`.
 *
 * dev:          device, used to translate the view format on PAN_ARCH >= 6
 * rt:           render target index
 * iview:        destination view, or NULL to emit a disabled descriptor
 * blit_shader:  blit shader whose key/blend return offsets are consulted
 * blend_shader: GPU address of a blend shader, or 0 for fixed-function
 * out:          destination memory for the packed descriptor
 */
117 static void
pan_blitter_emit_blend(const struct panfrost_device * dev,unsigned rt,const struct pan_image_view * iview,const struct pan_blit_shader_data * blit_shader,mali_ptr blend_shader,void * out)118 pan_blitter_emit_blend(const struct panfrost_device *dev,
119                        unsigned rt,
120                        const struct pan_image_view *iview,
121                        const struct pan_blit_shader_data *blit_shader,
122                        mali_ptr blend_shader,
123                        void *out)
124 {
125         pan_pack(out, BLEND, cfg) {
                /* No destination view: emit a disabled descriptor and skip
                 * the rest of the configuration. NOTE(review): `continue`
                 * relies on pan_pack being loop-based -- confirm. */
126                 if (!iview) {
127                         cfg.enable = false;
128 #if PAN_ARCH >= 6
129                         cfg.internal.mode = MALI_BLEND_MODE_OFF;
130 #endif
131                         continue;
132                 }
133 
134                 cfg.round_to_fb_precision = true;
135                 cfg.srgb = util_format_is_srgb(iview->format);
136 
137 #if PAN_ARCH >= 6
                /* Blend shader if one was supplied, opaque otherwise */
138                 cfg.internal.mode = blend_shader ?
139                                     MALI_BLEND_MODE_SHADER :
140                                     MALI_BLEND_MODE_OPAQUE;
141 #endif
142 
143                 if (!blend_shader) {
                        /* Fixed-function path: SRC for the A and B operands,
                         * ZERO for C, all four channels written. */
144                         cfg.equation.rgb.a = MALI_BLEND_OPERAND_A_SRC;
145                         cfg.equation.rgb.b = MALI_BLEND_OPERAND_B_SRC;
146                         cfg.equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO;
147                         cfg.equation.alpha.a = MALI_BLEND_OPERAND_A_SRC;
148                         cfg.equation.alpha.b = MALI_BLEND_OPERAND_B_SRC;
149                         cfg.equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO;
150                         cfg.equation.color_mask = 0xf;
151 
152 #if PAN_ARCH >= 6
                        /* Register format follows the type recorded in the
                         * blit shader key for this render target. */
153                         nir_alu_type type = blit_shader->key.surfaces[rt].type;
154 
155                         cfg.internal.fixed_function.num_comps = 4;
156                         cfg.internal.fixed_function.conversion.memory_format =
157                                 panfrost_format_to_bifrost_blend(dev, iview->format, false);
158                         cfg.internal.fixed_function.conversion.register_format =
159                                 blit_type_to_reg_fmt(type);
160 
161                         cfg.internal.fixed_function.rt = rt;
162 #endif
163                 } else {
164 #if PAN_ARCH >= 6
165                         cfg.internal.shader.pc = blend_shader;
166 #if PAN_ARCH <= 7
                        /* Only set a return address when the blit shader
                         * recorded a non-zero return offset for this RT. */
167                         if (blit_shader->blend_ret_offsets[rt]) {
168                                 cfg.internal.shader.return_value =
169                                         blit_shader->address +
170                                         blit_shader->blend_ret_offsets[rt];
171                         }
172 #endif
173 #else
                        /* PAN_ARCH < 6: flag the blend shader and store its
                         * address directly in the descriptor. */
174                         cfg.blend_shader = true;
175                         cfg.shader_pc = blend_shader;
176 #endif
177                 }
178         }
179 }
180 #endif
181 
/* Source and destination views taking part in a blit. Any pointer may be
 * NULL when the corresponding surface is not involved. */
182 struct pan_blitter_views {
        /* Number of entries of src_rts/dst_rts that are considered. */
183         unsigned rt_count;
        /* Colour render targets: sources and matching destinations. */
184         const struct pan_image_view *src_rts[8];
185         const struct pan_image_view *dst_rts[8];
        /* Depth source/destination. */
186         const struct pan_image_view *src_z;
187         const struct pan_image_view *dst_z;
        /* Stencil source/destination. */
188         const struct pan_image_view *src_s;
189         const struct pan_image_view *dst_s;
190 };
191 
192 static bool
pan_blitter_is_ms(struct pan_blitter_views * views)193 pan_blitter_is_ms(struct pan_blitter_views *views)
194 {
195         for (unsigned i = 0; i < views->rt_count; i++) {
196                 if (views->dst_rts[i]) {
197                         if (views->dst_rts[i]->image->layout.nr_samples > 1)
198                                return true;
199                 }
200         }
201 
202         if (views->dst_z && views->dst_z->image->layout.nr_samples > 1)
203                 return true;
204 
205         if (views->dst_s && views->dst_s->image->layout.nr_samples > 1)
206                 return true;
207 
208         return false;
209 }
210 
211 #if PAN_ARCH >= 5
212 static void
pan_blitter_emit_blends(const struct panfrost_device * dev,const struct pan_blit_shader_data * blit_shader,struct pan_blitter_views * views,mali_ptr * blend_shaders,void * out)213 pan_blitter_emit_blends(const struct panfrost_device *dev,
214                         const struct pan_blit_shader_data *blit_shader,
215                         struct pan_blitter_views *views,
216                         mali_ptr *blend_shaders,
217                         void *out)
218 {
219         for (unsigned i = 0; i < MAX2(views->rt_count, 1); ++i) {
220                 void *dest = out + pan_size(BLEND) * i;
221                 const struct pan_image_view *rt_view = views->dst_rts[i];
222                 mali_ptr blend_shader = blend_shaders ? blend_shaders[i] : 0;
223 
224                 pan_blitter_emit_blend(dev, i, rt_view, blit_shader,
225                                        blend_shader, dest);
226         }
227 }
228 #endif
229 
230 #if PAN_ARCH <= 7
231 static void
pan_blitter_emit_rsd(const struct panfrost_device * dev,const struct pan_blit_shader_data * blit_shader,struct pan_blitter_views * views,mali_ptr * blend_shaders,void * out)232 pan_blitter_emit_rsd(const struct panfrost_device *dev,
233                      const struct pan_blit_shader_data *blit_shader,
234                      struct pan_blitter_views *views,
235                      mali_ptr *blend_shaders,
236                      void *out)
237 {
238         UNUSED bool zs = (views->dst_z || views->dst_s);
239         bool ms = pan_blitter_is_ms(views);
240 
241         pan_pack(out, RENDERER_STATE, cfg) {
242                 assert(blit_shader->address);
243                 pan_shader_prepare_rsd(&blit_shader->info, blit_shader->address, &cfg);
244 
245                 cfg.multisample_misc.sample_mask = 0xFFFF;
246                 cfg.multisample_misc.multisample_enable = ms;
247                 cfg.multisample_misc.evaluate_per_sample = ms;
248                 cfg.multisample_misc.depth_write_mask = views->dst_z != NULL;
249                 cfg.multisample_misc.depth_function = MALI_FUNC_ALWAYS;
250 
251                 cfg.stencil_mask_misc.stencil_enable = views->dst_s != NULL;
252                 cfg.stencil_mask_misc.stencil_mask_front = 0xFF;
253                 cfg.stencil_mask_misc.stencil_mask_back = 0xFF;
254                 cfg.stencil_front.compare_function = MALI_FUNC_ALWAYS;
255                 cfg.stencil_front.stencil_fail = MALI_STENCIL_OP_REPLACE;
256                 cfg.stencil_front.depth_fail = MALI_STENCIL_OP_REPLACE;
257                 cfg.stencil_front.depth_pass = MALI_STENCIL_OP_REPLACE;
258                 cfg.stencil_front.mask = 0xFF;
259                 cfg.stencil_back = cfg.stencil_front;
260 
261 #if PAN_ARCH >= 6
262                 if (zs) {
263                         /* Writing Z/S requires late updates */
264                         cfg.properties.zs_update_operation =
265                                 MALI_PIXEL_KILL_FORCE_LATE;
266                         cfg.properties.pixel_kill_operation =
267                                 MALI_PIXEL_KILL_FORCE_LATE;
268                 } else {
269                         /* Skipping ATEST requires forcing Z/S */
270                         cfg.properties.zs_update_operation =
271                                 MALI_PIXEL_KILL_STRONG_EARLY;
272                         cfg.properties.pixel_kill_operation =
273                                 MALI_PIXEL_KILL_FORCE_EARLY;
274                 }
275 
276                 /* However, while shaders writing Z/S can normally be killed, on v6
277                  * for frame shaders it can cause GPU timeouts, so only allow colour
278                  * blit shaders to be killed. */
279                 cfg.properties.allow_forward_pixel_to_kill = !zs;
280 
281                 if (PAN_ARCH == 6)
282                         cfg.properties.allow_forward_pixel_to_be_killed = !zs;
283 #else
284 
285                 mali_ptr blend_shader = blend_shaders ?
286                         panfrost_last_nonnull(blend_shaders, MAX2(views->rt_count, 1)) : 0;
287 
288                 cfg.properties.work_register_count = 4;
289                 cfg.properties.force_early_z = !zs;
290                 cfg.stencil_mask_misc.alpha_test_compare_function = MALI_FUNC_ALWAYS;
291 
292                 /* Set even on v5 for erratum workaround */
293 #if PAN_ARCH == 5
294                 cfg.legacy_blend_shader = blend_shader;
295 #else
296                 cfg.blend_shader = blend_shader;
297                 cfg.stencil_mask_misc.write_enable = true;
298                 cfg.stencil_mask_misc.dither_disable = true;
299                 cfg.multisample_misc.blend_shader = !!blend_shader;
300                 cfg.blend_shader = blend_shader;
301                 if (!cfg.multisample_misc.blend_shader) {
302                         cfg.blend_equation.rgb.a = MALI_BLEND_OPERAND_A_SRC;
303                         cfg.blend_equation.rgb.b = MALI_BLEND_OPERAND_B_SRC;
304                         cfg.blend_equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO;
305                         cfg.blend_equation.alpha.a = MALI_BLEND_OPERAND_A_SRC;
306                         cfg.blend_equation.alpha.b = MALI_BLEND_OPERAND_B_SRC;
307                         cfg.blend_equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO;
308                         cfg.blend_constant = 0;
309 
310                         if (views->dst_rts[0] != NULL) {
311                                 cfg.stencil_mask_misc.srgb =
312                                         util_format_is_srgb(views->dst_rts[0]->format);
313                                 cfg.blend_equation.color_mask = 0xf;
314                         }
315                }
316 #endif
317 #endif
318         }
319 
320 #if PAN_ARCH >= 5
321         pan_blitter_emit_blends(dev, blit_shader, views, blend_shaders,
322                                 out + pan_size(RENDERER_STATE));
323 #endif
324 }
325 #endif
326 
327 static void
pan_blitter_get_blend_shaders(struct panfrost_device * dev,unsigned rt_count,const struct pan_image_view ** rts,const struct pan_blit_shader_data * blit_shader,mali_ptr * blend_shaders)328 pan_blitter_get_blend_shaders(struct panfrost_device *dev,
329                               unsigned rt_count,
330                               const struct pan_image_view **rts,
331                               const struct pan_blit_shader_data *blit_shader,
332                               mali_ptr *blend_shaders)
333 {
334         if (!rt_count)
335                 return;
336 
337         struct pan_blend_state blend_state = {
338                 .rt_count = rt_count,
339         };
340 
341         for (unsigned i = 0; i < rt_count; i++) {
342                 if (!rts[i] || panfrost_blendable_formats_v7[rts[i]->format].internal)
343                         continue;
344 
345                 struct pan_blit_blend_shader_key key = {
346                         .format = rts[i]->format,
347                         .rt = i,
348                         .nr_samples = rts[i]->image->layout.nr_samples,
349                         .type = blit_shader->blend_types[i],
350                 };
351 
352                 pthread_mutex_lock(&dev->blitter.shaders.lock);
353                 struct hash_entry *he =
354                         _mesa_hash_table_search(dev->blitter.shaders.blend, &key);
355                 struct pan_blit_blend_shader_data *blend_shader = he ? he->data : NULL;
356                 if (blend_shader) {
357                          blend_shaders[i] = blend_shader->address;
358                          pthread_mutex_unlock(&dev->blitter.shaders.lock);
359                          continue;
360                 }
361 
362                 blend_shader = rzalloc(dev->blitter.shaders.blend,
363                                        struct pan_blit_blend_shader_data);
364                 blend_shader->key = key;
365 
366                 blend_state.rts[i] = (struct pan_blend_rt_state) {
367                         .format = rts[i]->format,
368                         .nr_samples = rts[i]->image->layout.nr_samples,
369                         .equation = {
370                                 .blend_enable = true,
371                                 .rgb_src_factor = BLEND_FACTOR_ZERO,
372                                 .rgb_invert_src_factor = true,
373                                 .rgb_dst_factor = BLEND_FACTOR_ZERO,
374                                 .rgb_func = BLEND_FUNC_ADD,
375                                 .alpha_src_factor = BLEND_FACTOR_ZERO,
376                                 .alpha_invert_src_factor = true,
377                                 .alpha_dst_factor = BLEND_FACTOR_ZERO,
378                                 .alpha_func = BLEND_FUNC_ADD,
379                                 .color_mask = 0xf,
380                         },
381                 };
382 
383                 pthread_mutex_lock(&dev->blend_shaders.lock);
384                 struct pan_blend_shader_variant *b =
385                         GENX(pan_blend_get_shader_locked)(dev, &blend_state,
386                                                           blit_shader->blend_types[i],
387                                                           nir_type_float32, /* unused */
388                                                           i);
389 
390                 ASSERTED unsigned full_threads =
391                         (dev->arch >= 7) ? 32 : ((dev->arch == 6) ? 64 : 4);
392                 assert(b->work_reg_count <= full_threads);
393                 struct panfrost_ptr bin =
394                         pan_pool_alloc_aligned(dev->blitter.shaders.pool,
395                                                b->binary.size,
396                                                PAN_ARCH >= 6 ? 128 : 64);
397                 memcpy(bin.cpu, b->binary.data, b->binary.size);
398 
399                 blend_shader->address = bin.gpu | b->first_tag;
400                 pthread_mutex_unlock(&dev->blend_shaders.lock);
401                 _mesa_hash_table_insert(dev->blitter.shaders.blend,
402                                         &blend_shader->key, blend_shader);
403                 pthread_mutex_unlock(&dev->blitter.shaders.lock);
404                 blend_shaders[i] = blend_shader->address;
405         }
406 }
407 
408 static const struct pan_blit_shader_data *
pan_blitter_get_blit_shader(struct panfrost_device * dev,const struct pan_blit_shader_key * key)409 pan_blitter_get_blit_shader(struct panfrost_device *dev,
410                             const struct pan_blit_shader_key *key)
411 {
412         pthread_mutex_lock(&dev->blitter.shaders.lock);
413         struct hash_entry *he = _mesa_hash_table_search(dev->blitter.shaders.blit, key);
414         struct pan_blit_shader_data *shader = he ? he->data : NULL;
415 
416         if (shader)
417                 goto out;
418 
419         unsigned coord_comps = 0;
420         unsigned sig_offset = 0;
421         char sig[256];
422         bool first = true;
423         for (unsigned i = 0; i < ARRAY_SIZE(key->surfaces); i++) {
424                 const char *type_str, *dim_str;
425                 if (key->surfaces[i].type == nir_type_invalid)
426                         continue;
427 
428                 switch (key->surfaces[i].type) {
429                 case nir_type_float32: type_str = "float"; break;
430                 case nir_type_uint32: type_str = "uint"; break;
431                 case nir_type_int32: type_str = "int"; break;
432                 default: unreachable("Invalid type\n");
433                 }
434 
435                 switch (key->surfaces[i].dim) {
436                 case MALI_TEXTURE_DIMENSION_CUBE: dim_str = "cube"; break;
437                 case MALI_TEXTURE_DIMENSION_1D: dim_str = "1D"; break;
438                 case MALI_TEXTURE_DIMENSION_2D: dim_str = "2D"; break;
439                 case MALI_TEXTURE_DIMENSION_3D: dim_str = "3D"; break;
440                 default: unreachable("Invalid dim\n");
441                 }
442 
443                 coord_comps = MAX2(coord_comps,
444                                    (key->surfaces[i].dim ? : 3) +
445                                    (key->surfaces[i].array ? 1 : 0));
446                 first = false;
447 
448                 if (sig_offset >= sizeof(sig))
449                         continue;
450 
451                 sig_offset += snprintf(sig + sig_offset, sizeof(sig) - sig_offset,
452                                        "%s[%s;%s;%s%s;src_samples=%d,dst_samples=%d]",
453                                        first ? "" : ",",
454                                        gl_frag_result_name(key->surfaces[i].loc),
455                                        type_str, dim_str,
456                                        key->surfaces[i].array ? "[]" : "",
457                                        key->surfaces[i].src_samples,
458                                        key->surfaces[i].dst_samples);
459         }
460 
461         nir_builder b =
462                 nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
463                                                GENX(pan_shader_get_compiler_options)(),
464                                                "pan_blit(%s)", sig);
465         nir_variable *coord_var =
466                 nir_variable_create(b.shader, nir_var_shader_in,
467                                     glsl_vector_type(GLSL_TYPE_FLOAT, coord_comps),
468                                     "coord");
469         coord_var->data.location = VARYING_SLOT_VAR0;
470 
471         nir_ssa_def *coord = nir_load_var(&b, coord_var);
472 
473         unsigned active_count = 0;
474         for (unsigned i = 0; i < ARRAY_SIZE(key->surfaces); i++) {
475                 if (key->surfaces[i].type == nir_type_invalid)
476                         continue;
477 
478                 /* Resolve operations only work for N -> 1 samples. */
479                 assert(key->surfaces[i].dst_samples == 1 ||
480                        key->surfaces[i].src_samples == key->surfaces[i].dst_samples);
481 
482                 static const char *out_names[] = {
483                         "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7",
484                 };
485 
486                 unsigned ncomps = key->surfaces[i].loc >= FRAG_RESULT_DATA0 ? 4 : 1;
487                 nir_variable *out =
488                         nir_variable_create(b.shader, nir_var_shader_out,
489                                             glsl_vector_type(GLSL_TYPE_FLOAT, ncomps),
490                                             out_names[active_count]);
491                 out->data.location = key->surfaces[i].loc;
492                 out->data.driver_location = active_count;
493 
494                 bool resolve = key->surfaces[i].src_samples > key->surfaces[i].dst_samples;
495                 bool ms = key->surfaces[i].src_samples > 1;
496                 enum glsl_sampler_dim sampler_dim;
497 
498                 switch (key->surfaces[i].dim) {
499                 case MALI_TEXTURE_DIMENSION_1D:
500                         sampler_dim = GLSL_SAMPLER_DIM_1D;
501                         break;
502                 case MALI_TEXTURE_DIMENSION_2D:
503                         sampler_dim = ms ?
504                                       GLSL_SAMPLER_DIM_MS :
505                                       GLSL_SAMPLER_DIM_2D;
506                         break;
507                 case MALI_TEXTURE_DIMENSION_3D:
508                         sampler_dim = GLSL_SAMPLER_DIM_3D;
509                         break;
510                 case MALI_TEXTURE_DIMENSION_CUBE:
511                         sampler_dim = GLSL_SAMPLER_DIM_CUBE;
512                         break;
513                 }
514 
515                 nir_ssa_def *res = NULL;
516 
517                 if (resolve) {
518                         /* When resolving a float type, we need to calculate
519                          * the average of all samples. For integer resolve, GL
520                          * and Vulkan say that one sample should be chosen
521                          * without telling which. Let's just pick the first one
522                          * in that case.
523                          */
524                         nir_alu_type base_type =
525                                 nir_alu_type_get_base_type(key->surfaces[i].type);
526                         unsigned nsamples = base_type == nir_type_float ?
527                                             key->surfaces[i].src_samples : 1;
528 
529                         for (unsigned s = 0; s < nsamples; s++) {
530                                 nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
531 
532                                 tex->op = nir_texop_txf_ms;
533                                 tex->dest_type = key->surfaces[i].type;
534                                 tex->texture_index = active_count;
535                                 tex->is_array = key->surfaces[i].array;
536                                 tex->sampler_dim = sampler_dim;
537 
538                                 tex->src[0].src_type = nir_tex_src_coord;
539                                 tex->src[0].src = nir_src_for_ssa(nir_f2i32(&b, coord));
540                                 tex->coord_components = coord_comps;
541 
542                                 tex->src[1].src_type = nir_tex_src_ms_index;
543                                 tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, s));
544 
545                                 tex->src[2].src_type = nir_tex_src_lod;
546                                 tex->src[2].src = nir_src_for_ssa(nir_imm_int(&b, 0));
547                                 nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, NULL);
548                                 nir_builder_instr_insert(&b, &tex->instr);
549 
550                                 res = res ? nir_fadd(&b, res, &tex->dest.ssa) : &tex->dest.ssa;
551 			}
552 
553                         if (base_type == nir_type_float) {
554                                 unsigned type_sz =
555                                         nir_alu_type_get_type_size(key->surfaces[i].type);
556                                 res = nir_fmul(&b, res,
557                                                nir_imm_floatN_t(&b, 1.0f / nsamples, type_sz));
558                         }
559                 } else {
560                         nir_tex_instr *tex =
561                                 nir_tex_instr_create(b.shader, ms ? 3 : 1);
562 
563                         tex->dest_type = key->surfaces[i].type;
564                         tex->texture_index = active_count;
565                         tex->is_array = key->surfaces[i].array;
566                         tex->sampler_dim = sampler_dim;
567 
568                         if (ms) {
569                                 tex->op = nir_texop_txf_ms;
570 
571                                 tex->src[0].src_type = nir_tex_src_coord;
572                                 tex->src[0].src = nir_src_for_ssa(nir_f2i32(&b, coord));
573                                 tex->coord_components = coord_comps;
574 
575                                 tex->src[1].src_type = nir_tex_src_ms_index;
576                                 tex->src[1].src = nir_src_for_ssa(nir_load_sample_id(&b));
577 
578                                 tex->src[2].src_type = nir_tex_src_lod;
579                                 tex->src[2].src = nir_src_for_ssa(nir_imm_int(&b, 0));
580                         } else {
581                                 tex->op = nir_texop_txl;
582 
583                                 tex->src[0].src_type = nir_tex_src_coord;
584                                 tex->src[0].src = nir_src_for_ssa(coord);
585                                 tex->coord_components = coord_comps;
586                         }
587 
588                         nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, NULL);
589                         nir_builder_instr_insert(&b, &tex->instr);
590                         res = &tex->dest.ssa;
591                 }
592 
593                 assert(res);
594 
595                 if (key->surfaces[i].loc >= FRAG_RESULT_DATA0) {
596                         nir_store_var(&b, out, res, 0xFF);
597                 } else {
598                         unsigned c = key->surfaces[i].loc == FRAG_RESULT_STENCIL ? 1 : 0;
599                         nir_store_var(&b, out, nir_channel(&b, res, c), 0xFF);
600                 }
601                 active_count++;
602         }
603 
604         struct panfrost_compile_inputs inputs = {
605                 .gpu_id = dev->gpu_id,
606                 .is_blit = true,
607                 .no_idvs = true,
608                 .fixed_sysval_ubo = -1,
609         };
610         struct util_dynarray binary;
611 
612         util_dynarray_init(&binary, NULL);
613 
614         shader = rzalloc(dev->blitter.shaders.blit,
615                          struct pan_blit_shader_data);
616 
617         nir_shader_gather_info(b.shader, nir_shader_get_entrypoint(b.shader));
618 
619         for (unsigned i = 0; i < active_count; ++i)
620                 BITSET_SET(b.shader->info.textures_used, i);
621 
622         GENX(pan_shader_compile)(b.shader, &inputs, &binary, &shader->info);
623 
624         /* Blit shaders shouldn't have sysvals */
625         assert(shader->info.sysvals.sysval_count == 0);
626 
627         shader->key = *key;
628         shader->address =
629                 pan_pool_upload_aligned(dev->blitter.shaders.pool,
630                                         binary.data, binary.size,
631                                         PAN_ARCH >= 6 ? 128 : 64);
632 
633         util_dynarray_fini(&binary);
634         ralloc_free(b.shader);
635 
636 #if PAN_ARCH >= 6
637         for (unsigned i = 0; i < ARRAY_SIZE(shader->blend_ret_offsets); i++) {
638                 shader->blend_ret_offsets[i] = shader->info.bifrost.blend[i].return_offset;
639                 shader->blend_types[i] = shader->info.bifrost.blend[i].type;
640         }
641 #endif
642 
643         _mesa_hash_table_insert(dev->blitter.shaders.blit, &shader->key, shader);
644 
645 out:
646         pthread_mutex_unlock(&dev->blitter.shaders.lock);
647         return shader;
648 }
649 
650 static struct pan_blit_shader_key
pan_blitter_get_key(struct pan_blitter_views * views)651 pan_blitter_get_key(struct pan_blitter_views *views)
652 {
653         struct pan_blit_shader_key key = { 0 };
654 
655         if (views->src_z) {
656                 assert(views->dst_z);
657                 key.surfaces[0].loc = FRAG_RESULT_DEPTH;
658                 key.surfaces[0].type = nir_type_float32;
659                 key.surfaces[0].src_samples = views->src_z->image->layout.nr_samples;
660                 key.surfaces[0].dst_samples = views->dst_z->image->layout.nr_samples;
661                 key.surfaces[0].dim = views->src_z->dim;
662                 key.surfaces[0].array = views->src_z->first_layer != views->src_z->last_layer;
663         }
664 
665         if (views->src_s) {
666                 assert(views->dst_s);
667                 key.surfaces[1].loc = FRAG_RESULT_STENCIL;
668                 key.surfaces[1].type = nir_type_uint32;
669                 key.surfaces[1].src_samples = views->src_s->image->layout.nr_samples;
670                 key.surfaces[1].dst_samples = views->dst_s->image->layout.nr_samples;
671                 key.surfaces[1].dim = views->src_s->dim;
672                 key.surfaces[1].array = views->src_s->first_layer != views->src_s->last_layer;
673         }
674 
675         for (unsigned i = 0; i < views->rt_count; i++) {
676                 if (!views->src_rts[i])
677                         continue;
678 
679                 assert(views->dst_rts[i]);
680                 key.surfaces[i].loc = FRAG_RESULT_DATA0 + i;
681                 key.surfaces[i].type =
682                         util_format_is_pure_uint(views->src_rts[i]->format) ? nir_type_uint32 :
683                         util_format_is_pure_sint(views->src_rts[i]->format) ? nir_type_int32 :
684                         nir_type_float32;
685                 key.surfaces[i].src_samples = views->src_rts[i]->image->layout.nr_samples;
686                 key.surfaces[i].dst_samples = views->dst_rts[i]->image->layout.nr_samples;
687                 key.surfaces[i].dim = views->src_rts[i]->dim;
688                 key.surfaces[i].array = views->src_rts[i]->first_layer != views->src_rts[i]->last_layer;
689         }
690 
691         return key;
692 }
693 
694 #if PAN_ARCH <= 7
695 static mali_ptr
pan_blitter_get_rsd(struct panfrost_device * dev,struct pan_blitter_views * views)696 pan_blitter_get_rsd(struct panfrost_device *dev,
697                     struct pan_blitter_views *views)
698 {
699         struct pan_blit_rsd_key rsd_key = { 0 };
700 
701         assert(!views->rt_count || (!views->src_z && !views->src_s));
702 
703         struct pan_blit_shader_key blit_key = pan_blitter_get_key(views);
704 
705         if (views->src_z) {
706                 assert(views->dst_z);
707                 rsd_key.z.format = views->dst_z->format;
708                 rsd_key.z.type = blit_key.surfaces[0].type;
709                 rsd_key.z.src_samples = blit_key.surfaces[0].src_samples;
710                 rsd_key.z.dst_samples = blit_key.surfaces[0].dst_samples;
711                 rsd_key.z.dim = blit_key.surfaces[0].dim;
712                 rsd_key.z.array = blit_key.surfaces[0].array;
713         }
714 
715         if (views->src_s) {
716                 assert(views->dst_s);
717                 rsd_key.s.format = views->dst_s->format;
718                 rsd_key.s.type = blit_key.surfaces[1].type;
719                 rsd_key.s.src_samples = blit_key.surfaces[1].src_samples;
720                 rsd_key.s.dst_samples = blit_key.surfaces[1].dst_samples;
721                 rsd_key.s.dim = blit_key.surfaces[1].dim;
722                 rsd_key.s.array = blit_key.surfaces[1].array;
723         }
724 
725         for (unsigned i = 0; i < views->rt_count; i++) {
726                 if (!views->src_rts[i])
727                         continue;
728 
729                 assert(views->dst_rts[i]);
730                 rsd_key.rts[i].format = views->dst_rts[i]->format;
731                 rsd_key.rts[i].type = blit_key.surfaces[i].type;
732                 rsd_key.rts[i].src_samples = blit_key.surfaces[i].src_samples;
733                 rsd_key.rts[i].dst_samples = blit_key.surfaces[i].dst_samples;
734                 rsd_key.rts[i].dim = blit_key.surfaces[i].dim;
735                 rsd_key.rts[i].array = blit_key.surfaces[i].array;
736         }
737 
738         pthread_mutex_lock(&dev->blitter.rsds.lock);
739         struct hash_entry *he =
740                 _mesa_hash_table_search(dev->blitter.rsds.rsds, &rsd_key);
741         struct pan_blit_rsd_data *rsd = he ? he->data : NULL;
742         if (rsd)
743                 goto out;
744 
745         rsd = rzalloc(dev->blitter.rsds.rsds, struct pan_blit_rsd_data);
746         rsd->key = rsd_key;
747 
748         unsigned bd_count = PAN_ARCH >= 5 ? MAX2(views->rt_count, 1) : 0;
749         struct panfrost_ptr rsd_ptr =
750                 pan_pool_alloc_desc_aggregate(dev->blitter.rsds.pool,
751                                               PAN_DESC(RENDERER_STATE),
752                                               PAN_DESC_ARRAY(bd_count, BLEND));
753 
754         mali_ptr blend_shaders[8] = { 0 };
755 
756         const struct pan_blit_shader_data *blit_shader =
757                 pan_blitter_get_blit_shader(dev, &blit_key);
758 
759         pan_blitter_get_blend_shaders(dev, views->rt_count, views->dst_rts,
760                                       blit_shader, blend_shaders);
761 
762         pan_blitter_emit_rsd(dev, blit_shader, views, blend_shaders,
763                              rsd_ptr.cpu);
764         rsd->address = rsd_ptr.gpu;
765         _mesa_hash_table_insert(dev->blitter.rsds.rsds, &rsd->key, rsd);
766 
767 out:
768         pthread_mutex_unlock(&dev->blitter.rsds.lock);
769         return rsd->address;
770 }
771 
772 static mali_ptr
pan_blit_get_rsd(struct panfrost_device * dev,const struct pan_image_view * src_views,const struct pan_image_view * dst_view)773 pan_blit_get_rsd(struct panfrost_device *dev,
774                  const struct pan_image_view *src_views,
775                  const struct pan_image_view *dst_view)
776 {
777         const struct util_format_description *desc =
778                 util_format_description(src_views[0].format);
779 
780         struct pan_blitter_views views = { };
781 
782         if (util_format_has_depth(desc)) {
783                 views.src_z = &src_views[0];
784                 views.dst_z = dst_view;
785         }
786 
787         if (src_views[1].format) {
788                 views.src_s = &src_views[1];
789                 views.dst_s = dst_view;
790         } else if (util_format_has_stencil(desc)) {
791                 views.src_s = &src_views[0];
792                 views.dst_s = dst_view;
793         }
794 
795         if (!views.src_z && !views.src_s) {
796                 views.rt_count = 1;
797                 views.src_rts[0] = src_views;
798                 views.dst_rts[0] = dst_view;
799         }
800 
801         return pan_blitter_get_rsd(dev, &views);
802 }
803 #endif
804 
805 static struct pan_blitter_views
pan_preload_get_views(const struct pan_fb_info * fb,bool zs,struct pan_image_view * patched_s)806 pan_preload_get_views(const struct pan_fb_info *fb, bool zs, struct pan_image_view *patched_s)
807 {
808         struct pan_blitter_views views = { 0 };
809 
810         if (zs) {
811                 if (fb->zs.preload.z)
812                         views.src_z = views.dst_z = fb->zs.view.zs;
813 
814                 if (fb->zs.preload.s) {
815                         const struct pan_image_view *view = fb->zs.view.s ? : fb->zs.view.zs;
816                         enum pipe_format fmt = util_format_get_depth_only(view->format);
817 
818                         switch (view->format) {
819                         case PIPE_FORMAT_Z24_UNORM_S8_UINT: fmt = PIPE_FORMAT_X24S8_UINT; break;
820                         case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: fmt = PIPE_FORMAT_X32_S8X24_UINT; break;
821                         default: fmt = view->format; break;
822                         }
823 
824                         if (fmt != view->format) {
825                                 *patched_s = *view;
826                                 patched_s->format = fmt;
827                                 views.src_s = views.dst_s = patched_s;
828                         } else {
829                                 views.src_s = views.dst_s = view;
830                         }
831                 }
832         } else {
833                 for (unsigned i = 0; i < fb->rt_count; i++) {
834                         if (fb->rts[i].preload) {
835                                 views.src_rts[i] = fb->rts[i].view;
836                                 views.dst_rts[i] = fb->rts[i].view;
837                         }
838                 }
839 
840                 views.rt_count = fb->rt_count;
841         }
842 
843         return views;
844 }
845 
846 static bool
pan_preload_needed(const struct pan_fb_info * fb,bool zs)847 pan_preload_needed(const struct pan_fb_info *fb, bool zs)
848 {
849         if (zs) {
850                 if (fb->zs.preload.z || fb->zs.preload.s)
851                         return true;
852         } else {
853                 for (unsigned i = 0; i < fb->rt_count; i++) {
854                         if (fb->rts[i].preload)
855                                 return true;
856                 }
857         }
858 
859         return false;
860 }
861 
862 static mali_ptr
pan_blitter_emit_varying(struct pan_pool * pool)863 pan_blitter_emit_varying(struct pan_pool *pool)
864 {
865         struct panfrost_ptr varying = pan_pool_alloc_desc(pool, ATTRIBUTE);
866 
867         pan_pack(varying.cpu, ATTRIBUTE, cfg) {
868                 cfg.buffer_index = 0;
869                 cfg.offset_enable = PAN_ARCH <= 5;
870                 cfg.format = pool->dev->formats[PIPE_FORMAT_R32G32B32_FLOAT].hw;
871 
872 #if PAN_ARCH >= 9
873                 cfg.attribute_type = MALI_ATTRIBUTE_TYPE_1D;
874                 cfg.table = PAN_TABLE_ATTRIBUTE_BUFFER;
875                 cfg.frequency = MALI_ATTRIBUTE_FREQUENCY_VERTEX;
876                 cfg.stride = 4 * sizeof(float);
877 #endif
878         }
879 
880         return varying.gpu;
881 }
882 
883 static mali_ptr
pan_blitter_emit_varying_buffer(struct pan_pool * pool,mali_ptr coordinates)884 pan_blitter_emit_varying_buffer(struct pan_pool *pool, mali_ptr coordinates)
885 {
886 #if PAN_ARCH >= 9
887         struct panfrost_ptr varying_buffer = pan_pool_alloc_desc(pool, BUFFER);
888 
889         pan_pack(varying_buffer.cpu, BUFFER, cfg) {
890                 cfg.address = coordinates;
891                 cfg.size = 4 * sizeof(float) * 4;
892         }
893 #else
894         /* Bifrost needs an empty desc to mark end of prefetching */
895         bool padding_buffer = PAN_ARCH >= 6;
896 
897         struct panfrost_ptr varying_buffer =
898                 pan_pool_alloc_desc_array(pool, (padding_buffer ? 2 : 1),
899                                           ATTRIBUTE_BUFFER);
900 
901         pan_pack(varying_buffer.cpu, ATTRIBUTE_BUFFER, cfg) {
902                 cfg.pointer = coordinates;
903                 cfg.stride = 4 * sizeof(float);
904                 cfg.size = cfg.stride * 4;
905         }
906 
907         if (padding_buffer) {
908                 pan_pack(varying_buffer.cpu + pan_size(ATTRIBUTE_BUFFER),
909                          ATTRIBUTE_BUFFER, cfg);
910         }
911 #endif
912 
913         return varying_buffer.gpu;
914 }
915 
916 static mali_ptr
pan_blitter_emit_sampler(struct pan_pool * pool,bool nearest_filter)917 pan_blitter_emit_sampler(struct pan_pool *pool,
918                          bool nearest_filter)
919 {
920         struct panfrost_ptr sampler =
921                  pan_pool_alloc_desc(pool, SAMPLER);
922 
923         pan_pack(sampler.cpu, SAMPLER, cfg) {
924                 cfg.seamless_cube_map = false;
925                 cfg.normalized_coordinates = false;
926                 cfg.minify_nearest = nearest_filter;
927                 cfg.magnify_nearest = nearest_filter;
928         }
929 
930         return sampler.gpu;
931 }
932 
933 static mali_ptr
pan_blitter_emit_textures(struct pan_pool * pool,unsigned tex_count,const struct pan_image_view ** views)934 pan_blitter_emit_textures(struct pan_pool *pool,
935                           unsigned tex_count,
936                           const struct pan_image_view **views)
937 {
938 #if PAN_ARCH >= 6
939         struct panfrost_ptr textures =
940                 pan_pool_alloc_desc_array(pool, tex_count, TEXTURE);
941 
942         for (unsigned i = 0; i < tex_count; i++) {
943                 void *texture = textures.cpu + (pan_size(TEXTURE) * i);
944                 size_t payload_size =
945                         GENX(panfrost_estimate_texture_payload_size)(views[i]);
946                 struct panfrost_ptr surfaces =
947                         pan_pool_alloc_aligned(pool, payload_size, 64);
948 
949                 GENX(panfrost_new_texture)(pool->dev, views[i], texture, &surfaces);
950         }
951 
952         return textures.gpu;
953 #else
954         mali_ptr textures[8] = { 0 };
955 
956         for (unsigned i = 0; i < tex_count; i++) {
957                 size_t sz = pan_size(TEXTURE) +
958                             GENX(panfrost_estimate_texture_payload_size)(views[i]);
959                 struct panfrost_ptr texture =
960                         pan_pool_alloc_aligned(pool, sz, pan_alignment(TEXTURE));
961                 struct panfrost_ptr surfaces = {
962                         .cpu = texture.cpu + pan_size(TEXTURE),
963                         .gpu = texture.gpu + pan_size(TEXTURE),
964                 };
965 
966                 GENX(panfrost_new_texture)(pool->dev, views[i], texture.cpu, &surfaces);
967                 textures[i] = texture.gpu;
968         }
969 
970         return pan_pool_upload_aligned(pool, textures,
971                                        tex_count * sizeof(mali_ptr),
972                                        sizeof(mali_ptr));
973 #endif
974 }
975 
976 static mali_ptr
pan_preload_emit_textures(struct pan_pool * pool,const struct pan_fb_info * fb,bool zs,unsigned * tex_count_out)977 pan_preload_emit_textures(struct pan_pool *pool,
978                           const struct pan_fb_info *fb, bool zs,
979                           unsigned *tex_count_out)
980 {
981         const struct pan_image_view *views[8];
982         struct pan_image_view patched_s_view;
983         unsigned tex_count = 0;
984 
985         if (zs) {
986                 if (fb->zs.preload.z)
987                         views[tex_count++] = fb->zs.view.zs;
988 
989                 if (fb->zs.preload.s) {
990                         const struct pan_image_view *view = fb->zs.view.s ? : fb->zs.view.zs;
991                         enum pipe_format fmt = util_format_get_depth_only(view->format);
992 
993                         switch (view->format) {
994                         case PIPE_FORMAT_Z24_UNORM_S8_UINT: fmt = PIPE_FORMAT_X24S8_UINT; break;
995                         case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: fmt = PIPE_FORMAT_X32_S8X24_UINT; break;
996                         default: fmt = view->format; break;
997                         }
998 
999                         if (fmt != view->format) {
1000                                 patched_s_view = *view;
1001                                 patched_s_view.format = fmt;
1002                                 view = &patched_s_view;
1003                         }
1004                         views[tex_count++] = view;
1005                 }
1006         } else {
1007                 for (unsigned i = 0; i < fb->rt_count; i++) {
1008                         if (fb->rts[i].preload)
1009                                 views[tex_count++] = fb->rts[i].view;
1010                 }
1011 
1012         }
1013 
1014         *tex_count_out = tex_count;
1015 
1016         return pan_blitter_emit_textures(pool, tex_count, views);
1017 }
1018 
1019 #if PAN_ARCH >= 8
1020 /* TODO: cache */
1021 static mali_ptr
pan_blitter_emit_zs(struct pan_pool * pool,bool z,bool s)1022 pan_blitter_emit_zs(struct pan_pool *pool, bool z, bool s)
1023 {
1024         struct panfrost_ptr zsd = pan_pool_alloc_desc(pool, DEPTH_STENCIL);
1025 
1026         pan_pack(zsd.cpu, DEPTH_STENCIL, cfg) {
1027                 cfg.depth_function = MALI_FUNC_ALWAYS;
1028                 cfg.depth_write_enable = z;
1029 
1030                 if (z)
1031                         cfg.depth_source = MALI_DEPTH_SOURCE_SHADER;
1032 
1033                 cfg.stencil_test_enable = s;
1034                 cfg.stencil_from_shader = s;
1035 
1036                 cfg.front_compare_function = MALI_FUNC_ALWAYS;
1037                 cfg.front_stencil_fail = MALI_STENCIL_OP_REPLACE;
1038                 cfg.front_depth_fail = MALI_STENCIL_OP_REPLACE;
1039                 cfg.front_depth_pass = MALI_STENCIL_OP_REPLACE;
1040                 cfg.front_write_mask = 0xFF;
1041                 cfg.front_value_mask = 0xFF;
1042 
1043                 cfg.back_compare_function = MALI_FUNC_ALWAYS;
1044                 cfg.back_stencil_fail = MALI_STENCIL_OP_REPLACE;
1045                 cfg.back_depth_fail = MALI_STENCIL_OP_REPLACE;
1046                 cfg.back_depth_pass = MALI_STENCIL_OP_REPLACE;
1047                 cfg.back_write_mask = 0xFF;
1048                 cfg.back_value_mask = 0xFF;
1049 
1050                 cfg.depth_cull_enable = false;
1051         }
1052 
1053         return zsd.gpu;
1054 }
1055 #else
1056 static mali_ptr
pan_blitter_emit_viewport(struct pan_pool * pool,uint16_t minx,uint16_t miny,uint16_t maxx,uint16_t maxy)1057 pan_blitter_emit_viewport(struct pan_pool *pool,
1058                           uint16_t minx, uint16_t miny,
1059                           uint16_t maxx, uint16_t maxy)
1060 {
1061         struct panfrost_ptr vp = pan_pool_alloc_desc(pool, VIEWPORT);
1062 
1063         pan_pack(vp.cpu, VIEWPORT, cfg) {
1064                 cfg.scissor_minimum_x = minx;
1065                 cfg.scissor_minimum_y = miny;
1066                 cfg.scissor_maximum_x = maxx;
1067                 cfg.scissor_maximum_y = maxy;
1068         }
1069 
1070         return vp.gpu;
1071 }
1072 #endif
1073 
1074 static void
pan_preload_emit_dcd(struct pan_pool * pool,struct pan_fb_info * fb,bool zs,mali_ptr coordinates,mali_ptr tsd,void * out,bool always_write)1075 pan_preload_emit_dcd(struct pan_pool *pool,
1076                      struct pan_fb_info *fb, bool zs,
1077                      mali_ptr coordinates,
1078                      mali_ptr tsd, void *out, bool always_write)
1079 {
1080         unsigned tex_count = 0;
1081         mali_ptr textures = pan_preload_emit_textures(pool, fb, zs, &tex_count);
1082         mali_ptr samplers = pan_blitter_emit_sampler(pool, true);
1083         mali_ptr varyings = pan_blitter_emit_varying(pool);
1084         mali_ptr varying_buffers = pan_blitter_emit_varying_buffer(pool, coordinates);
1085 
1086         /* Tiles updated by blit shaders are still considered clean (separate
1087          * for colour and Z/S), allowing us to suppress unnecessary writeback
1088          */
1089         UNUSED bool clean_fragment_write = !always_write;
1090 
1091         /* Image view used when patching stencil formats for combined
1092          * depth/stencil preloads.
1093          */
1094         struct pan_image_view patched_s;
1095 
1096         struct pan_blitter_views views = pan_preload_get_views(fb, zs, &patched_s);
1097 
1098 #if PAN_ARCH <= 7
1099         pan_pack(out, DRAW, cfg) {
1100                 uint16_t minx = 0, miny = 0, maxx, maxy;
1101 
1102                 if (PAN_ARCH == 4) {
1103                         maxx = fb->width - 1;
1104                         maxy = fb->height - 1;
1105                 } else {
1106                         /* Align on 32x32 tiles */
1107                         minx = fb->extent.minx & ~31;
1108                         miny = fb->extent.miny & ~31;
1109                         maxx = MIN2(ALIGN_POT(fb->extent.maxx + 1, 32), fb->width) - 1;
1110                         maxy = MIN2(ALIGN_POT(fb->extent.maxy + 1, 32), fb->height) - 1;
1111                 }
1112 
1113                 cfg.thread_storage = tsd;
1114                 cfg.state = pan_blitter_get_rsd(pool->dev, &views);
1115 
1116                 cfg.position = coordinates;
1117                 cfg.viewport =
1118                         pan_blitter_emit_viewport(pool, minx, miny, maxx, maxy);
1119 
1120                 cfg.varyings = varyings;
1121                 cfg.varying_buffers = varying_buffers;
1122                 cfg.textures = textures;
1123                 cfg.samplers = samplers;
1124 
1125 #if PAN_ARCH >= 6
1126                 cfg.clean_fragment_write = clean_fragment_write;
1127 #endif
1128         }
1129 #else
1130         struct panfrost_ptr T;
1131         unsigned nr_tables = 12;
1132 
1133         /* Although individual resources need only 16 byte alignment, the
1134          * resource table as a whole must be 64-byte aligned.
1135          */
1136         T = pan_pool_alloc_aligned(pool, nr_tables * pan_size(RESOURCE), 64);
1137         memset(T.cpu, 0, nr_tables * pan_size(RESOURCE));
1138 
1139         panfrost_make_resource_table(T, PAN_TABLE_TEXTURE, textures, tex_count);
1140         panfrost_make_resource_table(T, PAN_TABLE_SAMPLER, samplers, 1);
1141         panfrost_make_resource_table(T, PAN_TABLE_ATTRIBUTE, varyings, 1);
1142         panfrost_make_resource_table(T, PAN_TABLE_ATTRIBUTE_BUFFER, varying_buffers, 1);
1143 
1144         struct pan_blit_shader_key key = pan_blitter_get_key(&views);
1145         const struct pan_blit_shader_data *blit_shader =
1146                 pan_blitter_get_blit_shader(pool->dev, &key);
1147 
1148         bool z = fb->zs.preload.z;
1149         bool s = fb->zs.preload.s;
1150         bool ms = pan_blitter_is_ms(&views);
1151 
1152         struct panfrost_ptr spd = pan_pool_alloc_desc(pool, SHADER_PROGRAM);
1153         pan_pack(spd.cpu, SHADER_PROGRAM, cfg) {
1154                 cfg.stage = MALI_SHADER_STAGE_FRAGMENT;
1155                 cfg.primary_shader = true;
1156                 cfg.register_allocation = MALI_SHADER_REGISTER_ALLOCATION_32_PER_THREAD;
1157                 cfg.binary = blit_shader->address;
1158                 cfg.preload.r48_r63 = blit_shader->info.preload >> 48;
1159         }
1160 
1161         unsigned bd_count = views.rt_count;
1162         struct panfrost_ptr blend = pan_pool_alloc_desc_array(pool, bd_count, BLEND);
1163         mali_ptr blend_shaders[8] = { 0 };
1164 
1165         if (!zs) {
1166                 pan_blitter_get_blend_shaders(pool->dev, views.rt_count, views.dst_rts,
1167                                               blit_shader, blend_shaders);
1168 
1169                 pan_blitter_emit_blends(pool->dev, blit_shader, &views, blend_shaders,
1170                                         blend.cpu);
1171         }
1172 
1173         pan_pack(out, DRAW, cfg) {
1174                 if (zs) {
1175                         /* ZS_EMIT requires late update/kill */
1176                         cfg.zs_update_operation = MALI_PIXEL_KILL_FORCE_LATE;
1177                         cfg.pixel_kill_operation = MALI_PIXEL_KILL_FORCE_LATE;
1178                         cfg.blend_count = 0;
1179                 } else {
1180                         /* Skipping ATEST requires forcing Z/S */
1181                         cfg.zs_update_operation = MALI_PIXEL_KILL_STRONG_EARLY;
1182                         cfg.pixel_kill_operation = MALI_PIXEL_KILL_FORCE_EARLY;
1183 
1184                         cfg.blend = blend.gpu;
1185                         cfg.blend_count = bd_count;
1186                         cfg.render_target_mask = 0x1;
1187                 }
1188 
1189                 cfg.allow_forward_pixel_to_kill = !zs;
1190                 cfg.allow_forward_pixel_to_be_killed = true;
1191                 cfg.depth_stencil = pan_blitter_emit_zs(pool, z, s);
1192                 cfg.sample_mask = 0xFFFF;
1193                 cfg.multisample_enable = ms;
1194                 cfg.evaluate_per_sample = ms;
1195                 cfg.maximum_z = 1.0;
1196                 cfg.clean_fragment_write = clean_fragment_write;
1197                 cfg.shader.resources = T.gpu | nr_tables;
1198                 cfg.shader.shader = spd.gpu;
1199                 cfg.shader.thread_storage = tsd;
1200         }
1201 #endif
1202 }
1203 
1204 #if PAN_ARCH <= 7
1205 static void *
pan_blit_emit_tiler_job(struct pan_pool * pool,struct pan_scoreboard * scoreboard,mali_ptr tiler,struct panfrost_ptr * job)1206 pan_blit_emit_tiler_job(struct pan_pool *pool,
1207                         struct pan_scoreboard *scoreboard,
1208                         mali_ptr tiler,
1209                         struct panfrost_ptr *job)
1210 {
1211         *job = pan_pool_alloc_desc(pool, TILER_JOB);
1212 
1213         pan_section_pack(job->cpu, TILER_JOB, PRIMITIVE, cfg) {
1214                 cfg.draw_mode = MALI_DRAW_MODE_TRIANGLE_STRIP;
1215                 cfg.index_count = 4;
1216                 cfg.job_task_split = 6;
1217         }
1218 
1219         pan_section_pack(job->cpu, TILER_JOB, PRIMITIVE_SIZE, cfg) {
1220                 cfg.constant = 1.0f;
1221         }
1222 
1223         void *invoc = pan_section_ptr(job->cpu, TILER_JOB, INVOCATION);
1224         panfrost_pack_work_groups_compute(invoc, 1, 4, 1, 1, 1, 1, true, false);
1225 
1226 #if PAN_ARCH >= 6
1227         pan_section_pack(job->cpu, TILER_JOB, PADDING, cfg);
1228         pan_section_pack(job->cpu, TILER_JOB, TILER, cfg) {
1229                 cfg.address = tiler;
1230         }
1231 #endif
1232 
1233         panfrost_add_job(pool, scoreboard, MALI_JOB_TYPE_TILER,
1234                          false, false, 0, 0, job, false);
1235         return pan_section_ptr(job->cpu, TILER_JOB, DRAW);
1236 }
1237 #endif
1238 
1239 #if PAN_ARCH >= 6
1240 static void
pan_preload_fb_alloc_pre_post_dcds(struct pan_pool * desc_pool,struct pan_fb_info * fb)1241 pan_preload_fb_alloc_pre_post_dcds(struct pan_pool *desc_pool,
1242                                    struct pan_fb_info *fb)
1243 {
1244         if (fb->bifrost.pre_post.dcds.gpu)
1245                 return;
1246 
1247         fb->bifrost.pre_post.dcds =
1248                 pan_pool_alloc_desc_array(desc_pool, 3, DRAW);
1249 }
1250 
1251 static void
pan_preload_emit_pre_frame_dcd(struct pan_pool * desc_pool,struct pan_fb_info * fb,bool zs,mali_ptr coords,mali_ptr tsd)1252 pan_preload_emit_pre_frame_dcd(struct pan_pool *desc_pool,
1253                                struct pan_fb_info *fb, bool zs,
1254                                mali_ptr coords, mali_ptr tsd)
1255 {
1256         unsigned dcd_idx = zs ? 1 : 0;
1257         pan_preload_fb_alloc_pre_post_dcds(desc_pool, fb);
1258         assert(fb->bifrost.pre_post.dcds.cpu);
1259         void *dcd = fb->bifrost.pre_post.dcds.cpu +
1260                     (dcd_idx * pan_size(DRAW));
1261 
1262         /* We only use crc_rt to determine whether to force writes for updating
1263          * the CRCs, so use a conservative tile size (16x16).
1264          */
1265         int crc_rt = GENX(pan_select_crc_rt)(fb, 16 * 16);
1266 
1267         bool always_write = false;
1268 
1269         /* If CRC data is currently invalid and this batch will make it valid,
1270          * write even clean tiles to make sure CRC data is updated. */
1271         if (crc_rt >= 0) {
1272                 bool *valid = fb->rts[crc_rt].crc_valid;
1273                 bool full = !fb->extent.minx && !fb->extent.miny &&
1274                         fb->extent.maxx == (fb->width - 1) &&
1275                         fb->extent.maxy == (fb->height - 1);
1276 
1277                 if (full && !(*valid))
1278                         always_write = true;
1279         }
1280 
1281         pan_preload_emit_dcd(desc_pool, fb, zs, coords, tsd, dcd, always_write);
1282         if (zs) {
1283                 enum pipe_format fmt = fb->zs.view.zs ?
1284                                        fb->zs.view.zs->image->layout.format :
1285                                        fb->zs.view.s->image->layout.format;
1286                 bool always = false;
1287 
1288                 /* If we're dealing with a combined ZS resource and only one
1289                  * component is cleared, we need to reload the whole surface
1290                  * because the zs_clean_pixel_write_enable flag is set in that
1291                  * case.
1292                  */
1293                 if (util_format_is_depth_and_stencil(fmt) &&
1294                     fb->zs.clear.z != fb->zs.clear.s)
1295                         always = true;
1296 
1297                 /* We could use INTERSECT on Bifrost v7 too, but
1298                  * EARLY_ZS_ALWAYS has the advantage of reloading the ZS tile
1299                  * buffer one or more tiles ahead, making ZS data immediately
1300                  * available for any ZS tests taking place in other shaders.
1301                  * Thing's haven't been benchmarked to determine what's
1302                  * preferable (saving bandwidth vs having ZS preloaded
1303                  * earlier), so let's leave it like that for now.
1304                  */
1305                 fb->bifrost.pre_post.modes[dcd_idx] =
1306                         desc_pool->dev->arch > 6 ?
1307                         MALI_PRE_POST_FRAME_SHADER_MODE_EARLY_ZS_ALWAYS :
1308                         always ? MALI_PRE_POST_FRAME_SHADER_MODE_ALWAYS :
1309                         MALI_PRE_POST_FRAME_SHADER_MODE_INTERSECT;
1310         } else {
1311                 fb->bifrost.pre_post.modes[dcd_idx] =
1312                         always_write ? MALI_PRE_POST_FRAME_SHADER_MODE_ALWAYS :
1313                         MALI_PRE_POST_FRAME_SHADER_MODE_INTERSECT;
1314         }
1315 }
1316 #else
1317 static struct panfrost_ptr
pan_preload_emit_tiler_job(struct pan_pool * desc_pool,struct pan_scoreboard * scoreboard,struct pan_fb_info * fb,bool zs,mali_ptr coords,mali_ptr tsd)1318 pan_preload_emit_tiler_job(struct pan_pool *desc_pool,
1319                            struct pan_scoreboard *scoreboard,
1320                            struct pan_fb_info *fb, bool zs,
1321                            mali_ptr coords, mali_ptr tsd)
1322 {
1323         struct panfrost_ptr job =
1324                 pan_pool_alloc_desc(desc_pool, TILER_JOB);
1325 
1326         pan_preload_emit_dcd(desc_pool, fb, zs, coords, tsd,
1327                              pan_section_ptr(job.cpu, TILER_JOB, DRAW),
1328                              false);
1329 
1330         pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE, cfg) {
1331                 cfg.draw_mode = MALI_DRAW_MODE_TRIANGLE_STRIP;
1332                 cfg.index_count = 4;
1333                 cfg.job_task_split = 6;
1334         }
1335 
1336         pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE_SIZE, cfg) {
1337                 cfg.constant = 1.0f;
1338         }
1339 
1340         void *invoc = pan_section_ptr(job.cpu,
1341                                       TILER_JOB,
1342                                       INVOCATION);
1343         panfrost_pack_work_groups_compute(invoc, 1, 4,
1344                                           1, 1, 1, 1, true, false);
1345 
1346         panfrost_add_job(desc_pool, scoreboard, MALI_JOB_TYPE_TILER,
1347                          false, false, 0, 0, &job, true);
1348         return job;
1349 }
1350 #endif
1351 
1352 static struct panfrost_ptr
pan_preload_fb_part(struct pan_pool * pool,struct pan_scoreboard * scoreboard,struct pan_fb_info * fb,bool zs,mali_ptr coords,mali_ptr tsd,mali_ptr tiler)1353 pan_preload_fb_part(struct pan_pool *pool,
1354                     struct pan_scoreboard *scoreboard,
1355                     struct pan_fb_info *fb, bool zs,
1356                     mali_ptr coords, mali_ptr tsd, mali_ptr tiler)
1357 {
1358         struct panfrost_ptr job = { 0 };
1359 
1360 #if PAN_ARCH >= 6
1361         pan_preload_emit_pre_frame_dcd(pool, fb, zs, coords, tsd);
1362 #else
1363         job = pan_preload_emit_tiler_job(pool, scoreboard, fb, zs, coords, tsd);
1364 #endif
1365         return job;
1366 }
1367 
1368 unsigned
GENX(pan_preload_fb)1369 GENX(pan_preload_fb)(struct pan_pool *pool,
1370                      struct pan_scoreboard *scoreboard,
1371                      struct pan_fb_info *fb,
1372                      mali_ptr tsd, mali_ptr tiler,
1373                      struct panfrost_ptr *jobs)
1374 {
1375         bool preload_zs = pan_preload_needed(fb, true);
1376         bool preload_rts = pan_preload_needed(fb, false);
1377         mali_ptr coords;
1378 
1379         if (!preload_zs && !preload_rts)
1380                 return 0;
1381 
1382         float rect[] = {
1383                 0.0, 0.0, 0.0, 1.0,
1384                 fb->width, 0.0, 0.0, 1.0,
1385                 0.0, fb->height, 0.0, 1.0,
1386                 fb->width, fb->height, 0.0, 1.0,
1387         };
1388 
1389         coords = pan_pool_upload_aligned(pool, rect,
1390                                          sizeof(rect), 64);
1391 
1392         unsigned njobs = 0;
1393         if (preload_zs) {
1394                 struct panfrost_ptr job =
1395                         pan_preload_fb_part(pool, scoreboard, fb, true,
1396                                             coords, tsd, tiler);
1397                 if (jobs && job.cpu)
1398                         jobs[njobs++] = job;
1399         }
1400 
1401         if (preload_rts) {
1402                 struct panfrost_ptr job =
1403                         pan_preload_fb_part(pool, scoreboard, fb, false,
1404                                             coords, tsd, tiler);
1405                 if (jobs && job.cpu)
1406                         jobs[njobs++] = job;
1407         }
1408 
1409         return njobs;
1410 }
1411 
1412 #if PAN_ARCH <= 7
1413 void
GENX(pan_blit_ctx_init)1414 GENX(pan_blit_ctx_init)(struct panfrost_device *dev,
1415                         const struct pan_blit_info *info,
1416                         struct pan_pool *blit_pool,
1417                         struct pan_blit_context *ctx)
1418 {
1419         memset(ctx, 0, sizeof(*ctx));
1420 
1421         struct pan_image_view sviews[2] = {
1422                 {
1423                         .format = info->src.planes[0].format,
1424                         .image = info->src.planes[0].image,
1425                         .dim = info->src.planes[0].image->layout.dim == MALI_TEXTURE_DIMENSION_CUBE ?
1426                                MALI_TEXTURE_DIMENSION_2D : info->src.planes[0].image->layout.dim,
1427                         .first_level = info->src.level,
1428                         .last_level = info->src.level,
1429                         .first_layer = info->src.start.layer,
1430                         .last_layer = info->src.end.layer,
1431                         .swizzle = {
1432                                 PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
1433                                 PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W,
1434                         },
1435                 },
1436         };
1437 
1438         struct pan_image_view dview = {
1439                 .format = info->dst.planes[0].format,
1440                 .image = info->dst.planes[0].image,
1441                 .dim = info->dst.planes[0].image->layout.dim == MALI_TEXTURE_DIMENSION_1D ?
1442                        MALI_TEXTURE_DIMENSION_1D : MALI_TEXTURE_DIMENSION_2D,
1443                 .first_level = info->dst.level,
1444                 .last_level = info->dst.level,
1445                 .first_layer = info->dst.start.layer,
1446                 .last_layer = info->dst.start.layer,
1447                 .swizzle = {
1448                         PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
1449                         PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W,
1450                 },
1451         };
1452 
1453         ctx->src.start.x = info->src.start.x;
1454         ctx->src.start.y = info->src.start.y;
1455         ctx->src.end.x = info->src.end.x;
1456         ctx->src.end.y = info->src.end.y;
1457         ctx->src.dim = sviews[0].dim;
1458 
1459         if (info->dst.planes[0].image->layout.dim == MALI_TEXTURE_DIMENSION_3D) {
1460                 unsigned max_z = u_minify(info->dst.planes[0].image->layout.depth, info->dst.level) - 1;
1461 
1462                 ctx->z_scale = (float)(info->src.end.z - info->src.start.z) /
1463                                (info->dst.end.z - info->dst.start.z);
1464                 assert(info->dst.start.z != info->dst.end.z);
1465                 if (info->dst.start.z > info->dst.end.z) {
1466                         ctx->dst.cur_layer = info->dst.start.z - 1;
1467                         ctx->dst.last_layer = info->dst.end.z;
1468                 } else {
1469                         ctx->dst.cur_layer = info->dst.start.z;
1470                         ctx->dst.last_layer = info->dst.end.z - 1;
1471                 }
1472                 ctx->dst.cur_layer = MIN2(MAX2(ctx->dst.cur_layer, 0), max_z);
1473                 ctx->dst.last_layer = MIN2(MAX2(ctx->dst.last_layer, 0), max_z);
1474                 ctx->dst.layer_offset = ctx->dst.cur_layer;
1475         } else {
1476                 unsigned max_layer = info->dst.planes[0].image->layout.array_size - 1;
1477                 ctx->dst.layer_offset = info->dst.start.layer;
1478                 ctx->dst.cur_layer = info->dst.start.layer;
1479                 ctx->dst.last_layer = MIN2(info->dst.end.layer, max_layer);
1480                 ctx->z_scale = 1;
1481         }
1482 
1483         if (sviews[0].dim == MALI_TEXTURE_DIMENSION_3D) {
1484                 if (info->src.start.z < info->src.end.z)
1485                         ctx->src.z_offset = info->src.start.z + fabs(ctx->z_scale * 0.5f);
1486                 else
1487                         ctx->src.z_offset = info->src.start.z - fabs(ctx->z_scale * 0.5f);
1488         } else {
1489                 ctx->src.layer_offset = info->src.start.layer;
1490         }
1491 
1492         /* Split depth and stencil */
1493         if (util_format_is_depth_and_stencil(sviews[0].format)) {
1494                 sviews[1] = sviews[0];
1495                 sviews[0].format = util_format_get_depth_only(sviews[0].format);
1496                 sviews[1].format = util_format_stencil_only(sviews[1].format);
1497         } else if (info->src.planes[1].format) {
1498                 sviews[1] = sviews[0];
1499                 sviews[1].format = info->src.planes[1].format;
1500                 sviews[1].image = info->src.planes[1].image;
1501         }
1502 
1503         ctx->rsd = pan_blit_get_rsd(dev, sviews, &dview);
1504 
1505         ASSERTED unsigned nlayers = info->src.end.layer - info->src.start.layer + 1;
1506 
1507         assert(nlayers == (info->dst.end.layer - info->dst.start.layer + 1));
1508 
1509         unsigned dst_w = u_minify(info->dst.planes[0].image->layout.width, info->dst.level);
1510         unsigned dst_h = u_minify(info->dst.planes[0].image->layout.height, info->dst.level);
1511         unsigned maxx = MIN2(MAX2(info->dst.start.x, info->dst.end.x), dst_w - 1);
1512         unsigned maxy = MIN2(MAX2(info->dst.start.y, info->dst.end.y), dst_h - 1);
1513         unsigned minx = MAX2(MIN3(info->dst.start.x, info->dst.end.x, maxx), 0);
1514         unsigned miny = MAX2(MIN3(info->dst.start.y, info->dst.end.y, maxy), 0);
1515 
1516         if (info->scissor.enable) {
1517                 minx = MAX2(minx, info->scissor.minx);
1518                 miny = MAX2(miny, info->scissor.miny);
1519                 maxx = MIN2(maxx, info->scissor.maxx);
1520                 maxy = MIN2(maxy, info->scissor.maxy);
1521         }
1522 
1523         const struct pan_image_view *sview_ptrs[] = { &sviews[0], &sviews[1] };
1524         unsigned nviews = sviews[1].format ? 2 : 1;
1525 
1526         ctx->textures = pan_blitter_emit_textures(blit_pool, nviews, sview_ptrs);
1527         ctx->samplers = pan_blitter_emit_sampler(blit_pool, info->nearest);
1528 
1529         ctx->vpd = pan_blitter_emit_viewport(blit_pool,
1530                                              minx, miny, maxx, maxy);
1531 
1532         float dst_rect[] = {
1533                 info->dst.start.x, info->dst.start.y, 0.0, 1.0,
1534                 info->dst.end.x, info->dst.start.y, 0.0, 1.0,
1535                 info->dst.start.x, info->dst.end.y, 0.0, 1.0,
1536                 info->dst.end.x, info->dst.end.y, 0.0, 1.0,
1537         };
1538 
1539         ctx->position =
1540                 pan_pool_upload_aligned(blit_pool, dst_rect,
1541                                         sizeof(dst_rect), 64);
1542 }
1543 
1544 struct panfrost_ptr
GENX(pan_blit)1545 GENX(pan_blit)(struct pan_blit_context *ctx,
1546                struct pan_pool *pool,
1547                struct pan_scoreboard *scoreboard,
1548                mali_ptr tsd, mali_ptr tiler)
1549 {
1550         if (ctx->dst.cur_layer < 0 ||
1551             (ctx->dst.last_layer >= ctx->dst.layer_offset &&
1552              ctx->dst.cur_layer > ctx->dst.last_layer) ||
1553             (ctx->dst.last_layer < ctx->dst.layer_offset &&
1554              ctx->dst.cur_layer < ctx->dst.last_layer))
1555                 return (struct panfrost_ptr){ 0 };
1556 
1557         int32_t layer = ctx->dst.cur_layer - ctx->dst.layer_offset;
1558         float src_z;
1559         if (ctx->src.dim == MALI_TEXTURE_DIMENSION_3D)
1560                 src_z = (ctx->z_scale * layer) + ctx->src.z_offset;
1561         else
1562                 src_z = ctx->src.layer_offset + layer;
1563 
1564         float src_rect[] = {
1565                 ctx->src.start.x, ctx->src.start.y, src_z, 1.0,
1566                 ctx->src.end.x, ctx->src.start.y, src_z, 1.0,
1567                 ctx->src.start.x, ctx->src.end.y, src_z, 1.0,
1568                 ctx->src.end.x, ctx->src.end.y, src_z, 1.0,
1569         };
1570 
1571         mali_ptr src_coords =
1572                 pan_pool_upload_aligned(pool, src_rect,
1573                                         sizeof(src_rect), 64);
1574 
1575         struct panfrost_ptr job = { 0 };
1576         void *dcd = pan_blit_emit_tiler_job(pool, scoreboard, tiler, &job);
1577 
1578         pan_pack(dcd, DRAW, cfg) {
1579                 cfg.thread_storage = tsd;
1580                 cfg.state = ctx->rsd;
1581 
1582                 cfg.position = ctx->position;
1583                 cfg.varyings = pan_blitter_emit_varying(pool);
1584                 cfg.varying_buffers = pan_blitter_emit_varying_buffer(pool, src_coords);
1585                 cfg.viewport = ctx->vpd;
1586                 cfg.textures = ctx->textures;
1587                 cfg.samplers = ctx->samplers;
1588         }
1589 
1590         return job;
1591 }
1592 #endif
1593 
pan_blit_shader_key_hash(const void * key)1594 static uint32_t pan_blit_shader_key_hash(const void *key)
1595 {
1596         return _mesa_hash_data(key, sizeof(struct pan_blit_shader_key));
1597 }
1598 
pan_blit_shader_key_equal(const void * a,const void * b)1599 static bool pan_blit_shader_key_equal(const void *a, const void *b)
1600 {
1601         return !memcmp(a, b, sizeof(struct pan_blit_shader_key));
1602 }
1603 
pan_blit_blend_shader_key_hash(const void * key)1604 static uint32_t pan_blit_blend_shader_key_hash(const void *key)
1605 {
1606         return _mesa_hash_data(key, sizeof(struct pan_blit_blend_shader_key));
1607 }
1608 
pan_blit_blend_shader_key_equal(const void * a,const void * b)1609 static bool pan_blit_blend_shader_key_equal(const void *a, const void *b)
1610 {
1611         return !memcmp(a, b, sizeof(struct pan_blit_blend_shader_key));
1612 }
1613 
pan_blit_rsd_key_hash(const void * key)1614 static uint32_t pan_blit_rsd_key_hash(const void *key)
1615 {
1616         return _mesa_hash_data(key, sizeof(struct pan_blit_rsd_key));
1617 }
1618 
pan_blit_rsd_key_equal(const void * a,const void * b)1619 static bool pan_blit_rsd_key_equal(const void *a, const void *b)
1620 {
1621         return !memcmp(a, b, sizeof(struct pan_blit_rsd_key));
1622 }
1623 
1624 static void
pan_blitter_prefill_blit_shader_cache(struct panfrost_device * dev)1625 pan_blitter_prefill_blit_shader_cache(struct panfrost_device *dev)
1626 {
1627         static const struct pan_blit_shader_key prefill[] = {
1628                 {
1629                         .surfaces[0] = {
1630                                 .loc = FRAG_RESULT_DEPTH,
1631                                 .type = nir_type_float32,
1632                                 .dim = MALI_TEXTURE_DIMENSION_2D,
1633                                 .src_samples = 1,
1634                                 .dst_samples = 1,
1635                         },
1636                 },
1637                 {
1638                         .surfaces[1] = {
1639                                 .loc = FRAG_RESULT_STENCIL,
1640                                 .type = nir_type_uint32,
1641                                 .dim = MALI_TEXTURE_DIMENSION_2D,
1642                                 .src_samples = 1,
1643                                 .dst_samples = 1,
1644                         },
1645                 },
1646                 {
1647                         .surfaces[0] = {
1648                                 .loc = FRAG_RESULT_DATA0,
1649                                 .type = nir_type_float32,
1650                                 .dim = MALI_TEXTURE_DIMENSION_2D,
1651                                 .src_samples = 1,
1652                                 .dst_samples = 1,
1653                         },
1654                 },
1655         };
1656 
1657         for (unsigned i = 0; i < ARRAY_SIZE(prefill); i++)
1658                 pan_blitter_get_blit_shader(dev, &prefill[i]);
1659 }
1660 
1661 void
GENX(pan_blitter_init)1662 GENX(pan_blitter_init)(struct panfrost_device *dev,
1663                        struct pan_pool *bin_pool,
1664                        struct pan_pool *desc_pool)
1665 {
1666         dev->blitter.shaders.blit =
1667                 _mesa_hash_table_create(NULL, pan_blit_shader_key_hash,
1668                                         pan_blit_shader_key_equal);
1669         dev->blitter.shaders.blend =
1670                 _mesa_hash_table_create(NULL, pan_blit_blend_shader_key_hash,
1671                                         pan_blit_blend_shader_key_equal);
1672         dev->blitter.shaders.pool = bin_pool;
1673         pthread_mutex_init(&dev->blitter.shaders.lock, NULL);
1674         pan_blitter_prefill_blit_shader_cache(dev);
1675 
1676         dev->blitter.rsds.pool = desc_pool;
1677         dev->blitter.rsds.rsds =
1678                 _mesa_hash_table_create(NULL, pan_blit_rsd_key_hash,
1679                                         pan_blit_rsd_key_equal);
1680         pthread_mutex_init(&dev->blitter.rsds.lock, NULL);
1681 }
1682 
1683 void
GENX(pan_blitter_cleanup)1684 GENX(pan_blitter_cleanup)(struct panfrost_device *dev)
1685 {
1686         _mesa_hash_table_destroy(dev->blitter.shaders.blit, NULL);
1687         _mesa_hash_table_destroy(dev->blitter.shaders.blend, NULL);
1688         pthread_mutex_destroy(&dev->blitter.shaders.lock);
1689         _mesa_hash_table_destroy(dev->blitter.rsds.rsds, NULL);
1690         pthread_mutex_destroy(&dev->blitter.rsds.lock);
1691 }
1692