• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2018 Alyssa Rosenzweig
3  * Copyright (C) 2019-2021 Collabora, Ltd.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 
25 #include "pan_blend.h"
26 #include "util/blend.h"
27 
28 #ifdef PAN_ARCH
29 #include "pan_shader.h"
30 #endif
31 
32 #include "compiler/nir/nir.h"
33 #include "compiler/nir/nir_builder.h"
34 #include "compiler/nir/nir_conversion_builder.h"
35 #include "compiler/nir/nir_lower_blend.h"
36 #include "panfrost/util/pan_lower_framebuffer.h"
37 #include "util/format/u_format.h"
38 #include "pan_texture.h"
39 
40 #ifndef PAN_ARCH
41 
42 /* Fixed function blending */
43 
44 static bool
factor_is_supported(enum pipe_blendfactor factor)45 factor_is_supported(enum pipe_blendfactor factor)
46 {
47    factor = util_blendfactor_without_invert(factor);
48 
49    return factor != PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE &&
50           factor != PIPE_BLENDFACTOR_SRC1_COLOR &&
51           factor != PIPE_BLENDFACTOR_SRC1_ALPHA;
52 }
53 
54 /* OpenGL allows encoding (src*dest + dest*src) which is incompatiblle with
55  * Midgard style blending since there are two multiplies. However, it may be
56  * factored as 2*src*dest = dest*(2*src), which can be encoded on Bifrost as 0
57  * + dest * (2*src) wih the new source_2 value of C. Detect this case. */
58 
59 static bool
is_2srcdest(enum pipe_blend_func blend_func,enum pipe_blendfactor src_factor,enum pipe_blendfactor dest_factor,bool is_alpha)60 is_2srcdest(enum pipe_blend_func blend_func, enum pipe_blendfactor src_factor,
61             enum pipe_blendfactor dest_factor, bool is_alpha)
62 {
63    return (blend_func == PIPE_BLEND_ADD) &&
64           ((src_factor == PIPE_BLENDFACTOR_DST_COLOR) ||
65            ((src_factor == PIPE_BLENDFACTOR_DST_ALPHA) && is_alpha)) &&
66           ((dest_factor == PIPE_BLENDFACTOR_SRC_COLOR) ||
67            ((dest_factor == PIPE_BLENDFACTOR_SRC_ALPHA) && is_alpha));
68 }
69 
70 static bool
can_fixed_function_equation(enum pipe_blend_func blend_func,enum pipe_blendfactor src_factor,enum pipe_blendfactor dest_factor,bool is_alpha,bool supports_2src)71 can_fixed_function_equation(enum pipe_blend_func blend_func,
72                             enum pipe_blendfactor src_factor,
73                             enum pipe_blendfactor dest_factor, bool is_alpha,
74                             bool supports_2src)
75 {
76    if (is_2srcdest(blend_func, src_factor, dest_factor, is_alpha))
77       return supports_2src;
78 
79    if (blend_func != PIPE_BLEND_ADD && blend_func != PIPE_BLEND_SUBTRACT &&
80        blend_func != PIPE_BLEND_REVERSE_SUBTRACT)
81       return false;
82 
83    if (!factor_is_supported(src_factor) || !factor_is_supported(dest_factor))
84       return false;
85 
86    /* Fixed function requires src/dest factors to match (up to invert) or be
87     * zero/one.
88     */
89    enum pipe_blendfactor src = util_blendfactor_without_invert(src_factor);
90    enum pipe_blendfactor dest = util_blendfactor_without_invert(dest_factor);
91 
92    return (src == dest) || (src == PIPE_BLENDFACTOR_ONE) ||
93           (dest == PIPE_BLENDFACTOR_ONE);
94 }
95 
96 static unsigned
blend_factor_constant_mask(enum pipe_blendfactor factor)97 blend_factor_constant_mask(enum pipe_blendfactor factor)
98 {
99    factor = util_blendfactor_without_invert(factor);
100 
101    if (factor == PIPE_BLENDFACTOR_CONST_COLOR)
102       return 0b0111; /* RGB */
103    else if (factor == PIPE_BLENDFACTOR_CONST_ALPHA)
104       return 0b1000; /* A */
105    else
106       return 0b0000; /* - */
107 }
108 
109 unsigned
pan_blend_constant_mask(const struct pan_blend_equation eq)110 pan_blend_constant_mask(const struct pan_blend_equation eq)
111 {
112    return blend_factor_constant_mask(eq.rgb_src_factor) |
113           blend_factor_constant_mask(eq.rgb_dst_factor) |
114           blend_factor_constant_mask(eq.alpha_src_factor) |
115           blend_factor_constant_mask(eq.alpha_dst_factor);
116 }
117 
118 /* Only "homogenous" (scalar or vector with all components equal) constants are
119  * valid for fixed-function, so check for this condition */
120 
121 bool
pan_blend_is_homogenous_constant(unsigned mask,const float * constants)122 pan_blend_is_homogenous_constant(unsigned mask, const float *constants)
123 {
124    float constant = pan_blend_get_constant(mask, constants);
125 
126    u_foreach_bit(i, mask) {
127       if (constants[i] != constant)
128          return false;
129    }
130 
131    return true;
132 }
133 
134 /* Determines if an equation can run in fixed function */
135 
136 bool
pan_blend_can_fixed_function(const struct pan_blend_equation equation,bool supports_2src)137 pan_blend_can_fixed_function(const struct pan_blend_equation equation,
138                              bool supports_2src)
139 {
140    return !equation.blend_enable ||
141           (can_fixed_function_equation(
142               equation.rgb_func, equation.rgb_src_factor,
143               equation.rgb_dst_factor, false, supports_2src) &&
144            can_fixed_function_equation(
145               equation.alpha_func, equation.alpha_src_factor,
146               equation.alpha_dst_factor, true, supports_2src));
147 }
148 
149 static enum mali_blend_operand_c
to_c_factor(enum pipe_blendfactor factor)150 to_c_factor(enum pipe_blendfactor factor)
151 {
152    switch (util_blendfactor_without_invert(factor)) {
153    case PIPE_BLENDFACTOR_ONE:
154       /* Extra invert to flip back in caller */
155       return MALI_BLEND_OPERAND_C_ZERO;
156 
157    case PIPE_BLENDFACTOR_SRC_ALPHA:
158       return MALI_BLEND_OPERAND_C_SRC_ALPHA;
159 
160    case PIPE_BLENDFACTOR_DST_ALPHA:
161       return MALI_BLEND_OPERAND_C_DEST_ALPHA;
162 
163    case PIPE_BLENDFACTOR_SRC_COLOR:
164       return MALI_BLEND_OPERAND_C_SRC;
165 
166    case PIPE_BLENDFACTOR_DST_COLOR:
167       return MALI_BLEND_OPERAND_C_DEST;
168 
169    case PIPE_BLENDFACTOR_CONST_COLOR:
170    case PIPE_BLENDFACTOR_CONST_ALPHA:
171       return MALI_BLEND_OPERAND_C_CONSTANT;
172 
173    default:
174       unreachable("Unsupported blend factor");
175    }
176 }
177 
178 static void
to_panfrost_function(enum pipe_blend_func blend_func,enum pipe_blendfactor src_factor,enum pipe_blendfactor dest_factor,bool is_alpha,struct MALI_BLEND_FUNCTION * function)179 to_panfrost_function(enum pipe_blend_func blend_func,
180                      enum pipe_blendfactor src_factor,
181                      enum pipe_blendfactor dest_factor, bool is_alpha,
182                      struct MALI_BLEND_FUNCTION *function)
183 {
184    assert(can_fixed_function_equation(blend_func, src_factor, dest_factor,
185                                       is_alpha, true));
186 
187    /* We handle ZERO/ONE specially since it's the hardware has 0 and can invert
188     * to 1 but Gallium has 0 as the uninverted version.
189     */
190    bool src_inverted =
191       util_blendfactor_is_inverted(src_factor) ^
192       (util_blendfactor_without_invert(src_factor) == PIPE_BLENDFACTOR_ONE);
193 
194    bool dest_inverted =
195       util_blendfactor_is_inverted(dest_factor) ^
196       (util_blendfactor_without_invert(dest_factor) == PIPE_BLENDFACTOR_ONE);
197 
198    if (src_factor == PIPE_BLENDFACTOR_ZERO) {
199       function->a = MALI_BLEND_OPERAND_A_ZERO;
200       function->b = MALI_BLEND_OPERAND_B_DEST;
201       if (blend_func == PIPE_BLEND_SUBTRACT)
202          function->negate_b = true;
203       function->invert_c = dest_inverted;
204       function->c = to_c_factor(dest_factor);
205    } else if (src_factor == PIPE_BLENDFACTOR_ONE) {
206       function->a = MALI_BLEND_OPERAND_A_SRC;
207       function->b = MALI_BLEND_OPERAND_B_DEST;
208       if (blend_func == PIPE_BLEND_SUBTRACT)
209          function->negate_b = true;
210       else if (blend_func == PIPE_BLEND_REVERSE_SUBTRACT)
211          function->negate_a = true;
212       function->invert_c = dest_inverted;
213       function->c = to_c_factor(dest_factor);
214    } else if (dest_factor == PIPE_BLENDFACTOR_ZERO) {
215       function->a = MALI_BLEND_OPERAND_A_ZERO;
216       function->b = MALI_BLEND_OPERAND_B_SRC;
217       if (blend_func == PIPE_BLEND_REVERSE_SUBTRACT)
218          function->negate_b = true;
219       function->invert_c = src_inverted;
220       function->c = to_c_factor(src_factor);
221    } else if (dest_factor == PIPE_BLENDFACTOR_ONE) {
222       function->a = MALI_BLEND_OPERAND_A_DEST;
223       function->b = MALI_BLEND_OPERAND_B_SRC;
224       if (blend_func == PIPE_BLEND_SUBTRACT)
225          function->negate_a = true;
226       else if (blend_func == PIPE_BLEND_REVERSE_SUBTRACT)
227          function->negate_b = true;
228       function->invert_c = src_inverted;
229       function->c = to_c_factor(src_factor);
230    } else if (src_factor == dest_factor) {
231       function->a = MALI_BLEND_OPERAND_A_ZERO;
232       function->invert_c = src_inverted;
233       function->c = to_c_factor(src_factor);
234 
235       switch (blend_func) {
236       case PIPE_BLEND_ADD:
237          function->b = MALI_BLEND_OPERAND_B_SRC_PLUS_DEST;
238          break;
239       case PIPE_BLEND_REVERSE_SUBTRACT:
240          function->negate_b = true;
241          FALLTHROUGH;
242       case PIPE_BLEND_SUBTRACT:
243          function->b = MALI_BLEND_OPERAND_B_SRC_MINUS_DEST;
244          break;
245       default:
246          unreachable("Invalid blend function");
247       }
248    } else if (is_2srcdest(blend_func, src_factor, dest_factor, is_alpha)) {
249       /* src*dest + dest*src = 2*src*dest = 0 + dest*(2*src) */
250       function->a = MALI_BLEND_OPERAND_A_ZERO;
251       function->b = MALI_BLEND_OPERAND_B_DEST;
252       function->c = MALI_BLEND_OPERAND_C_SRC_X_2;
253    } else {
254       assert(util_blendfactor_without_invert(src_factor) ==
255                 util_blendfactor_without_invert(dest_factor) &&
256              src_inverted != dest_inverted);
257 
258       function->a = MALI_BLEND_OPERAND_A_DEST;
259       function->invert_c = src_inverted;
260       function->c = to_c_factor(src_factor);
261 
262       switch (blend_func) {
263       case PIPE_BLEND_ADD:
264          function->b = MALI_BLEND_OPERAND_B_SRC_MINUS_DEST;
265          break;
266       case PIPE_BLEND_REVERSE_SUBTRACT:
267          function->b = MALI_BLEND_OPERAND_B_SRC_PLUS_DEST;
268          function->negate_b = true;
269          break;
270       case PIPE_BLEND_SUBTRACT:
271          function->b = MALI_BLEND_OPERAND_B_SRC_PLUS_DEST;
272          function->negate_a = true;
273          break;
274       default:
275          unreachable("Invalid blend function\n");
276       }
277    }
278 }
279 
280 bool
pan_blend_is_opaque(const struct pan_blend_equation equation)281 pan_blend_is_opaque(const struct pan_blend_equation equation)
282 {
283    /* If a channel is masked out, we can't use opaque mode even if
284     * blending is disabled, since we need a tilebuffer read in there */
285    if (equation.color_mask != 0xF)
286       return false;
287 
288    /* With nothing masked out, disabled bledning is opaque */
289    if (!equation.blend_enable)
290       return true;
291 
292    /* Also detect open-coded opaque blending */
293    return equation.rgb_src_factor == PIPE_BLENDFACTOR_ONE &&
294           equation.rgb_dst_factor == PIPE_BLENDFACTOR_ZERO &&
295           (equation.rgb_func == PIPE_BLEND_ADD ||
296            equation.rgb_func == PIPE_BLEND_SUBTRACT) &&
297           equation.alpha_src_factor == PIPE_BLENDFACTOR_ONE &&
298           equation.alpha_dst_factor == PIPE_BLENDFACTOR_ZERO &&
299           (equation.alpha_func == PIPE_BLEND_ADD ||
300            equation.alpha_func == PIPE_BLEND_SUBTRACT);
301 }
302 
303 /* Check if a factor represents a constant value of val, assuming src_alpha is
304  * the given constant.
305  */
306 
307 static inline bool
is_factor_01(enum pipe_blendfactor factor,unsigned val,unsigned srca)308 is_factor_01(enum pipe_blendfactor factor, unsigned val, unsigned srca)
309 {
310    assert(val == 0 || val == 1);
311    assert(srca == 0 || srca == 1);
312 
313    switch (factor) {
314    case PIPE_BLENDFACTOR_ZERO:
315       return (val == 0);
316 
317    case PIPE_BLENDFACTOR_ONE:
318       return (val == 1);
319 
320    case PIPE_BLENDFACTOR_SRC_ALPHA:
321       return (val == srca);
322 
323    case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
324       return (val == (1 - srca));
325 
326    default:
327       return false;
328    }
329 }
330 
331 /* Returns if src alpha = 0 implies the blended colour equals the destination
332  * colour. Suppose source alpha = 0 and consider cases.
333  *
334  * Additive blending: Equivalent to D = S * f_s + D * f_d for all D and all S
335  * with S_a = 0, for each component. For the alpha component (if it unmasked),
336  * we have S_a = 0 so this reduces to D = D * f_d <===> f_d = 1. For RGB
337  * components (if unmasked), we need f_s = 0 and f_d = 1.
338  *
339  * Subtractive blending: Fails in general (D = S * f_S - D * f_D). We
340  * would need f_S = 0 and f_D = -1, which is not valid in the APIs.
341  *
342  * Reverse subtractive blending (D = D * f_D - S * f_S), we need f_D = 1
343  * and f_S = 0 up to masking. This is the same as additive blending.
344  *
345  * Min/max: Fails in general on the RGB components.
346  */
347 
348 bool
pan_blend_alpha_zero_nop(const struct pan_blend_equation eq)349 pan_blend_alpha_zero_nop(const struct pan_blend_equation eq)
350 {
351    if (eq.rgb_func != PIPE_BLEND_ADD &&
352        eq.rgb_func != PIPE_BLEND_REVERSE_SUBTRACT)
353       return false;
354 
355    if (eq.color_mask & 0x8) {
356       if (!is_factor_01(eq.alpha_dst_factor, 1, 0))
357          return false;
358    }
359 
360    if (eq.color_mask & 0x7) {
361       if (!is_factor_01(eq.rgb_dst_factor, 1, 0))
362          return false;
363 
364       if (!is_factor_01(eq.rgb_src_factor, 0, 0))
365          return false;
366    }
367 
368    return true;
369 }
370 
371 /* Returns if src alpha = 1 implies the blended colour equals the source
372  * colour. Suppose source alpha = 1 and consider cases.
373  *
374  * Additive blending: S = S * f_s + D * f_d. We need f_s = 1 and f_d = 0.
375  *
376  * Subtractive blending: S = S * f_s - D * f_d. Same as additive blending.
377  *
378  * Reverse subtractive blending: S = D * f_d - S * f_s. Fails in general since
379  * it would require f_s = -1, which is not valid in the APIs.
380  *
381  * Min/max: Fails in general on the RGB components.
382  *
383  * Note if any component is masked, we can't use a store.
384  */
385 
386 bool
pan_blend_alpha_one_store(const struct pan_blend_equation eq)387 pan_blend_alpha_one_store(const struct pan_blend_equation eq)
388 {
389    if (eq.rgb_func != PIPE_BLEND_ADD && eq.rgb_func != PIPE_BLEND_SUBTRACT)
390       return false;
391 
392    if (eq.color_mask != 0xf)
393       return false;
394 
395    return is_factor_01(eq.rgb_src_factor, 1, 1) &&
396           is_factor_01(eq.alpha_src_factor, 1, 1) &&
397           is_factor_01(eq.rgb_dst_factor, 0, 1) &&
398           is_factor_01(eq.alpha_dst_factor, 0, 1);
399 }
400 
401 static bool
is_dest_factor(enum pipe_blendfactor factor,bool alpha)402 is_dest_factor(enum pipe_blendfactor factor, bool alpha)
403 {
404    factor = util_blendfactor_without_invert(factor);
405 
406    return factor == PIPE_BLENDFACTOR_DST_ALPHA ||
407           factor == PIPE_BLENDFACTOR_DST_COLOR ||
408           (factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE && !alpha);
409 }
410 
411 /* Determines if a blend equation reads back the destination. This can occur by
412  * explicitly referencing the destination in the blend equation, or by using a
413  * partial writemask. */
414 
415 bool
pan_blend_reads_dest(const struct pan_blend_equation equation)416 pan_blend_reads_dest(const struct pan_blend_equation equation)
417 {
418    if (equation.color_mask && equation.color_mask != 0xF)
419       return true;
420 
421    if (!equation.blend_enable)
422       return false;
423 
424    return is_dest_factor(equation.rgb_src_factor, false) ||
425           is_dest_factor(equation.alpha_src_factor, true) ||
426           equation.rgb_dst_factor != PIPE_BLENDFACTOR_ZERO ||
427           equation.alpha_dst_factor != PIPE_BLENDFACTOR_ZERO;
428 }
429 
430 /* Create the descriptor for a fixed blend mode given the corresponding API
431  * state. Assumes the equation can be represented as fixed-function. */
432 
433 void
pan_blend_to_fixed_function_equation(const struct pan_blend_equation equation,struct MALI_BLEND_EQUATION * out)434 pan_blend_to_fixed_function_equation(const struct pan_blend_equation equation,
435                                      struct MALI_BLEND_EQUATION *out)
436 {
437    /* If no blending is enabled, default back on `replace` mode */
438    if (!equation.blend_enable) {
439       out->color_mask = equation.color_mask;
440       out->rgb.a = MALI_BLEND_OPERAND_A_SRC;
441       out->rgb.b = MALI_BLEND_OPERAND_B_SRC;
442       out->rgb.c = MALI_BLEND_OPERAND_C_ZERO;
443       out->alpha.a = MALI_BLEND_OPERAND_A_SRC;
444       out->alpha.b = MALI_BLEND_OPERAND_B_SRC;
445       out->alpha.c = MALI_BLEND_OPERAND_C_ZERO;
446       return;
447    }
448 
449    /* Compile the fixed-function blend */
450    to_panfrost_function(equation.rgb_func, equation.rgb_src_factor,
451                         equation.rgb_dst_factor, false, &out->rgb);
452    to_panfrost_function(equation.alpha_func, equation.alpha_src_factor,
453                         equation.alpha_dst_factor, true, &out->alpha);
454 
455    out->color_mask = equation.color_mask;
456 }
457 
458 uint32_t
pan_pack_blend(const struct pan_blend_equation equation)459 pan_pack_blend(const struct pan_blend_equation equation)
460 {
461    struct mali_blend_equation_packed out;
462 
463    pan_pack(&out, BLEND_EQUATION, cfg) {
464       pan_blend_to_fixed_function_equation(equation, &cfg);
465    }
466 
467    return out.opaque[0];
468 }
469 
470 DERIVE_HASH_TABLE(pan_blend_shader_key);
471 
472 void
pan_blend_shader_cache_init(struct pan_blend_shader_cache * cache,unsigned gpu_id)473 pan_blend_shader_cache_init(struct pan_blend_shader_cache *cache,
474                             unsigned gpu_id)
475 {
476    cache->gpu_id = gpu_id;
477    cache->shaders = pan_blend_shader_key_table_create(NULL);
478    pthread_mutex_init(&cache->lock, NULL);
479 }
480 
481 void
pan_blend_shader_cache_cleanup(struct pan_blend_shader_cache * cache)482 pan_blend_shader_cache_cleanup(struct pan_blend_shader_cache *cache)
483 {
484    _mesa_hash_table_destroy(cache->shaders, NULL);
485    pthread_mutex_destroy(&cache->lock);
486 }
487 
488 #else /* ifndef PAN_ARCH */
489 
490 static const char *
logicop_str(enum pipe_logicop logicop)491 logicop_str(enum pipe_logicop logicop)
492 {
493    switch (logicop) {
494    case PIPE_LOGICOP_CLEAR:
495       return "clear";
496    case PIPE_LOGICOP_NOR:
497       return "nor";
498    case PIPE_LOGICOP_AND_INVERTED:
499       return "and-inverted";
500    case PIPE_LOGICOP_COPY_INVERTED:
501       return "copy-inverted";
502    case PIPE_LOGICOP_AND_REVERSE:
503       return "and-reverse";
504    case PIPE_LOGICOP_INVERT:
505       return "invert";
506    case PIPE_LOGICOP_XOR:
507       return "xor";
508    case PIPE_LOGICOP_NAND:
509       return "nand";
510    case PIPE_LOGICOP_AND:
511       return "and";
512    case PIPE_LOGICOP_EQUIV:
513       return "equiv";
514    case PIPE_LOGICOP_NOOP:
515       return "noop";
516    case PIPE_LOGICOP_OR_INVERTED:
517       return "or-inverted";
518    case PIPE_LOGICOP_COPY:
519       return "copy";
520    case PIPE_LOGICOP_OR_REVERSE:
521       return "or-reverse";
522    case PIPE_LOGICOP_OR:
523       return "or";
524    case PIPE_LOGICOP_SET:
525       return "set";
526    default:
527       unreachable("Invalid logicop\n");
528    }
529 }
530 
531 static void
get_equation_str(const struct pan_blend_rt_state * rt_state,char * str,unsigned len)532 get_equation_str(const struct pan_blend_rt_state *rt_state, char *str,
533                  unsigned len)
534 {
535    const char *funcs[] = {
536       "add", "sub", "reverse_sub", "min", "max",
537    };
538    const char *factors[] = {
539       "",           "one",           "src_color",   "src_alpha",   "dst_alpha",
540       "dst_color",  "src_alpha_sat", "const_color", "const_alpha", "src1_color",
541       "src1_alpha",
542    };
543    int ret;
544 
545    if (!rt_state->equation.blend_enable) {
546       ret = snprintf(str, len, "replace(%s%s%s%s)",
547                      (rt_state->equation.color_mask & 1) ? "R" : "",
548                      (rt_state->equation.color_mask & 2) ? "G" : "",
549                      (rt_state->equation.color_mask & 4) ? "B" : "",
550                      (rt_state->equation.color_mask & 8) ? "A" : "");
551       assert(ret > 0);
552       return;
553    }
554 
555    if (rt_state->equation.color_mask & 7) {
556       assert(rt_state->equation.rgb_func < ARRAY_SIZE(funcs));
557       ret = snprintf(
558          str, len, "%s%s%s(func=%s,src_factor=%s%s,dst_factor=%s%s)%s",
559          (rt_state->equation.color_mask & 1) ? "R" : "",
560          (rt_state->equation.color_mask & 2) ? "G" : "",
561          (rt_state->equation.color_mask & 4) ? "B" : "",
562          funcs[rt_state->equation.rgb_func],
563          util_blendfactor_is_inverted(rt_state->equation.rgb_src_factor) ? "-"
564                                                                          : "",
565          factors[util_blendfactor_without_invert(
566             rt_state->equation.rgb_src_factor)],
567          util_blendfactor_is_inverted(rt_state->equation.rgb_dst_factor) ? "-"
568                                                                          : "",
569          factors[util_blendfactor_without_invert(
570             rt_state->equation.rgb_dst_factor)],
571          rt_state->equation.color_mask & 8 ? ";" : "");
572       assert(ret > 0);
573       str += ret;
574       len -= ret;
575    }
576 
577    if (rt_state->equation.color_mask & 8) {
578       assert(rt_state->equation.alpha_func < ARRAY_SIZE(funcs));
579       ret = snprintf(
580          str, len, "A(func=%s,src_factor=%s%s,dst_factor=%s%s)",
581          funcs[rt_state->equation.alpha_func],
582          util_blendfactor_is_inverted(rt_state->equation.alpha_src_factor) ? "-"
583                                                                            : "",
584          factors[util_blendfactor_without_invert(
585             rt_state->equation.alpha_src_factor)],
586          util_blendfactor_is_inverted(rt_state->equation.alpha_dst_factor) ? "-"
587                                                                            : "",
588          factors[util_blendfactor_without_invert(
589             rt_state->equation.alpha_dst_factor)]);
590       assert(ret > 0);
591       str += ret;
592       len -= ret;
593    }
594 }
595 
596 static bool
pan_inline_blend_constants(nir_builder * b,nir_intrinsic_instr * intr,void * data)597 pan_inline_blend_constants(nir_builder *b, nir_intrinsic_instr *intr,
598                            void *data)
599 {
600    if (intr->intrinsic != nir_intrinsic_load_blend_const_color_rgba)
601       return false;
602 
603    float *floats = data;
604    const nir_const_value constants[4] = {
605       nir_const_value_for_float(floats[0], 32),
606       nir_const_value_for_float(floats[1], 32),
607       nir_const_value_for_float(floats[2], 32),
608       nir_const_value_for_float(floats[3], 32)};
609 
610    b->cursor = nir_after_instr(&intr->instr);
611    nir_def *constant = nir_build_imm(b, 4, 32, constants);
612    nir_def_replace(&intr->def, constant);
613    return true;
614 }
615 
616 nir_shader *
GENX(pan_blend_create_shader)617 GENX(pan_blend_create_shader)(const struct pan_blend_state *state,
618                               nir_alu_type src0_type, nir_alu_type src1_type,
619                               unsigned rt)
620 {
621    const struct pan_blend_rt_state *rt_state = &state->rts[rt];
622    char equation_str[128] = {0};
623 
624    get_equation_str(rt_state, equation_str, sizeof(equation_str));
625 
626    nir_builder b = nir_builder_init_simple_shader(
627       MESA_SHADER_FRAGMENT, GENX(pan_shader_get_compiler_options)(),
628       "pan_blend(rt=%d,fmt=%s,nr_samples=%d,%s=%s)", rt,
629       util_format_name(rt_state->format), rt_state->nr_samples,
630       state->logicop_enable ? "logicop" : "equation",
631       state->logicop_enable ? logicop_str(state->logicop_func) : equation_str);
632 
633    const struct util_format_description *format_desc =
634       util_format_description(rt_state->format);
635    nir_alu_type nir_type = pan_unpacked_type_for_format(format_desc);
636 
637    /* Bifrost/Valhall support 16-bit and 32-bit register formats for
638     * LD_TILE/ST_TILE/BLEND, but do not support 8-bit. Rather than making
639     * the fragment output 8-bit and inserting extra conversions in the
640     * compiler, promote the output to 16-bit. The larger size is still
641     * compatible with correct conversion semantics.
642     */
643    if (PAN_ARCH >= 6 && nir_alu_type_get_type_size(nir_type) == 8)
644       nir_type = nir_alu_type_get_base_type(nir_type) | 16;
645 
646    nir_lower_blend_options options = {
647       .logicop_enable = state->logicop_enable,
648       .logicop_func = state->logicop_func,
649    };
650 
651    options.rt[rt].colormask = rt_state->equation.color_mask;
652    options.format[rt] = rt_state->format;
653 
654    if (!rt_state->equation.blend_enable) {
655       static const nir_lower_blend_channel replace = {
656          .func = PIPE_BLEND_ADD,
657          .src_factor = PIPE_BLENDFACTOR_ONE,
658          .dst_factor = PIPE_BLENDFACTOR_ZERO,
659       };
660 
661       options.rt[rt].rgb = replace;
662       options.rt[rt].alpha = replace;
663    } else {
664       options.rt[rt].rgb.func = rt_state->equation.rgb_func;
665       options.rt[rt].rgb.src_factor = rt_state->equation.rgb_src_factor;
666       options.rt[rt].rgb.dst_factor = rt_state->equation.rgb_dst_factor;
667       options.rt[rt].alpha.func = rt_state->equation.alpha_func;
668       options.rt[rt].alpha.src_factor = rt_state->equation.alpha_src_factor;
669       options.rt[rt].alpha.dst_factor = rt_state->equation.alpha_dst_factor;
670    }
671 
672    nir_def *pixel = nir_load_barycentric_pixel(&b, 32, .interp_mode = 1);
673    nir_def *zero = nir_imm_int(&b, 0);
674 
675    for (unsigned i = 0; i < 2; ++i) {
676       nir_alu_type src_type =
677          (i == 1 ? src1_type : src0_type) ?: nir_type_float32;
678 
679       /* HACK: workaround buggy TGSI shaders (u_blitter) */
680       src_type = nir_alu_type_get_base_type(nir_type) |
681                  nir_alu_type_get_type_size(src_type);
682 
683       nir_def *src = nir_load_interpolated_input(
684          &b, 4, nir_alu_type_get_type_size(src_type), pixel, zero,
685          .io_semantics.location = i ? VARYING_SLOT_VAR0 : VARYING_SLOT_COL0,
686          .io_semantics.num_slots = 1, .base = i, .dest_type = src_type);
687 
688       if (state->alpha_to_one && src_type == nir_type_float32) {
689          /* force alpha to 1 */
690          src = nir_vector_insert_imm(&b, src,
691                                      nir_imm_floatN_t(&b, 1.0, src->bit_size),
692                                      3);
693       }
694 
695       /* On Midgard, the blend shader is responsible for format conversion.
696        * As the OpenGL spec requires integer conversions to saturate, we must
697        * saturate ourselves here. On Bifrost and later, the conversion
698        * hardware handles this automatically.
699        */
700       nir_alu_type T = nir_alu_type_get_base_type(nir_type);
701       bool should_saturate = (PAN_ARCH <= 5) && (T != nir_type_float);
702       src = nir_convert_with_rounding(&b, src, T, nir_type,
703                                       nir_rounding_mode_undef, should_saturate);
704 
705       nir_store_output(&b, src, zero, .write_mask = BITFIELD_MASK(4),
706                        .src_type = nir_type,
707                        .io_semantics.location = FRAG_RESULT_DATA0 + rt,
708                        .io_semantics.num_slots = 1,
709                        .io_semantics.dual_source_blend_index = i);
710    }
711 
712    b.shader->info.io_lowered = true;
713 
714    NIR_PASS(_, b.shader, nir_lower_blend, &options);
715 
716    return b.shader;
717 }
718 
719 #if PAN_ARCH >= 6
720 uint64_t
GENX(pan_blend_get_internal_desc)721 GENX(pan_blend_get_internal_desc)(enum pipe_format fmt, unsigned rt,
722                                   unsigned force_size, bool dithered)
723 {
724    const struct util_format_description *desc = util_format_description(fmt);
725    struct mali_internal_blend_packed res;
726 
727    pan_pack(&res, INTERNAL_BLEND, cfg) {
728       cfg.mode = MALI_BLEND_MODE_OPAQUE;
729       cfg.fixed_function.num_comps = desc->nr_channels;
730       cfg.fixed_function.rt = rt;
731 
732       nir_alu_type T = pan_unpacked_type_for_format(desc);
733 
734       if (force_size)
735          T = nir_alu_type_get_base_type(T) | force_size;
736 
737       switch (T) {
738       case nir_type_float16:
739          cfg.fixed_function.conversion.register_format =
740             MALI_REGISTER_FILE_FORMAT_F16;
741          break;
742       case nir_type_float32:
743          cfg.fixed_function.conversion.register_format =
744             MALI_REGISTER_FILE_FORMAT_F32;
745          break;
746       case nir_type_int8:
747       case nir_type_int16:
748          cfg.fixed_function.conversion.register_format =
749             MALI_REGISTER_FILE_FORMAT_I16;
750          break;
751       case nir_type_int32:
752          cfg.fixed_function.conversion.register_format =
753             MALI_REGISTER_FILE_FORMAT_I32;
754          break;
755       case nir_type_uint8:
756       case nir_type_uint16:
757          cfg.fixed_function.conversion.register_format =
758             MALI_REGISTER_FILE_FORMAT_U16;
759          break;
760       case nir_type_uint32:
761          cfg.fixed_function.conversion.register_format =
762             MALI_REGISTER_FILE_FORMAT_U32;
763          break;
764       default:
765          unreachable("Invalid format");
766       }
767 
768       cfg.fixed_function.conversion.memory_format =
769          GENX(panfrost_dithered_format_from_pipe_format)(fmt, dithered);
770    }
771 
772    return res.opaque[0] | ((uint64_t)res.opaque[1] << 32);
773 }
774 
775 static bool
inline_rt_conversion(nir_builder * b,nir_intrinsic_instr * intr,void * data)776 inline_rt_conversion(nir_builder *b, nir_intrinsic_instr *intr, void *data)
777 {
778    if (intr->intrinsic != nir_intrinsic_load_rt_conversion_pan)
779       return false;
780 
781    enum pipe_format *formats = data;
782    unsigned rt = nir_intrinsic_base(intr);
783    unsigned size = nir_alu_type_get_type_size(nir_intrinsic_src_type(intr));
784    uint64_t conversion =
785       GENX(pan_blend_get_internal_desc)(formats[rt], rt, size, false);
786 
787    b->cursor = nir_after_instr(&intr->instr);
788    nir_def_rewrite_uses(&intr->def, nir_imm_int(b, conversion >> 32));
789    return true;
790 }
791 
792 bool
GENX(pan_inline_rt_conversion)793 GENX(pan_inline_rt_conversion)(nir_shader *s, enum pipe_format *formats)
794 {
795    return nir_shader_intrinsics_pass(s, inline_rt_conversion,
796                                      nir_metadata_control_flow, formats);
797 }
798 #endif
799 
800 struct pan_blend_shader_variant *
GENX(pan_blend_get_shader_locked)801 GENX(pan_blend_get_shader_locked)(struct pan_blend_shader_cache *cache,
802                                   const struct pan_blend_state *state,
803                                   nir_alu_type src0_type,
804                                   nir_alu_type src1_type, unsigned rt)
805 {
806    struct pan_blend_shader_key key = {
807       .format = state->rts[rt].format,
808       .src0_type = src0_type,
809       .src1_type = src1_type,
810       .rt = rt,
811       .has_constants = pan_blend_constant_mask(state->rts[rt].equation) != 0,
812       .logicop_enable = state->logicop_enable,
813       .logicop_func = state->logicop_func,
814       .nr_samples = state->rts[rt].nr_samples,
815       .equation = state->rts[rt].equation,
816       .alpha_to_one = state->alpha_to_one,
817    };
818    /* Blend shaders should only be used for blending on Bifrost onwards */
819    assert(PAN_ARCH <= 5 || state->logicop_enable || state->alpha_to_one ||
820           !pan_blend_is_opaque(state->rts[rt].equation));
821    assert(state->rts[rt].equation.color_mask != 0);
822 
823    struct hash_entry *he =
824       _mesa_hash_table_search(cache->shaders, &key);
825    struct pan_blend_shader *shader = he ? he->data : NULL;
826 
827    if (!shader) {
828       shader = rzalloc(cache->shaders, struct pan_blend_shader);
829       shader->key = key;
830       list_inithead(&shader->variants);
831       _mesa_hash_table_insert(cache->shaders, &shader->key, shader);
832    }
833 
834    list_for_each_entry(struct pan_blend_shader_variant, iter, &shader->variants,
835                        node) {
836       if (!key.has_constants ||
837           !memcmp(iter->constants, state->constants, sizeof(iter->constants))) {
838          return iter;
839       }
840    }
841 
842    struct pan_blend_shader_variant *variant = NULL;
843 
844    if (shader->nvariants < PAN_BLEND_SHADER_MAX_VARIANTS) {
845       variant = rzalloc(shader, struct pan_blend_shader_variant);
846       util_dynarray_init(&variant->binary, variant);
847       list_add(&variant->node, &shader->variants);
848       shader->nvariants++;
849    } else {
850       variant = list_last_entry(&shader->variants,
851                                 struct pan_blend_shader_variant, node);
852       list_del(&variant->node);
853       list_add(&variant->node, &shader->variants);
854       util_dynarray_clear(&variant->binary);
855    }
856 
857    memcpy(variant->constants, state->constants, sizeof(variant->constants));
858 
859    nir_shader *nir =
860       GENX(pan_blend_create_shader)(state, src0_type, src1_type, rt);
861 
862    nir_shader_intrinsics_pass(nir, pan_inline_blend_constants,
863                               nir_metadata_control_flow,
864                               (void *)state->constants);
865 
866    /* Compile the NIR shader */
867    struct panfrost_compile_inputs inputs = {
868       .gpu_id = cache->gpu_id,
869       .is_blend = true,
870       .blend.nr_samples = key.nr_samples,
871    };
872 
873    enum pipe_format rt_formats[8] = {0};
874    rt_formats[rt] = key.format;
875 
876 #if PAN_ARCH >= 6
877    inputs.blend.bifrost_blend_desc =
878       GENX(pan_blend_get_internal_desc)(key.format, key.rt, 0, false);
879 #endif
880 
881    struct pan_shader_info info;
882    pan_shader_preprocess(nir, inputs.gpu_id);
883 
884 #if PAN_ARCH >= 6
885    NIR_PASS(_, nir, GENX(pan_inline_rt_conversion), rt_formats);
886 #else
887    NIR_PASS(_, nir, pan_lower_framebuffer, rt_formats,
888             pan_raw_format_mask_midgard(rt_formats), MAX2(key.nr_samples, 1),
889             cache->gpu_id < 0x700);
890 #endif
891 
892    GENX(pan_shader_compile)(nir, &inputs, &variant->binary, &info);
893 
894    variant->work_reg_count = info.work_reg_count;
895 
896 #if PAN_ARCH <= 5
897    variant->first_tag = info.midgard.first_tag;
898 #endif
899 
900    ralloc_free(nir);
901 
902    return variant;
903 }
904 #endif /* ifndef PAN_ARCH */
905