1 /*
2 * Copyright (C) 2018 Alyssa Rosenzweig
3 * Copyright (C) 2019-2021 Collabora, Ltd.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24
25 #include "pan_blend.h"
26 #include "util/blend.h"
27
28 #ifdef PAN_ARCH
29 #include "pan_shader.h"
30 #endif
31
32 #include "compiler/nir/nir.h"
33 #include "compiler/nir/nir_builder.h"
34 #include "compiler/nir/nir_conversion_builder.h"
35 #include "compiler/nir/nir_lower_blend.h"
36 #include "panfrost/util/pan_lower_framebuffer.h"
37 #include "util/format/u_format.h"
38 #include "pan_texture.h"
39
40 #ifndef PAN_ARCH
41
42 /* Fixed function blending */
43
44 static bool
factor_is_supported(enum pipe_blendfactor factor)45 factor_is_supported(enum pipe_blendfactor factor)
46 {
47 factor = util_blendfactor_without_invert(factor);
48
49 return factor != PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE &&
50 factor != PIPE_BLENDFACTOR_SRC1_COLOR &&
51 factor != PIPE_BLENDFACTOR_SRC1_ALPHA;
52 }
53
/* OpenGL allows encoding (src*dest + dest*src) which is incompatible with
 * Midgard style blending since there are two multiplies. However, it may be
 * factored as 2*src*dest = dest*(2*src), which can be encoded on Bifrost as 0
 * + dest * (2*src) with the new source_2 value of C. Detect this case. */
58
59 static bool
is_2srcdest(enum pipe_blend_func blend_func,enum pipe_blendfactor src_factor,enum pipe_blendfactor dest_factor,bool is_alpha)60 is_2srcdest(enum pipe_blend_func blend_func, enum pipe_blendfactor src_factor,
61 enum pipe_blendfactor dest_factor, bool is_alpha)
62 {
63 return (blend_func == PIPE_BLEND_ADD) &&
64 ((src_factor == PIPE_BLENDFACTOR_DST_COLOR) ||
65 ((src_factor == PIPE_BLENDFACTOR_DST_ALPHA) && is_alpha)) &&
66 ((dest_factor == PIPE_BLENDFACTOR_SRC_COLOR) ||
67 ((dest_factor == PIPE_BLENDFACTOR_SRC_ALPHA) && is_alpha));
68 }
69
70 static bool
can_fixed_function_equation(enum pipe_blend_func blend_func,enum pipe_blendfactor src_factor,enum pipe_blendfactor dest_factor,bool is_alpha,bool supports_2src)71 can_fixed_function_equation(enum pipe_blend_func blend_func,
72 enum pipe_blendfactor src_factor,
73 enum pipe_blendfactor dest_factor, bool is_alpha,
74 bool supports_2src)
75 {
76 if (is_2srcdest(blend_func, src_factor, dest_factor, is_alpha))
77 return supports_2src;
78
79 if (blend_func != PIPE_BLEND_ADD && blend_func != PIPE_BLEND_SUBTRACT &&
80 blend_func != PIPE_BLEND_REVERSE_SUBTRACT)
81 return false;
82
83 if (!factor_is_supported(src_factor) || !factor_is_supported(dest_factor))
84 return false;
85
86 /* Fixed function requires src/dest factors to match (up to invert) or be
87 * zero/one.
88 */
89 enum pipe_blendfactor src = util_blendfactor_without_invert(src_factor);
90 enum pipe_blendfactor dest = util_blendfactor_without_invert(dest_factor);
91
92 return (src == dest) || (src == PIPE_BLENDFACTOR_ONE) ||
93 (dest == PIPE_BLENDFACTOR_ONE);
94 }
95
96 static unsigned
blend_factor_constant_mask(enum pipe_blendfactor factor)97 blend_factor_constant_mask(enum pipe_blendfactor factor)
98 {
99 factor = util_blendfactor_without_invert(factor);
100
101 if (factor == PIPE_BLENDFACTOR_CONST_COLOR)
102 return 0b0111; /* RGB */
103 else if (factor == PIPE_BLENDFACTOR_CONST_ALPHA)
104 return 0b1000; /* A */
105 else
106 return 0b0000; /* - */
107 }
108
109 unsigned
pan_blend_constant_mask(const struct pan_blend_equation eq)110 pan_blend_constant_mask(const struct pan_blend_equation eq)
111 {
112 return blend_factor_constant_mask(eq.rgb_src_factor) |
113 blend_factor_constant_mask(eq.rgb_dst_factor) |
114 blend_factor_constant_mask(eq.alpha_src_factor) |
115 blend_factor_constant_mask(eq.alpha_dst_factor);
116 }
117
118 /* Only "homogenous" (scalar or vector with all components equal) constants are
119 * valid for fixed-function, so check for this condition */
120
121 bool
pan_blend_is_homogenous_constant(unsigned mask,const float * constants)122 pan_blend_is_homogenous_constant(unsigned mask, const float *constants)
123 {
124 float constant = pan_blend_get_constant(mask, constants);
125
126 u_foreach_bit(i, mask) {
127 if (constants[i] != constant)
128 return false;
129 }
130
131 return true;
132 }
133
134 /* Determines if an equation can run in fixed function */
135
136 bool
pan_blend_can_fixed_function(const struct pan_blend_equation equation,bool supports_2src)137 pan_blend_can_fixed_function(const struct pan_blend_equation equation,
138 bool supports_2src)
139 {
140 return !equation.blend_enable ||
141 (can_fixed_function_equation(
142 equation.rgb_func, equation.rgb_src_factor,
143 equation.rgb_dst_factor, false, supports_2src) &&
144 can_fixed_function_equation(
145 equation.alpha_func, equation.alpha_src_factor,
146 equation.alpha_dst_factor, true, supports_2src));
147 }
148
149 static enum mali_blend_operand_c
to_c_factor(enum pipe_blendfactor factor)150 to_c_factor(enum pipe_blendfactor factor)
151 {
152 switch (util_blendfactor_without_invert(factor)) {
153 case PIPE_BLENDFACTOR_ONE:
154 /* Extra invert to flip back in caller */
155 return MALI_BLEND_OPERAND_C_ZERO;
156
157 case PIPE_BLENDFACTOR_SRC_ALPHA:
158 return MALI_BLEND_OPERAND_C_SRC_ALPHA;
159
160 case PIPE_BLENDFACTOR_DST_ALPHA:
161 return MALI_BLEND_OPERAND_C_DEST_ALPHA;
162
163 case PIPE_BLENDFACTOR_SRC_COLOR:
164 return MALI_BLEND_OPERAND_C_SRC;
165
166 case PIPE_BLENDFACTOR_DST_COLOR:
167 return MALI_BLEND_OPERAND_C_DEST;
168
169 case PIPE_BLENDFACTOR_CONST_COLOR:
170 case PIPE_BLENDFACTOR_CONST_ALPHA:
171 return MALI_BLEND_OPERAND_C_CONSTANT;
172
173 default:
174 unreachable("Unsupported blend factor");
175 }
176 }
177
178 static void
to_panfrost_function(enum pipe_blend_func blend_func,enum pipe_blendfactor src_factor,enum pipe_blendfactor dest_factor,bool is_alpha,struct MALI_BLEND_FUNCTION * function)179 to_panfrost_function(enum pipe_blend_func blend_func,
180 enum pipe_blendfactor src_factor,
181 enum pipe_blendfactor dest_factor, bool is_alpha,
182 struct MALI_BLEND_FUNCTION *function)
183 {
184 assert(can_fixed_function_equation(blend_func, src_factor, dest_factor,
185 is_alpha, true));
186
187 /* We handle ZERO/ONE specially since it's the hardware has 0 and can invert
188 * to 1 but Gallium has 0 as the uninverted version.
189 */
190 bool src_inverted =
191 util_blendfactor_is_inverted(src_factor) ^
192 (util_blendfactor_without_invert(src_factor) == PIPE_BLENDFACTOR_ONE);
193
194 bool dest_inverted =
195 util_blendfactor_is_inverted(dest_factor) ^
196 (util_blendfactor_without_invert(dest_factor) == PIPE_BLENDFACTOR_ONE);
197
198 if (src_factor == PIPE_BLENDFACTOR_ZERO) {
199 function->a = MALI_BLEND_OPERAND_A_ZERO;
200 function->b = MALI_BLEND_OPERAND_B_DEST;
201 if (blend_func == PIPE_BLEND_SUBTRACT)
202 function->negate_b = true;
203 function->invert_c = dest_inverted;
204 function->c = to_c_factor(dest_factor);
205 } else if (src_factor == PIPE_BLENDFACTOR_ONE) {
206 function->a = MALI_BLEND_OPERAND_A_SRC;
207 function->b = MALI_BLEND_OPERAND_B_DEST;
208 if (blend_func == PIPE_BLEND_SUBTRACT)
209 function->negate_b = true;
210 else if (blend_func == PIPE_BLEND_REVERSE_SUBTRACT)
211 function->negate_a = true;
212 function->invert_c = dest_inverted;
213 function->c = to_c_factor(dest_factor);
214 } else if (dest_factor == PIPE_BLENDFACTOR_ZERO) {
215 function->a = MALI_BLEND_OPERAND_A_ZERO;
216 function->b = MALI_BLEND_OPERAND_B_SRC;
217 if (blend_func == PIPE_BLEND_REVERSE_SUBTRACT)
218 function->negate_b = true;
219 function->invert_c = src_inverted;
220 function->c = to_c_factor(src_factor);
221 } else if (dest_factor == PIPE_BLENDFACTOR_ONE) {
222 function->a = MALI_BLEND_OPERAND_A_DEST;
223 function->b = MALI_BLEND_OPERAND_B_SRC;
224 if (blend_func == PIPE_BLEND_SUBTRACT)
225 function->negate_a = true;
226 else if (blend_func == PIPE_BLEND_REVERSE_SUBTRACT)
227 function->negate_b = true;
228 function->invert_c = src_inverted;
229 function->c = to_c_factor(src_factor);
230 } else if (src_factor == dest_factor) {
231 function->a = MALI_BLEND_OPERAND_A_ZERO;
232 function->invert_c = src_inverted;
233 function->c = to_c_factor(src_factor);
234
235 switch (blend_func) {
236 case PIPE_BLEND_ADD:
237 function->b = MALI_BLEND_OPERAND_B_SRC_PLUS_DEST;
238 break;
239 case PIPE_BLEND_REVERSE_SUBTRACT:
240 function->negate_b = true;
241 FALLTHROUGH;
242 case PIPE_BLEND_SUBTRACT:
243 function->b = MALI_BLEND_OPERAND_B_SRC_MINUS_DEST;
244 break;
245 default:
246 unreachable("Invalid blend function");
247 }
248 } else if (is_2srcdest(blend_func, src_factor, dest_factor, is_alpha)) {
249 /* src*dest + dest*src = 2*src*dest = 0 + dest*(2*src) */
250 function->a = MALI_BLEND_OPERAND_A_ZERO;
251 function->b = MALI_BLEND_OPERAND_B_DEST;
252 function->c = MALI_BLEND_OPERAND_C_SRC_X_2;
253 } else {
254 assert(util_blendfactor_without_invert(src_factor) ==
255 util_blendfactor_without_invert(dest_factor) &&
256 src_inverted != dest_inverted);
257
258 function->a = MALI_BLEND_OPERAND_A_DEST;
259 function->invert_c = src_inverted;
260 function->c = to_c_factor(src_factor);
261
262 switch (blend_func) {
263 case PIPE_BLEND_ADD:
264 function->b = MALI_BLEND_OPERAND_B_SRC_MINUS_DEST;
265 break;
266 case PIPE_BLEND_REVERSE_SUBTRACT:
267 function->b = MALI_BLEND_OPERAND_B_SRC_PLUS_DEST;
268 function->negate_b = true;
269 break;
270 case PIPE_BLEND_SUBTRACT:
271 function->b = MALI_BLEND_OPERAND_B_SRC_PLUS_DEST;
272 function->negate_a = true;
273 break;
274 default:
275 unreachable("Invalid blend function\n");
276 }
277 }
278 }
279
280 bool
pan_blend_is_opaque(const struct pan_blend_equation equation)281 pan_blend_is_opaque(const struct pan_blend_equation equation)
282 {
283 /* If a channel is masked out, we can't use opaque mode even if
284 * blending is disabled, since we need a tilebuffer read in there */
285 if (equation.color_mask != 0xF)
286 return false;
287
288 /* With nothing masked out, disabled bledning is opaque */
289 if (!equation.blend_enable)
290 return true;
291
292 /* Also detect open-coded opaque blending */
293 return equation.rgb_src_factor == PIPE_BLENDFACTOR_ONE &&
294 equation.rgb_dst_factor == PIPE_BLENDFACTOR_ZERO &&
295 (equation.rgb_func == PIPE_BLEND_ADD ||
296 equation.rgb_func == PIPE_BLEND_SUBTRACT) &&
297 equation.alpha_src_factor == PIPE_BLENDFACTOR_ONE &&
298 equation.alpha_dst_factor == PIPE_BLENDFACTOR_ZERO &&
299 (equation.alpha_func == PIPE_BLEND_ADD ||
300 equation.alpha_func == PIPE_BLEND_SUBTRACT);
301 }
302
303 /* Check if a factor represents a constant value of val, assuming src_alpha is
304 * the given constant.
305 */
306
307 static inline bool
is_factor_01(enum pipe_blendfactor factor,unsigned val,unsigned srca)308 is_factor_01(enum pipe_blendfactor factor, unsigned val, unsigned srca)
309 {
310 assert(val == 0 || val == 1);
311 assert(srca == 0 || srca == 1);
312
313 switch (factor) {
314 case PIPE_BLENDFACTOR_ZERO:
315 return (val == 0);
316
317 case PIPE_BLENDFACTOR_ONE:
318 return (val == 1);
319
320 case PIPE_BLENDFACTOR_SRC_ALPHA:
321 return (val == srca);
322
323 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
324 return (val == (1 - srca));
325
326 default:
327 return false;
328 }
329 }
330
331 /* Returns if src alpha = 0 implies the blended colour equals the destination
332 * colour. Suppose source alpha = 0 and consider cases.
333 *
334 * Additive blending: Equivalent to D = S * f_s + D * f_d for all D and all S
335 * with S_a = 0, for each component. For the alpha component (if it unmasked),
336 * we have S_a = 0 so this reduces to D = D * f_d <===> f_d = 1. For RGB
337 * components (if unmasked), we need f_s = 0 and f_d = 1.
338 *
339 * Subtractive blending: Fails in general (D = S * f_S - D * f_D). We
340 * would need f_S = 0 and f_D = -1, which is not valid in the APIs.
341 *
342 * Reverse subtractive blending (D = D * f_D - S * f_S), we need f_D = 1
343 * and f_S = 0 up to masking. This is the same as additive blending.
344 *
345 * Min/max: Fails in general on the RGB components.
346 */
347
348 bool
pan_blend_alpha_zero_nop(const struct pan_blend_equation eq)349 pan_blend_alpha_zero_nop(const struct pan_blend_equation eq)
350 {
351 if (eq.rgb_func != PIPE_BLEND_ADD &&
352 eq.rgb_func != PIPE_BLEND_REVERSE_SUBTRACT)
353 return false;
354
355 if (eq.color_mask & 0x8) {
356 if (!is_factor_01(eq.alpha_dst_factor, 1, 0))
357 return false;
358 }
359
360 if (eq.color_mask & 0x7) {
361 if (!is_factor_01(eq.rgb_dst_factor, 1, 0))
362 return false;
363
364 if (!is_factor_01(eq.rgb_src_factor, 0, 0))
365 return false;
366 }
367
368 return true;
369 }
370
371 /* Returns if src alpha = 1 implies the blended colour equals the source
372 * colour. Suppose source alpha = 1 and consider cases.
373 *
374 * Additive blending: S = S * f_s + D * f_d. We need f_s = 1 and f_d = 0.
375 *
376 * Subtractive blending: S = S * f_s - D * f_d. Same as additive blending.
377 *
378 * Reverse subtractive blending: S = D * f_d - S * f_s. Fails in general since
379 * it would require f_s = -1, which is not valid in the APIs.
380 *
381 * Min/max: Fails in general on the RGB components.
382 *
383 * Note if any component is masked, we can't use a store.
384 */
385
386 bool
pan_blend_alpha_one_store(const struct pan_blend_equation eq)387 pan_blend_alpha_one_store(const struct pan_blend_equation eq)
388 {
389 if (eq.rgb_func != PIPE_BLEND_ADD && eq.rgb_func != PIPE_BLEND_SUBTRACT)
390 return false;
391
392 if (eq.color_mask != 0xf)
393 return false;
394
395 return is_factor_01(eq.rgb_src_factor, 1, 1) &&
396 is_factor_01(eq.alpha_src_factor, 1, 1) &&
397 is_factor_01(eq.rgb_dst_factor, 0, 1) &&
398 is_factor_01(eq.alpha_dst_factor, 0, 1);
399 }
400
401 static bool
is_dest_factor(enum pipe_blendfactor factor,bool alpha)402 is_dest_factor(enum pipe_blendfactor factor, bool alpha)
403 {
404 factor = util_blendfactor_without_invert(factor);
405
406 return factor == PIPE_BLENDFACTOR_DST_ALPHA ||
407 factor == PIPE_BLENDFACTOR_DST_COLOR ||
408 (factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE && !alpha);
409 }
410
411 /* Determines if a blend equation reads back the destination. This can occur by
412 * explicitly referencing the destination in the blend equation, or by using a
413 * partial writemask. */
414
415 bool
pan_blend_reads_dest(const struct pan_blend_equation equation)416 pan_blend_reads_dest(const struct pan_blend_equation equation)
417 {
418 if (equation.color_mask && equation.color_mask != 0xF)
419 return true;
420
421 if (!equation.blend_enable)
422 return false;
423
424 return is_dest_factor(equation.rgb_src_factor, false) ||
425 is_dest_factor(equation.alpha_src_factor, true) ||
426 equation.rgb_dst_factor != PIPE_BLENDFACTOR_ZERO ||
427 equation.alpha_dst_factor != PIPE_BLENDFACTOR_ZERO;
428 }
429
430 /* Create the descriptor for a fixed blend mode given the corresponding API
431 * state. Assumes the equation can be represented as fixed-function. */
432
433 void
pan_blend_to_fixed_function_equation(const struct pan_blend_equation equation,struct MALI_BLEND_EQUATION * out)434 pan_blend_to_fixed_function_equation(const struct pan_blend_equation equation,
435 struct MALI_BLEND_EQUATION *out)
436 {
437 /* If no blending is enabled, default back on `replace` mode */
438 if (!equation.blend_enable) {
439 out->color_mask = equation.color_mask;
440 out->rgb.a = MALI_BLEND_OPERAND_A_SRC;
441 out->rgb.b = MALI_BLEND_OPERAND_B_SRC;
442 out->rgb.c = MALI_BLEND_OPERAND_C_ZERO;
443 out->alpha.a = MALI_BLEND_OPERAND_A_SRC;
444 out->alpha.b = MALI_BLEND_OPERAND_B_SRC;
445 out->alpha.c = MALI_BLEND_OPERAND_C_ZERO;
446 return;
447 }
448
449 /* Compile the fixed-function blend */
450 to_panfrost_function(equation.rgb_func, equation.rgb_src_factor,
451 equation.rgb_dst_factor, false, &out->rgb);
452 to_panfrost_function(equation.alpha_func, equation.alpha_src_factor,
453 equation.alpha_dst_factor, true, &out->alpha);
454
455 out->color_mask = equation.color_mask;
456 }
457
458 uint32_t
pan_pack_blend(const struct pan_blend_equation equation)459 pan_pack_blend(const struct pan_blend_equation equation)
460 {
461 STATIC_ASSERT(sizeof(uint32_t) == MALI_BLEND_EQUATION_LENGTH);
462
463 uint32_t out = 0;
464
465 pan_pack(&out, BLEND_EQUATION, cfg) {
466 pan_blend_to_fixed_function_equation(equation, &cfg);
467 }
468
469 return out;
470 }
471
472 static uint32_t
pan_blend_shader_key_hash(const void * key)473 pan_blend_shader_key_hash(const void *key)
474 {
475 return _mesa_hash_data(key, sizeof(struct pan_blend_shader_key));
476 }
477
478 static bool
pan_blend_shader_key_equal(const void * a,const void * b)479 pan_blend_shader_key_equal(const void *a, const void *b)
480 {
481 return !memcmp(a, b, sizeof(struct pan_blend_shader_key));
482 }
483
484 void
pan_blend_shader_cache_init(struct pan_blend_shader_cache * cache,unsigned gpu_id)485 pan_blend_shader_cache_init(struct pan_blend_shader_cache *cache,
486 unsigned gpu_id)
487 {
488 cache->gpu_id = gpu_id;
489 cache->shaders = _mesa_hash_table_create(NULL, pan_blend_shader_key_hash,
490 pan_blend_shader_key_equal);
491 pthread_mutex_init(&cache->lock, NULL);
492 }
493
494 void
pan_blend_shader_cache_cleanup(struct pan_blend_shader_cache * cache)495 pan_blend_shader_cache_cleanup(struct pan_blend_shader_cache *cache)
496 {
497 _mesa_hash_table_destroy(cache->shaders, NULL);
498 pthread_mutex_destroy(&cache->lock);
499 }
500
501 #else /* ifndef PAN_ARCH */
502
503 static const char *
logicop_str(enum pipe_logicop logicop)504 logicop_str(enum pipe_logicop logicop)
505 {
506 switch (logicop) {
507 case PIPE_LOGICOP_CLEAR:
508 return "clear";
509 case PIPE_LOGICOP_NOR:
510 return "nor";
511 case PIPE_LOGICOP_AND_INVERTED:
512 return "and-inverted";
513 case PIPE_LOGICOP_COPY_INVERTED:
514 return "copy-inverted";
515 case PIPE_LOGICOP_AND_REVERSE:
516 return "and-reverse";
517 case PIPE_LOGICOP_INVERT:
518 return "invert";
519 case PIPE_LOGICOP_XOR:
520 return "xor";
521 case PIPE_LOGICOP_NAND:
522 return "nand";
523 case PIPE_LOGICOP_AND:
524 return "and";
525 case PIPE_LOGICOP_EQUIV:
526 return "equiv";
527 case PIPE_LOGICOP_NOOP:
528 return "noop";
529 case PIPE_LOGICOP_OR_INVERTED:
530 return "or-inverted";
531 case PIPE_LOGICOP_COPY:
532 return "copy";
533 case PIPE_LOGICOP_OR_REVERSE:
534 return "or-reverse";
535 case PIPE_LOGICOP_OR:
536 return "or";
537 case PIPE_LOGICOP_SET:
538 return "set";
539 default:
540 unreachable("Invalid logicop\n");
541 }
542 }
543
544 static void
get_equation_str(const struct pan_blend_rt_state * rt_state,char * str,unsigned len)545 get_equation_str(const struct pan_blend_rt_state *rt_state, char *str,
546 unsigned len)
547 {
548 const char *funcs[] = {
549 "add", "sub", "reverse_sub", "min", "max",
550 };
551 const char *factors[] = {
552 "", "one", "src_color", "src_alpha", "dst_alpha",
553 "dst_color", "src_alpha_sat", "const_color", "const_alpha", "src1_color",
554 "src1_alpha",
555 };
556 int ret;
557
558 if (!rt_state->equation.blend_enable) {
559 ret = snprintf(str, len, "replace(%s%s%s%s)",
560 (rt_state->equation.color_mask & 1) ? "R" : "",
561 (rt_state->equation.color_mask & 2) ? "G" : "",
562 (rt_state->equation.color_mask & 4) ? "B" : "",
563 (rt_state->equation.color_mask & 8) ? "A" : "");
564 assert(ret > 0);
565 return;
566 }
567
568 if (rt_state->equation.color_mask & 7) {
569 assert(rt_state->equation.rgb_func < ARRAY_SIZE(funcs));
570 ret = snprintf(
571 str, len, "%s%s%s(func=%s,src_factor=%s%s,dst_factor=%s%s)%s",
572 (rt_state->equation.color_mask & 1) ? "R" : "",
573 (rt_state->equation.color_mask & 2) ? "G" : "",
574 (rt_state->equation.color_mask & 4) ? "B" : "",
575 funcs[rt_state->equation.rgb_func],
576 util_blendfactor_is_inverted(rt_state->equation.rgb_src_factor) ? "-"
577 : "",
578 factors[util_blendfactor_without_invert(
579 rt_state->equation.rgb_src_factor)],
580 util_blendfactor_is_inverted(rt_state->equation.rgb_dst_factor) ? "-"
581 : "",
582 factors[util_blendfactor_without_invert(
583 rt_state->equation.rgb_dst_factor)],
584 rt_state->equation.color_mask & 8 ? ";" : "");
585 assert(ret > 0);
586 str += ret;
587 len -= ret;
588 }
589
590 if (rt_state->equation.color_mask & 8) {
591 assert(rt_state->equation.alpha_func < ARRAY_SIZE(funcs));
592 ret = snprintf(
593 str, len, "A(func=%s,src_factor=%s%s,dst_factor=%s%s)",
594 funcs[rt_state->equation.alpha_func],
595 util_blendfactor_is_inverted(rt_state->equation.alpha_src_factor) ? "-"
596 : "",
597 factors[util_blendfactor_without_invert(
598 rt_state->equation.alpha_src_factor)],
599 util_blendfactor_is_inverted(rt_state->equation.alpha_dst_factor) ? "-"
600 : "",
601 factors[util_blendfactor_without_invert(
602 rt_state->equation.alpha_dst_factor)]);
603 assert(ret > 0);
604 str += ret;
605 len -= ret;
606 }
607 }
608
609 static bool
pan_inline_blend_constants(nir_builder * b,nir_intrinsic_instr * intr,void * data)610 pan_inline_blend_constants(nir_builder *b, nir_intrinsic_instr *intr,
611 void *data)
612 {
613 if (intr->intrinsic != nir_intrinsic_load_blend_const_color_rgba)
614 return false;
615
616 float *floats = data;
617 const nir_const_value constants[4] = {
618 nir_const_value_for_float(floats[0], 32),
619 nir_const_value_for_float(floats[1], 32),
620 nir_const_value_for_float(floats[2], 32),
621 nir_const_value_for_float(floats[3], 32)};
622
623 b->cursor = nir_after_instr(&intr->instr);
624 nir_def *constant = nir_build_imm(b, 4, 32, constants);
625 nir_def_rewrite_uses(&intr->def, constant);
626 nir_instr_remove(&intr->instr);
627 return true;
628 }
629
630 static nir_shader *
pan_blend_create_shader(const struct pan_blend_state * state,nir_alu_type src0_type,nir_alu_type src1_type,unsigned rt)631 pan_blend_create_shader(const struct pan_blend_state *state,
632 nir_alu_type src0_type, nir_alu_type src1_type,
633 unsigned rt)
634 {
635 const struct pan_blend_rt_state *rt_state = &state->rts[rt];
636 char equation_str[128] = {0};
637
638 get_equation_str(rt_state, equation_str, sizeof(equation_str));
639
640 nir_builder b = nir_builder_init_simple_shader(
641 MESA_SHADER_FRAGMENT, GENX(pan_shader_get_compiler_options)(),
642 "pan_blend(rt=%d,fmt=%s,nr_samples=%d,%s=%s)", rt,
643 util_format_name(rt_state->format), rt_state->nr_samples,
644 state->logicop_enable ? "logicop" : "equation",
645 state->logicop_enable ? logicop_str(state->logicop_func) : equation_str);
646
647 const struct util_format_description *format_desc =
648 util_format_description(rt_state->format);
649 nir_alu_type nir_type = pan_unpacked_type_for_format(format_desc);
650
651 /* Bifrost/Valhall support 16-bit and 32-bit register formats for
652 * LD_TILE/ST_TILE/BLEND, but do not support 8-bit. Rather than making
653 * the fragment output 8-bit and inserting extra conversions in the
654 * compiler, promote the output to 16-bit. The larger size is still
655 * compatible with correct conversion semantics.
656 */
657 if (PAN_ARCH >= 6 && nir_alu_type_get_type_size(nir_type) == 8)
658 nir_type = nir_alu_type_get_base_type(nir_type) | 16;
659
660 nir_lower_blend_options options = {
661 .logicop_enable = state->logicop_enable,
662 .logicop_func = state->logicop_func,
663 };
664
665 options.rt[rt].colormask = rt_state->equation.color_mask;
666 options.format[rt] = rt_state->format;
667
668 if (!rt_state->equation.blend_enable) {
669 static const nir_lower_blend_channel replace = {
670 .func = PIPE_BLEND_ADD,
671 .src_factor = PIPE_BLENDFACTOR_ONE,
672 .dst_factor = PIPE_BLENDFACTOR_ZERO,
673 };
674
675 options.rt[rt].rgb = replace;
676 options.rt[rt].alpha = replace;
677 } else {
678 options.rt[rt].rgb.func = rt_state->equation.rgb_func;
679 options.rt[rt].rgb.src_factor = rt_state->equation.rgb_src_factor;
680 options.rt[rt].rgb.dst_factor = rt_state->equation.rgb_dst_factor;
681 options.rt[rt].alpha.func = rt_state->equation.alpha_func;
682 options.rt[rt].alpha.src_factor = rt_state->equation.alpha_src_factor;
683 options.rt[rt].alpha.dst_factor = rt_state->equation.alpha_dst_factor;
684 }
685
686 nir_def *pixel = nir_load_barycentric_pixel(&b, 32, .interp_mode = 1);
687 nir_def *zero = nir_imm_int(&b, 0);
688
689 for (unsigned i = 0; i < 2; ++i) {
690 nir_alu_type src_type =
691 (i == 1 ? src1_type : src0_type) ?: nir_type_float32;
692
693 /* HACK: workaround buggy TGSI shaders (u_blitter) */
694 src_type = nir_alu_type_get_base_type(nir_type) |
695 nir_alu_type_get_type_size(src_type);
696
697 nir_def *src = nir_load_interpolated_input(
698 &b, 4, nir_alu_type_get_type_size(src_type), pixel, zero,
699 .io_semantics.location = i ? VARYING_SLOT_VAR0 : VARYING_SLOT_COL0,
700 .io_semantics.num_slots = 1, .base = i, .dest_type = src_type);
701
702 /* On Midgard, the blend shader is responsible for format conversion.
703 * As the OpenGL spec requires integer conversions to saturate, we must
704 * saturate ourselves here. On Bifrost and later, the conversion
705 * hardware handles this automatically.
706 */
707 nir_alu_type T = nir_alu_type_get_base_type(nir_type);
708 bool should_saturate = (PAN_ARCH <= 5) && (T != nir_type_float);
709 src = nir_convert_with_rounding(&b, src, T, nir_type,
710 nir_rounding_mode_undef, should_saturate);
711
712 nir_store_output(&b, src, zero, .write_mask = BITFIELD_MASK(4),
713 .src_type = nir_type,
714 .io_semantics.location = FRAG_RESULT_DATA0 + rt,
715 .io_semantics.num_slots = 1,
716 .io_semantics.dual_source_blend_index = i);
717 }
718
719 b.shader->info.io_lowered = true;
720
721 NIR_PASS_V(b.shader, nir_lower_blend, &options);
722 nir_shader_intrinsics_pass(b.shader, pan_inline_blend_constants,
723 nir_metadata_block_index | nir_metadata_dominance,
724 (void *)state->constants);
725
726 return b.shader;
727 }
728
729 #if PAN_ARCH >= 6
730 uint64_t
GENX(pan_blend_get_internal_desc)731 GENX(pan_blend_get_internal_desc)(enum pipe_format fmt, unsigned rt,
732 unsigned force_size, bool dithered)
733 {
734 const struct util_format_description *desc = util_format_description(fmt);
735 uint64_t res;
736
737 pan_pack(&res, INTERNAL_BLEND, cfg) {
738 cfg.mode = MALI_BLEND_MODE_OPAQUE;
739 cfg.fixed_function.num_comps = desc->nr_channels;
740 cfg.fixed_function.rt = rt;
741
742 nir_alu_type T = pan_unpacked_type_for_format(desc);
743
744 if (force_size)
745 T = nir_alu_type_get_base_type(T) | force_size;
746
747 switch (T) {
748 case nir_type_float16:
749 cfg.fixed_function.conversion.register_format =
750 MALI_REGISTER_FILE_FORMAT_F16;
751 break;
752 case nir_type_float32:
753 cfg.fixed_function.conversion.register_format =
754 MALI_REGISTER_FILE_FORMAT_F32;
755 break;
756 case nir_type_int8:
757 case nir_type_int16:
758 cfg.fixed_function.conversion.register_format =
759 MALI_REGISTER_FILE_FORMAT_I16;
760 break;
761 case nir_type_int32:
762 cfg.fixed_function.conversion.register_format =
763 MALI_REGISTER_FILE_FORMAT_I32;
764 break;
765 case nir_type_uint8:
766 case nir_type_uint16:
767 cfg.fixed_function.conversion.register_format =
768 MALI_REGISTER_FILE_FORMAT_U16;
769 break;
770 case nir_type_uint32:
771 cfg.fixed_function.conversion.register_format =
772 MALI_REGISTER_FILE_FORMAT_U32;
773 break;
774 default:
775 unreachable("Invalid format");
776 }
777
778 cfg.fixed_function.conversion.memory_format =
779 GENX(panfrost_dithered_format_from_pipe_format)(fmt, dithered);
780 }
781
782 return res;
783 }
784
785 static bool
inline_rt_conversion(nir_builder * b,nir_intrinsic_instr * intr,void * data)786 inline_rt_conversion(nir_builder *b, nir_intrinsic_instr *intr, void *data)
787 {
788 if (intr->intrinsic != nir_intrinsic_load_rt_conversion_pan)
789 return false;
790
791 enum pipe_format *formats = data;
792 unsigned rt = nir_intrinsic_base(intr);
793 unsigned size = nir_alu_type_get_type_size(nir_intrinsic_src_type(intr));
794 uint64_t conversion =
795 GENX(pan_blend_get_internal_desc)(formats[rt], rt, size, false);
796
797 b->cursor = nir_after_instr(&intr->instr);
798 nir_def_rewrite_uses(&intr->def, nir_imm_int(b, conversion >> 32));
799 return true;
800 }
801
802 bool
GENX(pan_inline_rt_conversion)803 GENX(pan_inline_rt_conversion)(nir_shader *s, enum pipe_format *formats)
804 {
805 return nir_shader_intrinsics_pass(
806 s, inline_rt_conversion,
807 nir_metadata_block_index | nir_metadata_dominance, formats);
808 }
809 #endif
810
811 struct pan_blend_shader_variant *
GENX(pan_blend_get_shader_locked)812 GENX(pan_blend_get_shader_locked)(struct pan_blend_shader_cache *cache,
813 const struct pan_blend_state *state,
814 nir_alu_type src0_type,
815 nir_alu_type src1_type, unsigned rt)
816 {
817 struct pan_blend_shader_key key = {
818 .format = state->rts[rt].format,
819 .src0_type = src0_type,
820 .src1_type = src1_type,
821 .rt = rt,
822 .has_constants = pan_blend_constant_mask(state->rts[rt].equation) != 0,
823 .logicop_enable = state->logicop_enable,
824 .logicop_func = state->logicop_func,
825 .nr_samples = state->rts[rt].nr_samples,
826 .equation = state->rts[rt].equation,
827 };
828
829 /* Blend shaders should only be used for blending on Bifrost onwards */
830 assert(PAN_ARCH <= 5 || state->logicop_enable ||
831 !pan_blend_is_opaque(state->rts[rt].equation));
832 assert(state->rts[rt].equation.color_mask != 0);
833
834 struct hash_entry *he =
835 _mesa_hash_table_search(cache->shaders, &key);
836 struct pan_blend_shader *shader = he ? he->data : NULL;
837
838 if (!shader) {
839 shader = rzalloc(cache->shaders, struct pan_blend_shader);
840 shader->key = key;
841 list_inithead(&shader->variants);
842 _mesa_hash_table_insert(cache->shaders, &shader->key, shader);
843 }
844
845 list_for_each_entry(struct pan_blend_shader_variant, iter, &shader->variants,
846 node) {
847 if (!key.has_constants ||
848 !memcmp(iter->constants, state->constants, sizeof(iter->constants))) {
849 return iter;
850 }
851 }
852
853 struct pan_blend_shader_variant *variant = NULL;
854
855 if (shader->nvariants < PAN_BLEND_SHADER_MAX_VARIANTS) {
856 variant = rzalloc(shader, struct pan_blend_shader_variant);
857 util_dynarray_init(&variant->binary, variant);
858 list_add(&variant->node, &shader->variants);
859 shader->nvariants++;
860 } else {
861 variant = list_last_entry(&shader->variants,
862 struct pan_blend_shader_variant, node);
863 list_del(&variant->node);
864 list_add(&variant->node, &shader->variants);
865 util_dynarray_clear(&variant->binary);
866 }
867
868 memcpy(variant->constants, state->constants, sizeof(variant->constants));
869
870 nir_shader *nir = pan_blend_create_shader(state, src0_type, src1_type, rt);
871
872 /* Compile the NIR shader */
873 struct panfrost_compile_inputs inputs = {
874 .gpu_id = cache->gpu_id,
875 .is_blend = true,
876 .blend.nr_samples = key.nr_samples,
877 };
878
879 enum pipe_format rt_formats[8] = {0};
880 rt_formats[rt] = key.format;
881
882 #if PAN_ARCH >= 6
883 inputs.blend.bifrost_blend_desc =
884 GENX(pan_blend_get_internal_desc)(key.format, key.rt, 0, false);
885 #endif
886
887 struct pan_shader_info info;
888 pan_shader_preprocess(nir, inputs.gpu_id);
889
890 #if PAN_ARCH >= 6
891 NIR_PASS_V(nir, GENX(pan_inline_rt_conversion), rt_formats);
892 #else
893 NIR_PASS_V(nir, pan_lower_framebuffer, rt_formats,
894 pan_raw_format_mask_midgard(rt_formats), MAX2(key.nr_samples, 1),
895 cache->gpu_id < 0x700);
896 #endif
897
898 GENX(pan_shader_compile)(nir, &inputs, &variant->binary, &info);
899
900 variant->work_reg_count = info.work_reg_count;
901
902 #if PAN_ARCH <= 5
903 variant->first_tag = info.midgard.first_tag;
904 #endif
905
906 ralloc_free(nir);
907
908 return variant;
909 }
910 #endif /* ifndef PAN_ARCH */
911