/*
 * Copyright (C) 2019-2021 Collabora, Ltd.
 * Copyright (C) 2019 Alyssa Rosenzweig
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/**
 * @file
 *
 * Implements the fragment pipeline (blending and writeout) in software, to be
 * run as a dedicated "blend shader" stage on Midgard/Bifrost, or as a fragment
 * shader variant on typical GPUs. This pass is useful if hardware lacks
 * fixed-function blending in part or in full.
 */

#include "nir_lower_blend.h"
#include "compiler/nir/nir.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_format_convert.h"
#include "util/blend.h"

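/* Per-shader lowering state: the lowering options, plus the dual-source
 * (SRC1) colours collected from removed store_output instructions, indexed
 * by render target.
 */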
struct ctx {
   const nir_lower_blend_options *options;
   nir_def *src1[8];
};

/* Given processed factors, combine them per a blend function */

static nir_def *
nir_blend_func(
   nir_builder *b,
   enum pipe_blend_func func,
   nir_def *src, nir_def *dst)
{
   switch (func) {
   case PIPE_BLEND_ADD:
      return nir_fadd(b, src, dst);
   case PIPE_BLEND_SUBTRACT:
      return nir_fsub(b, src, dst);
   case PIPE_BLEND_REVERSE_SUBTRACT:
      return nir_fsub(b, dst, src);
   case PIPE_BLEND_MIN:
      return nir_fmin(b, src, dst);
   case PIPE_BLEND_MAX:
      return nir_fmax(b, src, dst);
   }

   unreachable("Invalid blend function");
}

/* Does this blend function multiply by a blend factor? */

static bool
nir_blend_factored(enum pipe_blend_func func)
{
   switch (func) {
   case PIPE_BLEND_ADD:
   case PIPE_BLEND_SUBTRACT:
   case PIPE_BLEND_REVERSE_SUBTRACT:
      return true;
   default:
      return false;
   }
}

/* Compute a src_alpha_saturate factor */
static nir_def *
nir_alpha_saturate(
   nir_builder *b,
   nir_def *src, nir_def *dst,
   unsigned chan)
{
   nir_def *Asrc = nir_channel(b, src, 3);
   nir_def *Adst = nir_channel(b, dst, 3);
   nir_def *one = nir_imm_floatN_t(b, 1.0, src->bit_size);
   nir_def *Adsti = nir_fsub(b, one, Adst);

   return (chan < 3) ? nir_fmin(b, Asrc, Adsti) : one;
}

/* Returns a single scalar factor, unmultiplied */

static nir_def *
nir_blend_factor_value(
   nir_builder *b,
   nir_def *src, nir_def *src1, nir_def *dst, nir_def *bconst,
   unsigned chan,
   enum pipe_blendfactor factor_without_invert)
{
   switch (factor_without_invert) {
   case PIPE_BLENDFACTOR_ONE:
      return nir_imm_floatN_t(b, 1.0, src->bit_size);
   case PIPE_BLENDFACTOR_SRC_COLOR:
      return nir_channel(b, src, chan);
   case PIPE_BLENDFACTOR_SRC1_COLOR:
      return nir_channel(b, src1, chan);
   case PIPE_BLENDFACTOR_DST_COLOR:
      return nir_channel(b, dst, chan);
   case PIPE_BLENDFACTOR_SRC_ALPHA:
      return nir_channel(b, src, 3);
   case PIPE_BLENDFACTOR_SRC1_ALPHA:
      return nir_channel(b, src1, 3);
   case PIPE_BLENDFACTOR_DST_ALPHA:
      return nir_channel(b, dst, 3);
   case PIPE_BLENDFACTOR_CONST_COLOR:
      return nir_channel(b, bconst, chan);
   case PIPE_BLENDFACTOR_CONST_ALPHA:
      return nir_channel(b, bconst, 3);
   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
      return nir_alpha_saturate(b, src, dst, chan);
   default:
      assert(util_blendfactor_is_inverted(factor_without_invert));
      unreachable("Unexpected inverted factor");
   }
}

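/* Clamp to [-1, 1], the representable range of signed normalized formats */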
static nir_def *
nir_fsat_signed(nir_builder *b, nir_def *x)
{
   return nir_fclamp(b, x, nir_imm_floatN_t(b, -1.0, x->bit_size),
                     nir_imm_floatN_t(b, +1.0, x->bit_size));
}

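/* Clamp a value to the representable range of the given format: [0, 1] for
 * unorm, [-1, 1] for snorm, and unclamped otherwise.
 */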
static nir_def *
nir_fsat_to_format(nir_builder *b, nir_def *x, enum pipe_format format)
{
   if (util_format_is_unorm(format))
      return nir_fsat(b, x);
   else if (util_format_is_snorm(format))
      return nir_fsat_signed(b, x);
   else
      return x;
}

/*
 * The spec says we need to clamp blend factors. However, we don't want to
 * clamp unnecessarily, as the clamp might not be optimized out. Check whether
 * clamping a blend factor is needed.
 */
static bool
should_clamp_factor(enum pipe_blendfactor factor, bool snorm)
{
   switch (util_blendfactor_without_invert(factor)) {
   case PIPE_BLENDFACTOR_ONE:
      /* 0, 1 are in [0, 1] and [-1, 1] */
      return false;

   case PIPE_BLENDFACTOR_SRC_COLOR:
   case PIPE_BLENDFACTOR_SRC1_COLOR:
   case PIPE_BLENDFACTOR_DST_COLOR:
   case PIPE_BLENDFACTOR_SRC_ALPHA:
   case PIPE_BLENDFACTOR_SRC1_ALPHA:
   case PIPE_BLENDFACTOR_DST_ALPHA:
      /* Colours are already clamped. For unorm, the complement of something
       * clamped is still clamped. But for snorm, this is not true. Clamp for
       * snorm only.
       */
      return util_blendfactor_is_inverted(factor) && snorm;

   case PIPE_BLENDFACTOR_CONST_COLOR:
   case PIPE_BLENDFACTOR_CONST_ALPHA:
      /* Constant colours are not yet clamped */
      return true;

   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
      /* For unorm, this is in bounds (and hence so is its complement). For
       * snorm, it may not be.
       */
      return snorm;

   default:
      unreachable("invalid blend factor");
   }
}

static bool
channel_uses_dest(nir_lower_blend_channel chan)
{
   /* If blend factors are ignored, dest is used (min/max) */
   if (!nir_blend_factored(chan.func))
      return true;

   /* If dest has a nonzero factor, it is used */
   if (chan.dst_factor != PIPE_BLENDFACTOR_ZERO)
      return true;

   /* Else, check the source factor */
   switch (util_blendfactor_without_invert(chan.src_factor)) {
   case PIPE_BLENDFACTOR_DST_COLOR:
   case PIPE_BLENDFACTOR_DST_ALPHA:
   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
      return true;
   default:
      return false;
   }
}

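/* Compute the scalar blend factor for a given channel (applying inversion and
 * any required clamping) and multiply it into the raw channel value.
 */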
static nir_def *
nir_blend_factor(
   nir_builder *b,
   nir_def *raw_scalar,
   nir_def *src, nir_def *src1, nir_def *dst, nir_def *bconst,
   unsigned chan,
   enum pipe_blendfactor factor,
   enum pipe_format format)
{
   nir_def *f =
      nir_blend_factor_value(b, src, src1, dst, bconst, chan,
                             util_blendfactor_without_invert(factor));

   if (util_blendfactor_is_inverted(factor))
      f = nir_fadd_imm(b, nir_fneg(b, f), 1.0);

   if (should_clamp_factor(factor, util_format_is_snorm(format)))
      f = nir_fsat_to_format(b, f, format);

   return nir_fmul(b, raw_scalar, f);
}

/* Given a colormask, "blend" with the destination */

static nir_def *
nir_color_mask(
   nir_builder *b,
   unsigned mask,
   nir_def *src,
   nir_def *dst)
{
   return nir_vec4(b,
                   nir_channel(b, (mask & (1 << 0)) ? src : dst, 0),
                   nir_channel(b, (mask & (1 << 1)) ? src : dst, 1),
                   nir_channel(b, (mask & (1 << 2)) ? src : dst, 2),
                   nir_channel(b, (mask & (1 << 3)) ? src : dst, 3));
}

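/* Apply a logic op. src and dst hold the integer encodings of the colours;
 * bitmask holds the all-ones value for each channel, used to implement
 * bitwise NOT within each channel's bit width.
 */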
static nir_def *
nir_logicop_func(
   nir_builder *b,
   enum pipe_logicop func,
   nir_def *src, nir_def *dst, nir_def *bitmask)
{
   switch (func) {
   case PIPE_LOGICOP_CLEAR:
      return nir_imm_ivec4(b, 0, 0, 0, 0);
   case PIPE_LOGICOP_NOR:
      return nir_ixor(b, nir_ior(b, src, dst), bitmask);
   case PIPE_LOGICOP_AND_INVERTED:
      return nir_iand(b, nir_ixor(b, src, bitmask), dst);
   case PIPE_LOGICOP_COPY_INVERTED:
      return nir_ixor(b, src, bitmask);
   case PIPE_LOGICOP_AND_REVERSE:
      return nir_iand(b, src, nir_ixor(b, dst, bitmask));
   case PIPE_LOGICOP_INVERT:
      return nir_ixor(b, dst, bitmask);
   case PIPE_LOGICOP_XOR:
      return nir_ixor(b, src, dst);
   case PIPE_LOGICOP_NAND:
      return nir_ixor(b, nir_iand(b, src, dst), bitmask);
   case PIPE_LOGICOP_AND:
      return nir_iand(b, src, dst);
   case PIPE_LOGICOP_EQUIV:
      return nir_ixor(b, nir_ixor(b, src, dst), bitmask);
   case PIPE_LOGICOP_NOOP:
      unreachable("optimized out");
   case PIPE_LOGICOP_OR_INVERTED:
      return nir_ior(b, nir_ixor(b, src, bitmask), dst);
   case PIPE_LOGICOP_COPY:
      return src;
   case PIPE_LOGICOP_OR_REVERSE:
      return nir_ior(b, src, nir_ixor(b, dst, bitmask));
   case PIPE_LOGICOP_OR:
      return nir_ior(b, src, dst);
   case PIPE_LOGICOP_SET:
      return nir_imm_ivec4(b, ~0, ~0, ~0, ~0);
   }

   unreachable("Invalid logicop function");
}

static nir_def *
nir_blend_logicop(
   nir_builder *b,
   const nir_lower_blend_options *options,
   unsigned rt,
   nir_def *src, nir_def *dst)
{
   unsigned bit_size = src->bit_size;

   enum pipe_format format = options->format[rt];
   const struct util_format_description *format_desc =
      util_format_description(format);

   /* From section 17.3.9 ("Logical Operation") of the OpenGL 4.6 core spec:
    *
    *    Logical operation has no effect on a floating-point destination color
    *    buffer, or when FRAMEBUFFER_SRGB is enabled and the value of
    *    FRAMEBUFFER_ATTACHMENT_COLOR_ENCODING for the framebuffer attachment
    *    corresponding to the destination buffer is SRGB (see section 9.2.3).
    *    However, if logical operation is enabled, blending is still disabled.
    */
   if (util_format_is_float(format) || util_format_is_srgb(format))
      return src;

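   /* The logic op is evaluated on 32-bit integer encodings of the colours, so
    * promote 16-bit inputs to 32-bit floats before quantizing, and convert
    * back at the end.
    */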
   if (bit_size != 32) {
      src = nir_f2f32(b, src);
      dst = nir_f2f32(b, dst);
   }

   assert(src->num_components <= 4);
   assert(dst->num_components <= 4);

   unsigned bits[4];
   for (int i = 0; i < 4; ++i)
      bits[i] = format_desc->channel[i].size;

   if (util_format_is_unorm(format)) {
      src = nir_format_float_to_unorm(b, src, bits);
      dst = nir_format_float_to_unorm(b, dst, bits);
   } else if (util_format_is_snorm(format)) {
      src = nir_format_float_to_snorm(b, src, bits);
      dst = nir_format_float_to_snorm(b, dst, bits);
   } else {
      assert(util_format_is_pure_integer(format));
   }

   nir_const_value mask[4];
   for (int i = 0; i < 4; ++i)
      mask[i] = nir_const_value_for_uint(BITFIELD_MASK(bits[i]), 32);

   nir_def *out = nir_logicop_func(b, options->logicop_func, src, dst,
                                   nir_build_imm(b, 4, 32, mask));

   if (util_format_is_unorm(format)) {
      out = nir_format_unorm_to_float(b, out, bits);
   } else if (util_format_is_snorm(format)) {
      /* Sign extend before converting so the i2f in snorm_to_float works */
      out = nir_format_sign_extend_ivec(b, out, bits);
      out = nir_format_snorm_to_float(b, out, bits);
   } else {
      assert(util_format_is_pure_integer(format));
   }

   if (bit_size == 16)
      out = nir_f2f16(b, out);

   return out;
}

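/* Does the framebuffer format have this channel (as opposed to the channel
 * being absent or void padding)?
 */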
static bool
channel_exists(const struct util_format_description *desc, unsigned i)
{
   return (i < desc->nr_channels) &&
          desc->channel[i].type != UTIL_FORMAT_TYPE_VOID;
}

/* Given a blend state, the source color, and the destination color,
 * return the blended color
 */

static nir_def *
nir_blend(
   nir_builder *b,
   const nir_lower_blend_options *options,
   unsigned rt,
   nir_def *src, nir_def *src1, nir_def *dst)
{
   /* Don't crash if src1 isn't written. It doesn't matter what dual colour we
    * blend with in that case, as long as we don't dereference NULL.
    */
   if (!src1)
      src1 = nir_imm_zero(b, 4, src->bit_size);

   /* Grab the blend constant ahead of time */
   nir_def *bconst;
   if (options->scalar_blend_const) {
      bconst = nir_vec4(b,
                        nir_load_blend_const_color_r_float(b),
                        nir_load_blend_const_color_g_float(b),
                        nir_load_blend_const_color_b_float(b),
                        nir_load_blend_const_color_a_float(b));
   } else {
      bconst = nir_load_blend_const_color_rgba(b);
   }

   if (src->bit_size == 16) {
      bconst = nir_f2f16(b, bconst);
      src1 = nir_f2f16(b, src1);
   }

   /* Fixed-point framebuffers require their inputs clamped. */
   enum pipe_format format = options->format[rt];

   /* From section 17.3.6 "Blending" of the OpenGL 4.5 spec:
    *
    *    If the color buffer is fixed-point, the components of the source and
    *    destination values and blend factors are each clamped to [0, 1] or
    *    [-1, 1] respectively for an unsigned normalized or signed normalized
    *    color buffer prior to evaluating the blend equation. If the color
    *    buffer is floating-point, no clamping occurs.
    *
    * Blend factors are clamped at the time of their use to ensure we properly
    * clamp negative constant colours with signed normalized formats and
    * ONE_MINUS_CONSTANT_* factors. Notice that -1 is in [-1, 1] but 1 - (-1) =
    * 2 is not in [-1, 1] and should be clamped to 1.
    */
   src = nir_fsat_to_format(b, src, format);

   if (src1)
      src1 = nir_fsat_to_format(b, src1, format);

   /* DST_ALPHA reads back 1.0 if there is no alpha channel */
   const struct util_format_description *desc =
      util_format_description(format);

   nir_def *zero = nir_imm_floatN_t(b, 0.0, dst->bit_size);
   nir_def *one = nir_imm_floatN_t(b, 1.0, dst->bit_size);

   dst = nir_vec4(b,
                  channel_exists(desc, 0) ? nir_channel(b, dst, 0) : zero,
                  channel_exists(desc, 1) ? nir_channel(b, dst, 1) : zero,
                  channel_exists(desc, 2) ? nir_channel(b, dst, 2) : zero,
                  channel_exists(desc, 3) ? nir_channel(b, dst, 3) : one);

   /* We blend per channel and recombine later */
   nir_def *channels[4];

   for (unsigned c = 0; c < 4; ++c) {
      /* Decide properties based on channel */
      nir_lower_blend_channel chan =
         (c < 3) ? options->rt[rt].rgb : options->rt[rt].alpha;

      nir_def *psrc = nir_channel(b, src, c);
      nir_def *pdst = nir_channel(b, dst, c);

      if (nir_blend_factored(chan.func)) {
         psrc = nir_blend_factor(
            b, psrc,
            src, src1, dst, bconst, c,
            chan.src_factor, format);

         pdst = nir_blend_factor(
            b, pdst,
            src, src1, dst, bconst, c,
            chan.dst_factor, format);
      }

      channels[c] = nir_blend_func(b, chan.func, psrc, pdst);
   }

   return nir_vec(b, channels, 4);
}

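/* Map a FRAG_RESULT location to its render target index, or return -1 for
 * non-colour outputs such as depth or stencil.
 */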
static int
color_index_for_location(unsigned location)
{
   assert(location != FRAG_RESULT_COLOR &&
          "gl_FragColor must be lowered before nir_lower_blend");

   if (location < FRAG_RESULT_DATA0)
      return -1;
   else
      return location - FRAG_RESULT_DATA0;
}

/*
 * Test if the blending options for a given channel encode the "replace" blend
 * mode: dest = source. In this case, blending may be specially optimized.
 */
static bool
nir_blend_replace_channel(const nir_lower_blend_channel *c)
{
   return (c->func == PIPE_BLEND_ADD) &&
          (c->src_factor == PIPE_BLENDFACTOR_ONE) &&
          (c->dst_factor == PIPE_BLENDFACTOR_ZERO);
}

static bool
nir_blend_replace_rt(const nir_lower_blend_rt *rt)
{
   return nir_blend_replace_channel(&rt->rgb) &&
          nir_blend_replace_channel(&rt->alpha);
}

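/* Lower a single colour store_output: read back the destination colour if it
 * is needed, apply the logic op or blend equation, apply the colormask, and
 * rewrite the store with the final value.
 */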
static bool
nir_lower_blend_instr(nir_builder *b, nir_intrinsic_instr *store, void *data)
{
   struct ctx *ctx = data;
   const nir_lower_blend_options *options = ctx->options;
   if (store->intrinsic != nir_intrinsic_store_output)
      return false;

   nir_io_semantics sem = nir_intrinsic_io_semantics(store);
   int rt = color_index_for_location(sem.location);

   /* No blend lowering requested on this RT */
   if (rt < 0 || options->format[rt] == PIPE_FORMAT_NONE)
      return false;

   /* Only process stores once. Pass flags are cleared by consume_dual_stores */
   if (store->instr.pass_flags)
      return false;

   store->instr.pass_flags = 1;

   /* Stores are sunk to the bottom of the block to ensure that the dual
    * source colour is already written.
    */
   b->cursor = nir_after_block(store->instr.block);

   /* Don't bother copying the destination to the source for disabled RTs */
   if (options->rt[rt].colormask == 0 ||
       (options->logicop_enable && options->logicop_func == PIPE_LOGICOP_NOOP)) {

      nir_instr_remove(&store->instr);
      return true;
   }

   /* Grab the input color. We always want 4 channels during blend. Dead
    * code will clean up any channels we don't need.
    */
   nir_def *src = nir_pad_vector(b, store->src[0].ssa, 4);

   assert(nir_src_as_uint(store->src[1]) == 0 && "store_output invariant");

   /* Grab the previous fragment color if we need it */
   nir_def *dst;

   if (channel_uses_dest(options->rt[rt].rgb) ||
       channel_uses_dest(options->rt[rt].alpha) ||
       options->logicop_enable ||
       options->rt[rt].colormask != BITFIELD_MASK(4)) {

      b->shader->info.outputs_read |= BITFIELD64_BIT(sem.location);
      b->shader->info.fs.uses_fbfetch_output = true;
      b->shader->info.fs.uses_sample_shading = true;
      sem.fb_fetch_output = true;

      dst = nir_load_output(b, 4, nir_src_bit_size(store->src[0]),
                            nir_imm_int(b, 0),
                            .dest_type = nir_intrinsic_src_type(store),
                            .io_semantics = sem);
   } else {
      dst = nir_undef(b, 4, nir_src_bit_size(store->src[0]));
   }

   /* Blend the two colors per the passed options. We only call nir_blend if
    * blending is enabled with a blend mode other than replace (independent of
    * the color mask). That avoids unnecessary fsat instructions in the common
    * case where blending is disabled at an API level, but the driver calls
    * nir_blend (possibly for color masking).
    */
   nir_def *blended = src;

   if (options->logicop_enable) {
      blended = nir_blend_logicop(b, options, rt, src, dst);
   } else if (!util_format_is_pure_integer(options->format[rt]) &&
              !nir_blend_replace_rt(&options->rt[rt])) {
      assert(!util_format_is_scaled(options->format[rt]));
      blended = nir_blend(b, options, rt, src, ctx->src1[rt], dst);
   }

   /* Apply a colormask if necessary */
   if (options->rt[rt].colormask != BITFIELD_MASK(4))
      blended = nir_color_mask(b, options->rt[rt].colormask, blended, dst);

   const unsigned num_components =
      util_format_get_nr_components(options->format[rt]);

   /* Shave off any components we don't want to store */
   blended = nir_trim_vector(b, blended, num_components);

   /* Grow or shrink the store destination as needed */
   store->num_components = num_components;
   nir_intrinsic_set_write_mask(store, nir_intrinsic_write_mask(store) &
                                          nir_component_mask(num_components));

   /* Write out the final color instead of the input */
   nir_src_rewrite(&store->src[0], blended);

   /* Sink to bottom */
   nir_instr_remove(&store->instr);
   nir_builder_instr_insert(b, &store->instr);
   return true;
}

/*
 * Dual-source colours are only for blending, so when nir_lower_blend is used,
 * the dual source store_output is for us (only). Remove dual stores so the
 * backend doesn't have to deal with them, collecting the sources for blending.
 */
static bool
consume_dual_stores(nir_builder *b, nir_intrinsic_instr *store, void *data)
{
   nir_def **outputs = data;
   if (store->intrinsic != nir_intrinsic_store_output)
      return false;

   /* While we're here, clear the pass flags for store_outputs, since we'll set
    * them later.
    */
   store->instr.pass_flags = 0;

   nir_io_semantics sem = nir_intrinsic_io_semantics(store);
   if (sem.dual_source_blend_index == 0)
      return false;

   int rt = color_index_for_location(sem.location);
   assert(rt >= 0 && rt < 8 && "bounds for dual-source blending");

   outputs[rt] = store->src[0].ssa;
   nir_instr_remove(&store->instr);
   return true;
}

/** Lower blending to framebuffer fetch and some math
 *
 * This pass requires that shader I/O is lowered to explicit load/store
 * instructions using nir_lower_io.
 */
bool
nir_lower_blend(nir_shader *shader, const nir_lower_blend_options *options)
{
   assert(shader->info.stage == MESA_SHADER_FRAGMENT);

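   /* First walk the shader to collect the dual-source colours and clear the
    * pass flags, then walk it again to lower the remaining colour stores.
    */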
   struct ctx ctx = { .options = options };
   bool progress = nir_shader_intrinsics_pass(shader, consume_dual_stores,
                                              nir_metadata_block_index |
                                                 nir_metadata_dominance,
                                              ctx.src1);

   progress |= nir_shader_intrinsics_pass(shader, nir_lower_blend_instr,
                                          nir_metadata_block_index |
                                             nir_metadata_dominance,
                                          &ctx);
   return progress;
}