/*
 * Copyright © 2015 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/**
 * Implements most of the fixed function fragment pipeline in shader code.
 *
 * VC4 doesn't have any hardware support for blending, alpha test, logic ops,
 * or color mask. Instead, you read the current contents of the destination
 * from the tile buffer after having waited for the scoreboard (which is
 * handled by vc4_qpu_emit.c), then do math using your output color and that
 * destination value, and update the output color appropriately.
 *
 * Once this pass is done, the color write will either have one component (for
 * single sample) with packed argb8888, or 4 components with the per-sample
 * argb8888 result.
 */

/**
 * Lowers fixed-function blending to a load of the destination color and a
 * series of ALU operations before the store of the output.
 */
#include "util/format/u_format.h"
#include "vc4_qir.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_format_convert.h"
#include "vc4_context.h"

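/**
 * Returns true if the color write needs the current dst color from the TLB:
 * blending is enabled, the color mask is partial, or a non-COPY logic op is
 * in use.
 */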
static bool
blend_depends_on_dst_color(struct vc4_compile *c)
{
        return (c->fs_key->blend.blend_enable ||
                c->fs_key->blend.colormask != 0xf ||
                c->fs_key->logicop_func != PIPE_LOGICOP_COPY);
}

/** Emits a load of the previous fragment color from the tile buffer. */
static nir_ssa_def *
vc4_nir_get_dst_color(nir_builder *b, int sample)
{
        return nir_load_input(b, 1, 32, nir_imm_int(b, 0),
                              .base = VC4_NIR_TLB_COLOR_READ_INPUT + sample);
}

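/**
 * Returns the float blend factor for one channel, given a PIPE_BLENDFACTOR_*
 * enum and the unpacked float src/dst colors.
 */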
static nir_ssa_def *
vc4_blend_channel_f(nir_builder *b,
                    nir_ssa_def **src,
                    nir_ssa_def **dst,
                    unsigned factor,
                    int channel)
{
        switch (factor) {
        case PIPE_BLENDFACTOR_ONE:
                return nir_imm_float(b, 1.0);
        case PIPE_BLENDFACTOR_SRC_COLOR:
                return src[channel];
        case PIPE_BLENDFACTOR_SRC_ALPHA:
                return src[3];
        case PIPE_BLENDFACTOR_DST_ALPHA:
                return dst[3];
        case PIPE_BLENDFACTOR_DST_COLOR:
                return dst[channel];
        case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
                if (channel != 3) {
                        return nir_fmin(b,
                                        src[3],
                                        nir_fsub(b,
                                                 nir_imm_float(b, 1.0),
                                                 dst[3]));
                } else {
                        return nir_imm_float(b, 1.0);
                }
        case PIPE_BLENDFACTOR_CONST_COLOR:
                return nir_load_system_value(b,
                                             nir_intrinsic_load_blend_const_color_r_float +
                                             channel,
                                             0, 1, 32);
        case PIPE_BLENDFACTOR_CONST_ALPHA:
                return nir_load_blend_const_color_a_float(b);
        case PIPE_BLENDFACTOR_ZERO:
                return nir_imm_float(b, 0.0);
        case PIPE_BLENDFACTOR_INV_SRC_COLOR:
                return nir_fsub(b, nir_imm_float(b, 1.0), src[channel]);
        case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
                return nir_fsub(b, nir_imm_float(b, 1.0), src[3]);
        case PIPE_BLENDFACTOR_INV_DST_ALPHA:
                return nir_fsub(b, nir_imm_float(b, 1.0), dst[3]);
        case PIPE_BLENDFACTOR_INV_DST_COLOR:
                return nir_fsub(b, nir_imm_float(b, 1.0), dst[channel]);
        case PIPE_BLENDFACTOR_INV_CONST_COLOR:
                return nir_fsub(b, nir_imm_float(b, 1.0),
                                nir_load_system_value(b,
                                                      nir_intrinsic_load_blend_const_color_r_float +
                                                      channel,
                                                      0, 1, 32));
        case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
                return nir_fsub(b, nir_imm_float(b, 1.0),
                                nir_load_blend_const_color_a_float(b));

        default:
        case PIPE_BLENDFACTOR_SRC1_COLOR:
        case PIPE_BLENDFACTOR_SRC1_ALPHA:
        case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
        case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
                /* Unsupported. */
                fprintf(stderr, "Unknown blend factor %d\n", factor);
                return nir_imm_float(b, 1.0);
        }
}

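/**
 * Replaces byte @chan of the packed 8888 color @src0 with the corresponding
 * byte of @src1.
 */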
static nir_ssa_def *
vc4_nir_set_packed_chan(nir_builder *b, nir_ssa_def *src0, nir_ssa_def *src1,
                        int chan)
{
        unsigned chan_mask = 0xff << (chan * 8);
        return nir_ior(b,
                       nir_iand(b, src0, nir_imm_int(b, ~chan_mask)),
                       nir_iand(b, src1, nir_imm_int(b, chan_mask)));
}

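/**
 * Returns the blend factor as a packed 8888 value, computing all four
 * channels at once with VC4's packed-byte ALU ops.  @src_a and @dst_a are
 * the packed src/dst alpha values, and @a_chan is the byte index of alpha
 * within the packed color format.
 */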
static nir_ssa_def *
vc4_blend_channel_i(nir_builder *b,
                    nir_ssa_def *src,
                    nir_ssa_def *dst,
                    nir_ssa_def *src_a,
                    nir_ssa_def *dst_a,
                    unsigned factor,
                    int a_chan)
{
        switch (factor) {
        case PIPE_BLENDFACTOR_ONE:
                return nir_imm_int(b, ~0);
        case PIPE_BLENDFACTOR_SRC_COLOR:
                return src;
        case PIPE_BLENDFACTOR_SRC_ALPHA:
                return src_a;
        case PIPE_BLENDFACTOR_DST_ALPHA:
                return dst_a;
        case PIPE_BLENDFACTOR_DST_COLOR:
                return dst;
        case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
                return vc4_nir_set_packed_chan(b,
                                               nir_umin_4x8_vc4(b,
                                                                src_a,
                                                                nir_inot(b, dst_a)),
                                               nir_imm_int(b, ~0),
                                               a_chan);
        case PIPE_BLENDFACTOR_CONST_COLOR:
                return nir_load_blend_const_color_rgba8888_unorm(b);
        case PIPE_BLENDFACTOR_CONST_ALPHA:
                return nir_load_blend_const_color_aaaa8888_unorm(b);
        case PIPE_BLENDFACTOR_ZERO:
                return nir_imm_int(b, 0);
        case PIPE_BLENDFACTOR_INV_SRC_COLOR:
                return nir_inot(b, src);
        case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
                return nir_inot(b, src_a);
        case PIPE_BLENDFACTOR_INV_DST_ALPHA:
                return nir_inot(b, dst_a);
        case PIPE_BLENDFACTOR_INV_DST_COLOR:
                return nir_inot(b, dst);
        case PIPE_BLENDFACTOR_INV_CONST_COLOR:
                return nir_inot(b,
                                nir_load_blend_const_color_rgba8888_unorm(b));
        case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
                return nir_inot(b,
                                nir_load_blend_const_color_aaaa8888_unorm(b));

        default:
        case PIPE_BLENDFACTOR_SRC1_COLOR:
        case PIPE_BLENDFACTOR_SRC1_ALPHA:
        case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
        case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
                /* Unsupported. */
                fprintf(stderr, "Unknown blend factor %d\n", factor);
                return nir_imm_int(b, ~0);
        }
}

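/** Applies the blend equation (PIPE_BLEND_*) to one pair of float channels. */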
static nir_ssa_def *
vc4_blend_func_f(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst,
                 unsigned func)
{
        switch (func) {
        case PIPE_BLEND_ADD:
                return nir_fadd(b, src, dst);
        case PIPE_BLEND_SUBTRACT:
                return nir_fsub(b, src, dst);
        case PIPE_BLEND_REVERSE_SUBTRACT:
                return nir_fsub(b, dst, src);
        case PIPE_BLEND_MIN:
                return nir_fmin(b, src, dst);
        case PIPE_BLEND_MAX:
                return nir_fmax(b, src, dst);

        default:
                /* Unsupported. */
                fprintf(stderr, "Unknown blend func %d\n", func);
                return src;
        }
}

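/**
 * Applies the blend equation to packed 8888 colors, using VC4's packed-byte
 * saturating add/subtract and min/max ops on all four channels at once.
 */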
static nir_ssa_def *
vc4_blend_func_i(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst,
                 unsigned func)
{
        switch (func) {
        case PIPE_BLEND_ADD:
                return nir_usadd_4x8_vc4(b, src, dst);
        case PIPE_BLEND_SUBTRACT:
                return nir_ussub_4x8_vc4(b, src, dst);
        case PIPE_BLEND_REVERSE_SUBTRACT:
                return nir_ussub_4x8_vc4(b, dst, src);
        case PIPE_BLEND_MIN:
                return nir_umin_4x8_vc4(b, src, dst);
        case PIPE_BLEND_MAX:
                return nir_umax_4x8_vc4(b, src, dst);

        default:
                /* Unsupported. */
                fprintf(stderr, "Unknown blend func %d\n", func);
                return src;
        }
}

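/**
 * Performs blending in floating point, channel by channel.  This is the
 * path taken for sRGB render targets, where the math has to happen on
 * linearized values rather than on the packed unorm bytes.
 */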
static void
vc4_do_blending_f(struct vc4_compile *c, nir_builder *b, nir_ssa_def **result,
                  nir_ssa_def **src_color, nir_ssa_def **dst_color)
{
        struct pipe_rt_blend_state *blend = &c->fs_key->blend;

        if (!blend->blend_enable) {
                for (int i = 0; i < 4; i++)
                        result[i] = src_color[i];
                return;
        }

        /* Clamp the src color to [0, 1]. Dest is already clamped. */
        for (int i = 0; i < 4; i++)
                src_color[i] = nir_fsat(b, src_color[i]);

        nir_ssa_def *src_blend[4], *dst_blend[4];
        for (int i = 0; i < 4; i++) {
                int src_factor = ((i != 3) ? blend->rgb_src_factor :
                                  blend->alpha_src_factor);
                int dst_factor = ((i != 3) ? blend->rgb_dst_factor :
                                  blend->alpha_dst_factor);
                src_blend[i] = nir_fmul(b, src_color[i],
                                        vc4_blend_channel_f(b,
                                                            src_color, dst_color,
                                                            src_factor, i));
                dst_blend[i] = nir_fmul(b, dst_color[i],
                                        vc4_blend_channel_f(b,
                                                            src_color, dst_color,
                                                            dst_factor, i));
        }

        for (int i = 0; i < 4; i++) {
                result[i] = vc4_blend_func_f(b, src_blend[i], dst_blend[i],
                                             ((i != 3) ? blend->rgb_func :
                                              blend->alpha_func));
        }
}

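/**
 * Replicates the low byte of @src across all four bytes of the result
 * (0x000000ab -> 0xabababab).  @src must have its upper 24 bits clear.
 */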
static nir_ssa_def *
vc4_nir_splat(nir_builder *b, nir_ssa_def *src)
{
        nir_ssa_def *or1 = nir_ior(b, src, nir_ishl(b, src, nir_imm_int(b, 8)));
        return nir_ior(b, or1, nir_ishl(b, or1, nir_imm_int(b, 16)));
}

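/**
 * Performs blending on packed 8888 unorm colors.  The whole vec4 is blended
 * at once using packed-byte ops; when the alpha factor or blend func differs
 * from the RGB one, the alpha byte of the result is patched up separately.
 */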
static nir_ssa_def *
vc4_do_blending_i(struct vc4_compile *c, nir_builder *b,
                  nir_ssa_def *src_color, nir_ssa_def *dst_color,
                  nir_ssa_def *src_float_a)
{
        struct pipe_rt_blend_state *blend = &c->fs_key->blend;

        if (!blend->blend_enable)
                return src_color;

        enum pipe_format color_format = c->fs_key->color_format;
        const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
        nir_ssa_def *imm_0xff = nir_imm_int(b, 0xff);
        nir_ssa_def *src_a = nir_pack_unorm_4x8(b, src_float_a);
        nir_ssa_def *dst_a;
        int alpha_chan;
        for (alpha_chan = 0; alpha_chan < 4; alpha_chan++) {
                if (format_swiz[alpha_chan] == 3)
                        break;
        }
        if (alpha_chan != 4) {
                nir_ssa_def *shift = nir_imm_int(b, alpha_chan * 8);
                dst_a = vc4_nir_splat(b, nir_iand(b, nir_ushr(b, dst_color,
                                                              shift), imm_0xff));
        } else {
                dst_a = nir_imm_int(b, ~0);
        }

        nir_ssa_def *src_factor = vc4_blend_channel_i(b,
                                                      src_color, dst_color,
                                                      src_a, dst_a,
                                                      blend->rgb_src_factor,
                                                      alpha_chan);
        nir_ssa_def *dst_factor = vc4_blend_channel_i(b,
                                                      src_color, dst_color,
                                                      src_a, dst_a,
                                                      blend->rgb_dst_factor,
                                                      alpha_chan);

        if (alpha_chan != 4 &&
            blend->alpha_src_factor != blend->rgb_src_factor) {
                nir_ssa_def *src_alpha_factor =
                        vc4_blend_channel_i(b,
                                            src_color, dst_color,
                                            src_a, dst_a,
                                            blend->alpha_src_factor,
                                            alpha_chan);
                src_factor = vc4_nir_set_packed_chan(b, src_factor,
                                                     src_alpha_factor,
                                                     alpha_chan);
        }
        if (alpha_chan != 4 &&
            blend->alpha_dst_factor != blend->rgb_dst_factor) {
                nir_ssa_def *dst_alpha_factor =
                        vc4_blend_channel_i(b,
                                            src_color, dst_color,
                                            src_a, dst_a,
                                            blend->alpha_dst_factor,
                                            alpha_chan);
                dst_factor = vc4_nir_set_packed_chan(b, dst_factor,
                                                     dst_alpha_factor,
                                                     alpha_chan);
        }
        nir_ssa_def *src_blend = nir_umul_unorm_4x8_vc4(b, src_color, src_factor);
        nir_ssa_def *dst_blend = nir_umul_unorm_4x8_vc4(b, dst_color, dst_factor);

        nir_ssa_def *result =
                vc4_blend_func_i(b, src_blend, dst_blend, blend->rgb_func);
        if (alpha_chan != 4 && blend->alpha_func != blend->rgb_func) {
                nir_ssa_def *result_a = vc4_blend_func_i(b,
                                                         src_blend,
                                                         dst_blend,
                                                         blend->alpha_func);
                result = vc4_nir_set_packed_chan(b, result, result_a,
                                                 alpha_chan);
        }
        return result;
}

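/** Applies the GL logic op (PIPE_LOGICOP_*) to the packed src and dst colors. */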
static nir_ssa_def *
vc4_logicop(nir_builder *b, int logicop_func,
            nir_ssa_def *src, nir_ssa_def *dst)
{
        switch (logicop_func) {
        case PIPE_LOGICOP_CLEAR:
                return nir_imm_int(b, 0);
        case PIPE_LOGICOP_NOR:
                return nir_inot(b, nir_ior(b, src, dst));
        case PIPE_LOGICOP_AND_INVERTED:
                return nir_iand(b, nir_inot(b, src), dst);
        case PIPE_LOGICOP_COPY_INVERTED:
                return nir_inot(b, src);
        case PIPE_LOGICOP_AND_REVERSE:
                return nir_iand(b, src, nir_inot(b, dst));
        case PIPE_LOGICOP_INVERT:
                return nir_inot(b, dst);
        case PIPE_LOGICOP_XOR:
                return nir_ixor(b, src, dst);
        case PIPE_LOGICOP_NAND:
                return nir_inot(b, nir_iand(b, src, dst));
        case PIPE_LOGICOP_AND:
                return nir_iand(b, src, dst);
        case PIPE_LOGICOP_EQUIV:
                return nir_inot(b, nir_ixor(b, src, dst));
        case PIPE_LOGICOP_NOOP:
                return dst;
        case PIPE_LOGICOP_OR_INVERTED:
                return nir_ior(b, nir_inot(b, src), dst);
        case PIPE_LOGICOP_OR_REVERSE:
                return nir_ior(b, src, nir_inot(b, dst));
        case PIPE_LOGICOP_OR:
                return nir_ior(b, src, dst);
        case PIPE_LOGICOP_SET:
                return nir_imm_int(b, ~0);
        default:
                fprintf(stderr, "Unknown logic op %d\n", logicop_func);
                FALLTHROUGH;
        case PIPE_LOGICOP_COPY:
                return src;
        }
}

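/**
 * Swizzles the float vec4 into the render target's channel order and packs
 * it to 8888 unorm.
 */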
static nir_ssa_def *
vc4_nir_swizzle_and_pack(struct vc4_compile *c, nir_builder *b,
                         nir_ssa_def **colors)
{
        enum pipe_format color_format = c->fs_key->color_format;
        const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);

        nir_ssa_def *swizzled[4];
        for (int i = 0; i < 4; i++) {
                swizzled[i] = vc4_nir_get_swizzled_channel(b, colors,
                                                           format_swiz[i]);
        }

        return nir_pack_unorm_4x8(b,
                                  nir_vec4(b,
                                           swizzled[0], swizzled[1],
                                           swizzled[2], swizzled[3]));
}

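/**
 * Runs the full fixed-function pipeline for one sample: loads the dst color
 * from the tile buffer, blends (in float for sRGB targets, packed unorm
 * otherwise), applies the logic op, and then applies the color mask.
 * Returns the packed 8888 color to be written.
 */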
static nir_ssa_def *
vc4_nir_blend_pipeline(struct vc4_compile *c, nir_builder *b, nir_ssa_def *src,
                       int sample)
{
        enum pipe_format color_format = c->fs_key->color_format;
        const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
        bool srgb = util_format_is_srgb(color_format);

        /* Pull out the float src/dst color components. */
        nir_ssa_def *packed_dst_color = vc4_nir_get_dst_color(b, sample);
        nir_ssa_def *dst_vec4 = nir_unpack_unorm_4x8(b, packed_dst_color);
        nir_ssa_def *src_color[4], *unpacked_dst_color[4];
        for (unsigned i = 0; i < 4; i++) {
                src_color[i] = nir_channel(b, src, i);
                unpacked_dst_color[i] = nir_channel(b, dst_vec4, i);
        }

        if (c->fs_key->sample_alpha_to_one && c->fs_key->msaa)
                src_color[3] = nir_imm_float(b, 1.0);

        nir_ssa_def *packed_color;
        if (srgb) {
                /* Unswizzle the destination color. */
                nir_ssa_def *dst_color[4];
                for (unsigned i = 0; i < 4; i++) {
                        dst_color[i] = vc4_nir_get_swizzled_channel(b,
                                                                    unpacked_dst_color,
                                                                    format_swiz[i]);
                }

                /* Turn dst color to linear. */
                for (int i = 0; i < 3; i++)
                        dst_color[i] = nir_format_srgb_to_linear(b, dst_color[i]);

                nir_ssa_def *blend_color[4];
                vc4_do_blending_f(c, b, blend_color, src_color, dst_color);

                /* sRGB encode the output color. */
                for (int i = 0; i < 3; i++)
                        blend_color[i] = nir_format_linear_to_srgb(b, blend_color[i]);

                packed_color = vc4_nir_swizzle_and_pack(c, b, blend_color);
        } else {
                nir_ssa_def *packed_src_color =
                        vc4_nir_swizzle_and_pack(c, b, src_color);

                packed_color =
                        vc4_do_blending_i(c, b,
                                          packed_src_color, packed_dst_color,
                                          src_color[3]);
        }

        packed_color = vc4_logicop(b, c->fs_key->logicop_func,
                                   packed_color, packed_dst_color);

        /* If the bit isn't set in the color mask, then just return the
         * original dst color, instead.
         */
        uint32_t colormask = 0xffffffff;
        for (int i = 0; i < 4; i++) {
                if (format_swiz[i] < 4 &&
                    !(c->fs_key->blend.colormask & (1 << format_swiz[i]))) {
                        colormask &= ~(0xff << (i * 8));
                }
        }

        return nir_ior(b,
                       nir_iand(b, packed_color,
                                nir_imm_int(b, colormask)),
                       nir_iand(b, packed_dst_color,
                                nir_imm_int(b, ~colormask)));
}

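/**
 * Creates a new FRAG_RESULT_SAMPLE_MASK shader output and stores @val to it.
 */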
static void
vc4_nir_store_sample_mask(struct vc4_compile *c, nir_builder *b,
                          nir_ssa_def *val)
{
        nir_variable *sample_mask = nir_variable_create(c->s, nir_var_shader_out,
                                                        glsl_uint_type(),
                                                        "sample_mask");
        sample_mask->data.driver_location = c->s->num_outputs++;
        sample_mask->data.location = FRAG_RESULT_SAMPLE_MASK;

        nir_store_output(b, val, nir_imm_int(b, 0),
                         .base = sample_mask->data.driver_location);
}

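/**
 * Rewrites one color store_output to go through the blend pipeline,
 * emitting the alpha-to-coverage sample mask and per-sample blending
 * as needed.
 */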
static void
vc4_nir_lower_blend_instr(struct vc4_compile *c, nir_builder *b,
                          nir_intrinsic_instr *intr)
{
        nir_ssa_def *frag_color = intr->src[0].ssa;

        if (c->fs_key->sample_alpha_to_coverage) {
                nir_ssa_def *a = nir_channel(b, frag_color, 3);

                /* XXX: We should do a nice dither based on the fragment
                 * coordinate, instead.
                 */
                nir_ssa_def *num_samples = nir_imm_float(b, VC4_MAX_SAMPLES);
                nir_ssa_def *num_bits = nir_f2i32(b, nir_fmul(b, a, num_samples));
                nir_ssa_def *bitmask = nir_isub(b,
                                                nir_ishl(b,
                                                         nir_imm_int(b, 1),
                                                         num_bits),
                                                nir_imm_int(b, 1));
                vc4_nir_store_sample_mask(c, b, bitmask);
        }

        /* The TLB color read returns each sample in turn, so if our blending
         * depends on the destination color, we're going to have to run the
         * blending function separately for each destination sample value, and
         * then output the per-sample color using TLB_COLOR_MS.
         */
        nir_ssa_def *blend_output;
        if (c->fs_key->msaa && blend_depends_on_dst_color(c)) {
                c->msaa_per_sample_output = true;

                nir_ssa_def *samples[4];
                for (int i = 0; i < VC4_MAX_SAMPLES; i++)
                        samples[i] = vc4_nir_blend_pipeline(c, b, frag_color, i);
                blend_output = nir_vec4(b,
                                        samples[0], samples[1],
                                        samples[2], samples[3]);
        } else {
                blend_output = vc4_nir_blend_pipeline(c, b, frag_color, 0);
        }

        nir_instr_rewrite_src(&intr->instr, &intr->src[0],
                              nir_src_for_ssa(blend_output));
        if (intr->num_components != blend_output->num_components) {
                unsigned component_mask = BITFIELD_MASK(blend_output->num_components);
                nir_intrinsic_set_write_mask(intr, component_mask);
                intr->num_components = blend_output->num_components;
        }
}

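/**
 * Walks a block looking for store_output intrinsics of the color output
 * and lowers each one through the blend pipeline.
 */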
static bool
vc4_nir_lower_blend_block(nir_block *block, struct vc4_compile *c)
{
        nir_foreach_instr_safe(instr, block) {
                if (instr->type != nir_instr_type_intrinsic)
                        continue;
                nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
                if (intr->intrinsic != nir_intrinsic_store_output)
                        continue;

                nir_variable *output_var = NULL;
                nir_foreach_shader_out_variable(var, c->s) {
                        if (var->data.driver_location ==
                            nir_intrinsic_base(intr)) {
                                output_var = var;
                                break;
                        }
                }
                assert(output_var);

                if (output_var->data.location != FRAG_RESULT_COLOR &&
                    output_var->data.location != FRAG_RESULT_DATA0) {
                        continue;
                }

                nir_function_impl *impl =
                        nir_cf_node_get_function(&block->cf_node);
                nir_builder b;
                nir_builder_init(&b, impl);
                b.cursor = nir_before_instr(&intr->instr);
                vc4_nir_lower_blend_instr(c, &b, intr);
        }
        return true;
}

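/**
 * Pass entry point: lowers the color writes in every block of the shader,
 * then handles the glSampleMask() passthrough if alpha-to-coverage didn't
 * already produce a sample mask.
 */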
void
vc4_nir_lower_blend(nir_shader *s, struct vc4_compile *c)
{
        nir_foreach_function(function, s) {
                if (function->impl) {
                        nir_foreach_block(block, function->impl) {
                                vc4_nir_lower_blend_block(block, c);
                        }

                        nir_metadata_preserve(function->impl,
                                              nir_metadata_block_index |
                                              nir_metadata_dominance);
                }
        }

        /* If we didn't do alpha-to-coverage on the output color, we still
         * need to pass glSampleMask() through.
         */
        if (c->fs_key->sample_coverage && !c->fs_key->sample_alpha_to_coverage) {
                nir_function_impl *impl = nir_shader_get_entrypoint(s);
                nir_builder b;
                nir_builder_init(&b, impl);
                b.cursor = nir_after_block(nir_impl_last_block(impl));

                vc4_nir_store_sample_mask(c, &b, nir_load_sample_mask_in(&b));
        }
}