• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2015 Broadcom
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 /**
25  * Implements most of the fixed function fragment pipeline in shader code.
26  *
27  * VC4 doesn't have any hardware support for blending, alpha test, logic ops,
28  * or color mask.  Instead, you read the current contents of the destination
29  * from the tile buffer after having waited for the scoreboard (which is
30  * handled by vc4_qpu_emit.c), then do math using your output color and that
31  * destination value, and update the output color appropriately.
32  *
33  * Once this pass is done, the color write will either have one component (for
34  * single sample) with packed argb8888, or 4 components with the per-sample
35  * argb8888 result.
36  */
37 
38 /**
39  * Lowers fixed-function blending to a load of the destination color and a
40  * series of ALU operations before the store of the output.
41  */
42 #include "util/format/u_format.h"
43 #include "vc4_qir.h"
44 #include "compiler/nir/nir_builder.h"
45 #include "compiler/nir/nir_format_convert.h"
46 #include "vc4_context.h"
47 
48 static bool
blend_depends_on_dst_color(struct vc4_compile * c)49 blend_depends_on_dst_color(struct vc4_compile *c)
50 {
51         return (c->fs_key->blend.blend_enable ||
52                 c->fs_key->blend.colormask != 0xf ||
53                 c->fs_key->logicop_func != PIPE_LOGICOP_COPY);
54 }
55 
56 /** Emits a load of the previous fragment color from the tile buffer. */
57 static nir_ssa_def *
vc4_nir_get_dst_color(nir_builder * b,int sample)58 vc4_nir_get_dst_color(nir_builder *b, int sample)
59 {
60         nir_intrinsic_instr *load =
61                 nir_intrinsic_instr_create(b->shader,
62                                            nir_intrinsic_load_input);
63         load->num_components = 1;
64         nir_intrinsic_set_base(load, VC4_NIR_TLB_COLOR_READ_INPUT + sample);
65         load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
66         nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, NULL);
67         nir_builder_instr_insert(b, &load->instr);
68         return &load->dest.ssa;
69 }
70 
71 static nir_ssa_def *
vc4_blend_channel_f(nir_builder * b,nir_ssa_def ** src,nir_ssa_def ** dst,unsigned factor,int channel)72 vc4_blend_channel_f(nir_builder *b,
73                     nir_ssa_def **src,
74                     nir_ssa_def **dst,
75                     unsigned factor,
76                     int channel)
77 {
78         switch(factor) {
79         case PIPE_BLENDFACTOR_ONE:
80                 return nir_imm_float(b, 1.0);
81         case PIPE_BLENDFACTOR_SRC_COLOR:
82                 return src[channel];
83         case PIPE_BLENDFACTOR_SRC_ALPHA:
84                 return src[3];
85         case PIPE_BLENDFACTOR_DST_ALPHA:
86                 return dst[3];
87         case PIPE_BLENDFACTOR_DST_COLOR:
88                 return dst[channel];
89         case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
90                 if (channel != 3) {
91                         return nir_fmin(b,
92                                         src[3],
93                                         nir_fsub(b,
94                                                  nir_imm_float(b, 1.0),
95                                                  dst[3]));
96                 } else {
97                         return nir_imm_float(b, 1.0);
98                 }
99         case PIPE_BLENDFACTOR_CONST_COLOR:
100                 return nir_load_system_value(b,
101                                              nir_intrinsic_load_blend_const_color_r_float +
102                                              channel,
103                                              0, 1, 32);
104         case PIPE_BLENDFACTOR_CONST_ALPHA:
105                 return nir_load_blend_const_color_a_float(b);
106         case PIPE_BLENDFACTOR_ZERO:
107                 return nir_imm_float(b, 0.0);
108         case PIPE_BLENDFACTOR_INV_SRC_COLOR:
109                 return nir_fsub(b, nir_imm_float(b, 1.0), src[channel]);
110         case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
111                 return nir_fsub(b, nir_imm_float(b, 1.0), src[3]);
112         case PIPE_BLENDFACTOR_INV_DST_ALPHA:
113                 return nir_fsub(b, nir_imm_float(b, 1.0), dst[3]);
114         case PIPE_BLENDFACTOR_INV_DST_COLOR:
115                 return nir_fsub(b, nir_imm_float(b, 1.0), dst[channel]);
116         case PIPE_BLENDFACTOR_INV_CONST_COLOR:
117                 return nir_fsub(b, nir_imm_float(b, 1.0),
118                                 nir_load_system_value(b,
119                                                       nir_intrinsic_load_blend_const_color_r_float +
120                                                       channel,
121                                                       0, 1, 32));
122         case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
123                 return nir_fsub(b, nir_imm_float(b, 1.0),
124                                 nir_load_blend_const_color_a_float(b));
125 
126         default:
127         case PIPE_BLENDFACTOR_SRC1_COLOR:
128         case PIPE_BLENDFACTOR_SRC1_ALPHA:
129         case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
130         case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
131                 /* Unsupported. */
132                 fprintf(stderr, "Unknown blend factor %d\n", factor);
133                 return nir_imm_float(b, 1.0);
134         }
135 }
136 
137 static nir_ssa_def *
vc4_nir_set_packed_chan(nir_builder * b,nir_ssa_def * src0,nir_ssa_def * src1,int chan)138 vc4_nir_set_packed_chan(nir_builder *b, nir_ssa_def *src0, nir_ssa_def *src1,
139                         int chan)
140 {
141         unsigned chan_mask = 0xff << (chan * 8);
142         return nir_ior(b,
143                        nir_iand(b, src0, nir_imm_int(b, ~chan_mask)),
144                        nir_iand(b, src1, nir_imm_int(b, chan_mask)));
145 }
146 
147 static nir_ssa_def *
vc4_blend_channel_i(nir_builder * b,nir_ssa_def * src,nir_ssa_def * dst,nir_ssa_def * src_a,nir_ssa_def * dst_a,unsigned factor,int a_chan)148 vc4_blend_channel_i(nir_builder *b,
149                     nir_ssa_def *src,
150                     nir_ssa_def *dst,
151                     nir_ssa_def *src_a,
152                     nir_ssa_def *dst_a,
153                     unsigned factor,
154                     int a_chan)
155 {
156         switch (factor) {
157         case PIPE_BLENDFACTOR_ONE:
158                 return nir_imm_int(b, ~0);
159         case PIPE_BLENDFACTOR_SRC_COLOR:
160                 return src;
161         case PIPE_BLENDFACTOR_SRC_ALPHA:
162                 return src_a;
163         case PIPE_BLENDFACTOR_DST_ALPHA:
164                 return dst_a;
165         case PIPE_BLENDFACTOR_DST_COLOR:
166                 return dst;
167         case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
168                 return vc4_nir_set_packed_chan(b,
169                                                nir_umin_4x8(b,
170                                                             src_a,
171                                                             nir_inot(b, dst_a)),
172                                                nir_imm_int(b, ~0),
173                                                a_chan);
174         case PIPE_BLENDFACTOR_CONST_COLOR:
175                 return nir_load_blend_const_color_rgba8888_unorm(b);
176         case PIPE_BLENDFACTOR_CONST_ALPHA:
177                 return nir_load_blend_const_color_aaaa8888_unorm(b);
178         case PIPE_BLENDFACTOR_ZERO:
179                 return nir_imm_int(b, 0);
180         case PIPE_BLENDFACTOR_INV_SRC_COLOR:
181                 return nir_inot(b, src);
182         case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
183                 return nir_inot(b, src_a);
184         case PIPE_BLENDFACTOR_INV_DST_ALPHA:
185                 return nir_inot(b, dst_a);
186         case PIPE_BLENDFACTOR_INV_DST_COLOR:
187                 return nir_inot(b, dst);
188         case PIPE_BLENDFACTOR_INV_CONST_COLOR:
189                 return nir_inot(b,
190                                 nir_load_blend_const_color_rgba8888_unorm(b));
191         case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
192                 return nir_inot(b,
193                                 nir_load_blend_const_color_aaaa8888_unorm(b));
194 
195         default:
196         case PIPE_BLENDFACTOR_SRC1_COLOR:
197         case PIPE_BLENDFACTOR_SRC1_ALPHA:
198         case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
199         case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
200                 /* Unsupported. */
201                 fprintf(stderr, "Unknown blend factor %d\n", factor);
202                 return nir_imm_int(b, ~0);
203         }
204 }
205 
206 static nir_ssa_def *
vc4_blend_func_f(nir_builder * b,nir_ssa_def * src,nir_ssa_def * dst,unsigned func)207 vc4_blend_func_f(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst,
208                  unsigned func)
209 {
210         switch (func) {
211         case PIPE_BLEND_ADD:
212                 return nir_fadd(b, src, dst);
213         case PIPE_BLEND_SUBTRACT:
214                 return nir_fsub(b, src, dst);
215         case PIPE_BLEND_REVERSE_SUBTRACT:
216                 return nir_fsub(b, dst, src);
217         case PIPE_BLEND_MIN:
218                 return nir_fmin(b, src, dst);
219         case PIPE_BLEND_MAX:
220                 return nir_fmax(b, src, dst);
221 
222         default:
223                 /* Unsupported. */
224                 fprintf(stderr, "Unknown blend func %d\n", func);
225                 return src;
226 
227         }
228 }
229 
230 static nir_ssa_def *
vc4_blend_func_i(nir_builder * b,nir_ssa_def * src,nir_ssa_def * dst,unsigned func)231 vc4_blend_func_i(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst,
232                  unsigned func)
233 {
234         switch (func) {
235         case PIPE_BLEND_ADD:
236                 return nir_usadd_4x8(b, src, dst);
237         case PIPE_BLEND_SUBTRACT:
238                 return nir_ussub_4x8(b, src, dst);
239         case PIPE_BLEND_REVERSE_SUBTRACT:
240                 return nir_ussub_4x8(b, dst, src);
241         case PIPE_BLEND_MIN:
242                 return nir_umin_4x8(b, src, dst);
243         case PIPE_BLEND_MAX:
244                 return nir_umax_4x8(b, src, dst);
245 
246         default:
247                 /* Unsupported. */
248                 fprintf(stderr, "Unknown blend func %d\n", func);
249                 return src;
250 
251         }
252 }
253 
254 static void
vc4_do_blending_f(struct vc4_compile * c,nir_builder * b,nir_ssa_def ** result,nir_ssa_def ** src_color,nir_ssa_def ** dst_color)255 vc4_do_blending_f(struct vc4_compile *c, nir_builder *b, nir_ssa_def **result,
256                   nir_ssa_def **src_color, nir_ssa_def **dst_color)
257 {
258         struct pipe_rt_blend_state *blend = &c->fs_key->blend;
259 
260         if (!blend->blend_enable) {
261                 for (int i = 0; i < 4; i++)
262                         result[i] = src_color[i];
263                 return;
264         }
265 
266         /* Clamp the src color to [0, 1].  Dest is already clamped. */
267         for (int i = 0; i < 4; i++)
268                 src_color[i] = nir_fsat(b, src_color[i]);
269 
270         nir_ssa_def *src_blend[4], *dst_blend[4];
271         for (int i = 0; i < 4; i++) {
272                 int src_factor = ((i != 3) ? blend->rgb_src_factor :
273                                   blend->alpha_src_factor);
274                 int dst_factor = ((i != 3) ? blend->rgb_dst_factor :
275                                   blend->alpha_dst_factor);
276                 src_blend[i] = nir_fmul(b, src_color[i],
277                                         vc4_blend_channel_f(b,
278                                                             src_color, dst_color,
279                                                             src_factor, i));
280                 dst_blend[i] = nir_fmul(b, dst_color[i],
281                                         vc4_blend_channel_f(b,
282                                                             src_color, dst_color,
283                                                             dst_factor, i));
284         }
285 
286         for (int i = 0; i < 4; i++) {
287                 result[i] = vc4_blend_func_f(b, src_blend[i], dst_blend[i],
288                                              ((i != 3) ? blend->rgb_func :
289                                               blend->alpha_func));
290         }
291 }
292 
293 static nir_ssa_def *
vc4_nir_splat(nir_builder * b,nir_ssa_def * src)294 vc4_nir_splat(nir_builder *b, nir_ssa_def *src)
295 {
296         nir_ssa_def *or1 = nir_ior(b, src, nir_ishl(b, src, nir_imm_int(b, 8)));
297         return nir_ior(b, or1, nir_ishl(b, or1, nir_imm_int(b, 16)));
298 }
299 
300 static nir_ssa_def *
vc4_do_blending_i(struct vc4_compile * c,nir_builder * b,nir_ssa_def * src_color,nir_ssa_def * dst_color,nir_ssa_def * src_float_a)301 vc4_do_blending_i(struct vc4_compile *c, nir_builder *b,
302                   nir_ssa_def *src_color, nir_ssa_def *dst_color,
303                   nir_ssa_def *src_float_a)
304 {
305         struct pipe_rt_blend_state *blend = &c->fs_key->blend;
306 
307         if (!blend->blend_enable)
308                 return src_color;
309 
310         enum pipe_format color_format = c->fs_key->color_format;
311         const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
312         nir_ssa_def *imm_0xff = nir_imm_int(b, 0xff);
313         nir_ssa_def *src_a = nir_pack_unorm_4x8(b, src_float_a);
314         nir_ssa_def *dst_a;
315         int alpha_chan;
316         for (alpha_chan = 0; alpha_chan < 4; alpha_chan++) {
317                 if (format_swiz[alpha_chan] == 3)
318                         break;
319         }
320         if (alpha_chan != 4) {
321                 nir_ssa_def *shift = nir_imm_int(b, alpha_chan * 8);
322                 dst_a = vc4_nir_splat(b, nir_iand(b, nir_ushr(b, dst_color,
323                                                               shift), imm_0xff));
324         } else {
325                 dst_a = nir_imm_int(b, ~0);
326         }
327 
328         nir_ssa_def *src_factor = vc4_blend_channel_i(b,
329                                                       src_color, dst_color,
330                                                       src_a, dst_a,
331                                                       blend->rgb_src_factor,
332                                                       alpha_chan);
333         nir_ssa_def *dst_factor = vc4_blend_channel_i(b,
334                                                       src_color, dst_color,
335                                                       src_a, dst_a,
336                                                       blend->rgb_dst_factor,
337                                                       alpha_chan);
338 
339         if (alpha_chan != 4 &&
340             blend->alpha_src_factor != blend->rgb_src_factor) {
341                 nir_ssa_def *src_alpha_factor =
342                         vc4_blend_channel_i(b,
343                                             src_color, dst_color,
344                                             src_a, dst_a,
345                                             blend->alpha_src_factor,
346                                             alpha_chan);
347                 src_factor = vc4_nir_set_packed_chan(b, src_factor,
348                                                      src_alpha_factor,
349                                                      alpha_chan);
350         }
351         if (alpha_chan != 4 &&
352             blend->alpha_dst_factor != blend->rgb_dst_factor) {
353                 nir_ssa_def *dst_alpha_factor =
354                         vc4_blend_channel_i(b,
355                                             src_color, dst_color,
356                                             src_a, dst_a,
357                                             blend->alpha_dst_factor,
358                                             alpha_chan);
359                 dst_factor = vc4_nir_set_packed_chan(b, dst_factor,
360                                                      dst_alpha_factor,
361                                                      alpha_chan);
362         }
363         nir_ssa_def *src_blend = nir_umul_unorm_4x8(b, src_color, src_factor);
364         nir_ssa_def *dst_blend = nir_umul_unorm_4x8(b, dst_color, dst_factor);
365 
366         nir_ssa_def *result =
367                 vc4_blend_func_i(b, src_blend, dst_blend, blend->rgb_func);
368         if (alpha_chan != 4 && blend->alpha_func != blend->rgb_func) {
369                 nir_ssa_def *result_a = vc4_blend_func_i(b,
370                                                          src_blend,
371                                                          dst_blend,
372                                                          blend->alpha_func);
373                 result = vc4_nir_set_packed_chan(b, result, result_a,
374                                                  alpha_chan);
375         }
376         return result;
377 }
378 
379 static nir_ssa_def *
vc4_logicop(nir_builder * b,int logicop_func,nir_ssa_def * src,nir_ssa_def * dst)380 vc4_logicop(nir_builder *b, int logicop_func,
381             nir_ssa_def *src, nir_ssa_def *dst)
382 {
383         switch (logicop_func) {
384         case PIPE_LOGICOP_CLEAR:
385                 return nir_imm_int(b, 0);
386         case PIPE_LOGICOP_NOR:
387                 return nir_inot(b, nir_ior(b, src, dst));
388         case PIPE_LOGICOP_AND_INVERTED:
389                 return nir_iand(b, nir_inot(b, src), dst);
390         case PIPE_LOGICOP_COPY_INVERTED:
391                 return nir_inot(b, src);
392         case PIPE_LOGICOP_AND_REVERSE:
393                 return nir_iand(b, src, nir_inot(b, dst));
394         case PIPE_LOGICOP_INVERT:
395                 return nir_inot(b, dst);
396         case PIPE_LOGICOP_XOR:
397                 return nir_ixor(b, src, dst);
398         case PIPE_LOGICOP_NAND:
399                 return nir_inot(b, nir_iand(b, src, dst));
400         case PIPE_LOGICOP_AND:
401                 return nir_iand(b, src, dst);
402         case PIPE_LOGICOP_EQUIV:
403                 return nir_inot(b, nir_ixor(b, src, dst));
404         case PIPE_LOGICOP_NOOP:
405                 return dst;
406         case PIPE_LOGICOP_OR_INVERTED:
407                 return nir_ior(b, nir_inot(b, src), dst);
408         case PIPE_LOGICOP_OR_REVERSE:
409                 return nir_ior(b, src, nir_inot(b, dst));
410         case PIPE_LOGICOP_OR:
411                 return nir_ior(b, src, dst);
412         case PIPE_LOGICOP_SET:
413                 return nir_imm_int(b, ~0);
414         default:
415                 fprintf(stderr, "Unknown logic op %d\n", logicop_func);
416                 /* FALLTHROUGH */
417         case PIPE_LOGICOP_COPY:
418                 return src;
419         }
420 }
421 
422 static nir_ssa_def *
vc4_nir_swizzle_and_pack(struct vc4_compile * c,nir_builder * b,nir_ssa_def ** colors)423 vc4_nir_swizzle_and_pack(struct vc4_compile *c, nir_builder *b,
424                          nir_ssa_def **colors)
425 {
426         enum pipe_format color_format = c->fs_key->color_format;
427         const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
428 
429         nir_ssa_def *swizzled[4];
430         for (int i = 0; i < 4; i++) {
431                 swizzled[i] = vc4_nir_get_swizzled_channel(b, colors,
432                                                            format_swiz[i]);
433         }
434 
435         return nir_pack_unorm_4x8(b,
436                                   nir_vec4(b,
437                                            swizzled[0], swizzled[1],
438                                            swizzled[2], swizzled[3]));
439 
440 }
441 
442 static nir_ssa_def *
vc4_nir_blend_pipeline(struct vc4_compile * c,nir_builder * b,nir_ssa_def * src,int sample)443 vc4_nir_blend_pipeline(struct vc4_compile *c, nir_builder *b, nir_ssa_def *src,
444                        int sample)
445 {
446         enum pipe_format color_format = c->fs_key->color_format;
447         const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
448         bool srgb = util_format_is_srgb(color_format);
449 
450         /* Pull out the float src/dst color components. */
451         nir_ssa_def *packed_dst_color = vc4_nir_get_dst_color(b, sample);
452         nir_ssa_def *dst_vec4 = nir_unpack_unorm_4x8(b, packed_dst_color);
453         nir_ssa_def *src_color[4], *unpacked_dst_color[4];
454         for (unsigned i = 0; i < 4; i++) {
455                 src_color[i] = nir_channel(b, src, i);
456                 unpacked_dst_color[i] = nir_channel(b, dst_vec4, i);
457         }
458 
459         if (c->fs_key->sample_alpha_to_one && c->fs_key->msaa)
460                 src_color[3] = nir_imm_float(b, 1.0);
461 
462         nir_ssa_def *packed_color;
463         if (srgb) {
464                 /* Unswizzle the destination color. */
465                 nir_ssa_def *dst_color[4];
466                 for (unsigned i = 0; i < 4; i++) {
467                         dst_color[i] = vc4_nir_get_swizzled_channel(b,
468                                                                     unpacked_dst_color,
469                                                                     format_swiz[i]);
470                 }
471 
472                 /* Turn dst color to linear. */
473                 for (int i = 0; i < 3; i++)
474                         dst_color[i] = nir_format_srgb_to_linear(b, dst_color[i]);
475 
476                 nir_ssa_def *blend_color[4];
477                 vc4_do_blending_f(c, b, blend_color, src_color, dst_color);
478 
479                 /* sRGB encode the output color */
480                 for (int i = 0; i < 3; i++)
481                         blend_color[i] = nir_format_linear_to_srgb(b, blend_color[i]);
482 
483                 packed_color = vc4_nir_swizzle_and_pack(c, b, blend_color);
484         } else {
485                 nir_ssa_def *packed_src_color =
486                         vc4_nir_swizzle_and_pack(c, b, src_color);
487 
488                 packed_color =
489                         vc4_do_blending_i(c, b,
490                                           packed_src_color, packed_dst_color,
491                                           src_color[3]);
492         }
493 
494         packed_color = vc4_logicop(b, c->fs_key->logicop_func,
495                                    packed_color, packed_dst_color);
496 
497         /* If the bit isn't set in the color mask, then just return the
498          * original dst color, instead.
499          */
500         uint32_t colormask = 0xffffffff;
501         for (int i = 0; i < 4; i++) {
502                 if (format_swiz[i] < 4 &&
503                     !(c->fs_key->blend.colormask & (1 << format_swiz[i]))) {
504                         colormask &= ~(0xff << (i * 8));
505                 }
506         }
507 
508         return nir_ior(b,
509                        nir_iand(b, packed_color,
510                                 nir_imm_int(b, colormask)),
511                        nir_iand(b, packed_dst_color,
512                                 nir_imm_int(b, ~colormask)));
513 }
514 
515 static void
vc4_nir_store_sample_mask(struct vc4_compile * c,nir_builder * b,nir_ssa_def * val)516 vc4_nir_store_sample_mask(struct vc4_compile *c, nir_builder *b,
517                           nir_ssa_def *val)
518 {
519         nir_variable *sample_mask = nir_variable_create(c->s, nir_var_shader_out,
520                                                         glsl_uint_type(),
521                                                         "sample_mask");
522         sample_mask->data.driver_location = c->s->num_outputs++;
523         sample_mask->data.location = FRAG_RESULT_SAMPLE_MASK;
524 
525         nir_intrinsic_instr *intr =
526                 nir_intrinsic_instr_create(c->s, nir_intrinsic_store_output);
527         intr->num_components = 1;
528         nir_intrinsic_set_base(intr, sample_mask->data.driver_location);
529 
530         intr->src[0] = nir_src_for_ssa(val);
531         intr->src[1] = nir_src_for_ssa(nir_imm_int(b, 0));
532         nir_builder_instr_insert(b, &intr->instr);
533 }
534 
535 static void
vc4_nir_lower_blend_instr(struct vc4_compile * c,nir_builder * b,nir_intrinsic_instr * intr)536 vc4_nir_lower_blend_instr(struct vc4_compile *c, nir_builder *b,
537                           nir_intrinsic_instr *intr)
538 {
539         nir_ssa_def *frag_color = intr->src[0].ssa;
540 
541         if (c->fs_key->sample_alpha_to_coverage) {
542                 nir_ssa_def *a = nir_channel(b, frag_color, 3);
543 
544                 /* XXX: We should do a nice dither based on the fragment
545                  * coordinate, instead.
546                  */
547                 nir_ssa_def *num_samples = nir_imm_float(b, VC4_MAX_SAMPLES);
548                 nir_ssa_def *num_bits = nir_f2i32(b, nir_fmul(b, a, num_samples));
549                 nir_ssa_def *bitmask = nir_isub(b,
550                                                 nir_ishl(b,
551                                                          nir_imm_int(b, 1),
552                                                          num_bits),
553                                                 nir_imm_int(b, 1));
554                 vc4_nir_store_sample_mask(c, b, bitmask);
555         }
556 
557         /* The TLB color read returns each sample in turn, so if our blending
558          * depends on the destination color, we're going to have to run the
559          * blending function separately for each destination sample value, and
560          * then output the per-sample color using TLB_COLOR_MS.
561          */
562         nir_ssa_def *blend_output;
563         if (c->fs_key->msaa && blend_depends_on_dst_color(c)) {
564                 c->msaa_per_sample_output = true;
565 
566                 nir_ssa_def *samples[4];
567                 for (int i = 0; i < VC4_MAX_SAMPLES; i++)
568                         samples[i] = vc4_nir_blend_pipeline(c, b, frag_color, i);
569                 blend_output = nir_vec4(b,
570                                         samples[0], samples[1],
571                                         samples[2], samples[3]);
572         } else {
573                 blend_output = vc4_nir_blend_pipeline(c, b, frag_color, 0);
574         }
575 
576         nir_instr_rewrite_src(&intr->instr, &intr->src[0],
577                               nir_src_for_ssa(blend_output));
578         intr->num_components = blend_output->num_components;
579 }
580 
581 static bool
vc4_nir_lower_blend_block(nir_block * block,struct vc4_compile * c)582 vc4_nir_lower_blend_block(nir_block *block, struct vc4_compile *c)
583 {
584         nir_foreach_instr_safe(instr, block) {
585                 if (instr->type != nir_instr_type_intrinsic)
586                         continue;
587                 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
588                 if (intr->intrinsic != nir_intrinsic_store_output)
589                         continue;
590 
591                 nir_variable *output_var = NULL;
592                 nir_foreach_shader_out_variable(var, c->s) {
593                         if (var->data.driver_location ==
594                             nir_intrinsic_base(intr)) {
595                                 output_var = var;
596                                 break;
597                         }
598                 }
599                 assert(output_var);
600 
601                 if (output_var->data.location != FRAG_RESULT_COLOR &&
602                     output_var->data.location != FRAG_RESULT_DATA0) {
603                         continue;
604                 }
605 
606                 nir_function_impl *impl =
607                         nir_cf_node_get_function(&block->cf_node);
608                 nir_builder b;
609                 nir_builder_init(&b, impl);
610                 b.cursor = nir_before_instr(&intr->instr);
611                 vc4_nir_lower_blend_instr(c, &b, intr);
612         }
613         return true;
614 }
615 
616 void
vc4_nir_lower_blend(nir_shader * s,struct vc4_compile * c)617 vc4_nir_lower_blend(nir_shader *s, struct vc4_compile *c)
618 {
619         nir_foreach_function(function, s) {
620                 if (function->impl) {
621                         nir_foreach_block(block, function->impl) {
622                                 vc4_nir_lower_blend_block(block, c);
623                         }
624 
625                         nir_metadata_preserve(function->impl,
626                                               nir_metadata_block_index |
627                                               nir_metadata_dominance);
628                 }
629         }
630 
631         /* If we didn't do alpha-to-coverage on the output color, we still
632          * need to pass glSampleMask() through.
633          */
634         if (c->fs_key->sample_coverage && !c->fs_key->sample_alpha_to_coverage) {
635                 nir_function_impl *impl = nir_shader_get_entrypoint(s);
636                 nir_builder b;
637                 nir_builder_init(&b, impl);
638                 b.cursor = nir_after_block(nir_impl_last_block(impl));
639 
640                 vc4_nir_store_sample_mask(c, &b, nir_load_sample_mask_in(&b));
641         }
642 }
643