• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2015 Broadcom
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 /**
25  * Implements most of the fixed function fragment pipeline in shader code.
26  *
27  * VC4 doesn't have any hardware support for blending, alpha test, logic ops,
28  * or color mask.  Instead, you read the current contents of the destination
29  * from the tile buffer after having waited for the scoreboard (which is
30  * handled by vc4_qpu_emit.c), then do math using your output color and that
31  * destination value, and update the output color appropriately.
32  *
33  * Once this pass is done, the color write will either have one component (for
34  * single sample) with packed argb8888, or 4 components with the per-sample
35  * argb8888 result.
36  */
37 
38 /**
39  * Lowers fixed-function blending to a load of the destination color and a
40  * series of ALU operations before the store of the output.
41  */
42 #include "util/u_format.h"
43 #include "vc4_qir.h"
44 #include "compiler/nir/nir_builder.h"
45 #include "vc4_context.h"
46 
47 static bool
blend_depends_on_dst_color(struct vc4_compile * c)48 blend_depends_on_dst_color(struct vc4_compile *c)
49 {
50         return (c->fs_key->blend.blend_enable ||
51                 c->fs_key->blend.colormask != 0xf ||
52                 c->fs_key->logicop_func != PIPE_LOGICOP_COPY);
53 }
54 
55 /** Emits a load of the previous fragment color from the tile buffer. */
56 static nir_ssa_def *
vc4_nir_get_dst_color(nir_builder * b,int sample)57 vc4_nir_get_dst_color(nir_builder *b, int sample)
58 {
59         nir_intrinsic_instr *load =
60                 nir_intrinsic_instr_create(b->shader,
61                                            nir_intrinsic_load_input);
62         load->num_components = 1;
63         nir_intrinsic_set_base(load, VC4_NIR_TLB_COLOR_READ_INPUT + sample);
64         load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
65         nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, NULL);
66         nir_builder_instr_insert(b, &load->instr);
67         return &load->dest.ssa;
68 }
69 
70 static  nir_ssa_def *
vc4_nir_srgb_decode(nir_builder * b,nir_ssa_def * srgb)71 vc4_nir_srgb_decode(nir_builder *b, nir_ssa_def *srgb)
72 {
73         nir_ssa_def *is_low = nir_flt(b, srgb, nir_imm_float(b, 0.04045));
74         nir_ssa_def *low = nir_fmul(b, srgb, nir_imm_float(b, 1.0 / 12.92));
75         nir_ssa_def *high = nir_fpow(b,
76                                      nir_fmul(b,
77                                               nir_fadd(b, srgb,
78                                                        nir_imm_float(b, 0.055)),
79                                               nir_imm_float(b, 1.0 / 1.055)),
80                                      nir_imm_float(b, 2.4));
81 
82         return nir_bcsel(b, is_low, low, high);
83 }
84 
85 static  nir_ssa_def *
vc4_nir_srgb_encode(nir_builder * b,nir_ssa_def * linear)86 vc4_nir_srgb_encode(nir_builder *b, nir_ssa_def *linear)
87 {
88         nir_ssa_def *is_low = nir_flt(b, linear, nir_imm_float(b, 0.0031308));
89         nir_ssa_def *low = nir_fmul(b, linear, nir_imm_float(b, 12.92));
90         nir_ssa_def *high = nir_fsub(b,
91                                      nir_fmul(b,
92                                               nir_imm_float(b, 1.055),
93                                               nir_fpow(b,
94                                                        linear,
95                                                        nir_imm_float(b, 0.41666))),
96                                      nir_imm_float(b, 0.055));
97 
98         return nir_bcsel(b, is_low, low, high);
99 }
100 
101 static nir_ssa_def *
vc4_blend_channel_f(nir_builder * b,nir_ssa_def ** src,nir_ssa_def ** dst,unsigned factor,int channel)102 vc4_blend_channel_f(nir_builder *b,
103                     nir_ssa_def **src,
104                     nir_ssa_def **dst,
105                     unsigned factor,
106                     int channel)
107 {
108         switch(factor) {
109         case PIPE_BLENDFACTOR_ONE:
110                 return nir_imm_float(b, 1.0);
111         case PIPE_BLENDFACTOR_SRC_COLOR:
112                 return src[channel];
113         case PIPE_BLENDFACTOR_SRC_ALPHA:
114                 return src[3];
115         case PIPE_BLENDFACTOR_DST_ALPHA:
116                 return dst[3];
117         case PIPE_BLENDFACTOR_DST_COLOR:
118                 return dst[channel];
119         case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
120                 if (channel != 3) {
121                         return nir_fmin(b,
122                                         src[3],
123                                         nir_fsub(b,
124                                                  nir_imm_float(b, 1.0),
125                                                  dst[3]));
126                 } else {
127                         return nir_imm_float(b, 1.0);
128                 }
129         case PIPE_BLENDFACTOR_CONST_COLOR:
130                 return nir_load_system_value(b,
131                                              nir_intrinsic_load_blend_const_color_r_float +
132                                              channel,
133                                              0);
134         case PIPE_BLENDFACTOR_CONST_ALPHA:
135                 return nir_load_blend_const_color_a_float(b);
136         case PIPE_BLENDFACTOR_ZERO:
137                 return nir_imm_float(b, 0.0);
138         case PIPE_BLENDFACTOR_INV_SRC_COLOR:
139                 return nir_fsub(b, nir_imm_float(b, 1.0), src[channel]);
140         case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
141                 return nir_fsub(b, nir_imm_float(b, 1.0), src[3]);
142         case PIPE_BLENDFACTOR_INV_DST_ALPHA:
143                 return nir_fsub(b, nir_imm_float(b, 1.0), dst[3]);
144         case PIPE_BLENDFACTOR_INV_DST_COLOR:
145                 return nir_fsub(b, nir_imm_float(b, 1.0), dst[channel]);
146         case PIPE_BLENDFACTOR_INV_CONST_COLOR:
147                 return nir_fsub(b, nir_imm_float(b, 1.0),
148                                 nir_load_system_value(b,
149                                                       nir_intrinsic_load_blend_const_color_r_float +
150                                                       channel,
151                                                       0));
152         case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
153                 return nir_fsub(b, nir_imm_float(b, 1.0),
154                                 nir_load_blend_const_color_a_float(b));
155 
156         default:
157         case PIPE_BLENDFACTOR_SRC1_COLOR:
158         case PIPE_BLENDFACTOR_SRC1_ALPHA:
159         case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
160         case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
161                 /* Unsupported. */
162                 fprintf(stderr, "Unknown blend factor %d\n", factor);
163                 return nir_imm_float(b, 1.0);
164         }
165 }
166 
167 static nir_ssa_def *
vc4_nir_set_packed_chan(nir_builder * b,nir_ssa_def * src0,nir_ssa_def * src1,int chan)168 vc4_nir_set_packed_chan(nir_builder *b, nir_ssa_def *src0, nir_ssa_def *src1,
169                         int chan)
170 {
171         unsigned chan_mask = 0xff << (chan * 8);
172         return nir_ior(b,
173                        nir_iand(b, src0, nir_imm_int(b, ~chan_mask)),
174                        nir_iand(b, src1, nir_imm_int(b, chan_mask)));
175 }
176 
177 static nir_ssa_def *
vc4_blend_channel_i(nir_builder * b,nir_ssa_def * src,nir_ssa_def * dst,nir_ssa_def * src_a,nir_ssa_def * dst_a,unsigned factor,int a_chan)178 vc4_blend_channel_i(nir_builder *b,
179                     nir_ssa_def *src,
180                     nir_ssa_def *dst,
181                     nir_ssa_def *src_a,
182                     nir_ssa_def *dst_a,
183                     unsigned factor,
184                     int a_chan)
185 {
186         switch (factor) {
187         case PIPE_BLENDFACTOR_ONE:
188                 return nir_imm_int(b, ~0);
189         case PIPE_BLENDFACTOR_SRC_COLOR:
190                 return src;
191         case PIPE_BLENDFACTOR_SRC_ALPHA:
192                 return src_a;
193         case PIPE_BLENDFACTOR_DST_ALPHA:
194                 return dst_a;
195         case PIPE_BLENDFACTOR_DST_COLOR:
196                 return dst;
197         case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
198                 return vc4_nir_set_packed_chan(b,
199                                                nir_umin_4x8(b,
200                                                             src_a,
201                                                             nir_inot(b, dst_a)),
202                                                nir_imm_int(b, ~0),
203                                                a_chan);
204         case PIPE_BLENDFACTOR_CONST_COLOR:
205                 return nir_load_blend_const_color_rgba8888_unorm(b);
206         case PIPE_BLENDFACTOR_CONST_ALPHA:
207                 return nir_load_blend_const_color_aaaa8888_unorm(b);
208         case PIPE_BLENDFACTOR_ZERO:
209                 return nir_imm_int(b, 0);
210         case PIPE_BLENDFACTOR_INV_SRC_COLOR:
211                 return nir_inot(b, src);
212         case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
213                 return nir_inot(b, src_a);
214         case PIPE_BLENDFACTOR_INV_DST_ALPHA:
215                 return nir_inot(b, dst_a);
216         case PIPE_BLENDFACTOR_INV_DST_COLOR:
217                 return nir_inot(b, dst);
218         case PIPE_BLENDFACTOR_INV_CONST_COLOR:
219                 return nir_inot(b,
220                                 nir_load_blend_const_color_rgba8888_unorm(b));
221         case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
222                 return nir_inot(b,
223                                 nir_load_blend_const_color_aaaa8888_unorm(b));
224 
225         default:
226         case PIPE_BLENDFACTOR_SRC1_COLOR:
227         case PIPE_BLENDFACTOR_SRC1_ALPHA:
228         case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
229         case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
230                 /* Unsupported. */
231                 fprintf(stderr, "Unknown blend factor %d\n", factor);
232                 return nir_imm_int(b, ~0);
233         }
234 }
235 
236 static nir_ssa_def *
vc4_blend_func_f(nir_builder * b,nir_ssa_def * src,nir_ssa_def * dst,unsigned func)237 vc4_blend_func_f(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst,
238                  unsigned func)
239 {
240         switch (func) {
241         case PIPE_BLEND_ADD:
242                 return nir_fadd(b, src, dst);
243         case PIPE_BLEND_SUBTRACT:
244                 return nir_fsub(b, src, dst);
245         case PIPE_BLEND_REVERSE_SUBTRACT:
246                 return nir_fsub(b, dst, src);
247         case PIPE_BLEND_MIN:
248                 return nir_fmin(b, src, dst);
249         case PIPE_BLEND_MAX:
250                 return nir_fmax(b, src, dst);
251 
252         default:
253                 /* Unsupported. */
254                 fprintf(stderr, "Unknown blend func %d\n", func);
255                 return src;
256 
257         }
258 }
259 
260 static nir_ssa_def *
vc4_blend_func_i(nir_builder * b,nir_ssa_def * src,nir_ssa_def * dst,unsigned func)261 vc4_blend_func_i(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst,
262                  unsigned func)
263 {
264         switch (func) {
265         case PIPE_BLEND_ADD:
266                 return nir_usadd_4x8(b, src, dst);
267         case PIPE_BLEND_SUBTRACT:
268                 return nir_ussub_4x8(b, src, dst);
269         case PIPE_BLEND_REVERSE_SUBTRACT:
270                 return nir_ussub_4x8(b, dst, src);
271         case PIPE_BLEND_MIN:
272                 return nir_umin_4x8(b, src, dst);
273         case PIPE_BLEND_MAX:
274                 return nir_umax_4x8(b, src, dst);
275 
276         default:
277                 /* Unsupported. */
278                 fprintf(stderr, "Unknown blend func %d\n", func);
279                 return src;
280 
281         }
282 }
283 
284 static void
vc4_do_blending_f(struct vc4_compile * c,nir_builder * b,nir_ssa_def ** result,nir_ssa_def ** src_color,nir_ssa_def ** dst_color)285 vc4_do_blending_f(struct vc4_compile *c, nir_builder *b, nir_ssa_def **result,
286                   nir_ssa_def **src_color, nir_ssa_def **dst_color)
287 {
288         struct pipe_rt_blend_state *blend = &c->fs_key->blend;
289 
290         if (!blend->blend_enable) {
291                 for (int i = 0; i < 4; i++)
292                         result[i] = src_color[i];
293                 return;
294         }
295 
296         /* Clamp the src color to [0, 1].  Dest is already clamped. */
297         for (int i = 0; i < 4; i++)
298                 src_color[i] = nir_fsat(b, src_color[i]);
299 
300         nir_ssa_def *src_blend[4], *dst_blend[4];
301         for (int i = 0; i < 4; i++) {
302                 int src_factor = ((i != 3) ? blend->rgb_src_factor :
303                                   blend->alpha_src_factor);
304                 int dst_factor = ((i != 3) ? blend->rgb_dst_factor :
305                                   blend->alpha_dst_factor);
306                 src_blend[i] = nir_fmul(b, src_color[i],
307                                         vc4_blend_channel_f(b,
308                                                             src_color, dst_color,
309                                                             src_factor, i));
310                 dst_blend[i] = nir_fmul(b, dst_color[i],
311                                         vc4_blend_channel_f(b,
312                                                             src_color, dst_color,
313                                                             dst_factor, i));
314         }
315 
316         for (int i = 0; i < 4; i++) {
317                 result[i] = vc4_blend_func_f(b, src_blend[i], dst_blend[i],
318                                              ((i != 3) ? blend->rgb_func :
319                                               blend->alpha_func));
320         }
321 }
322 
323 static nir_ssa_def *
vc4_nir_splat(nir_builder * b,nir_ssa_def * src)324 vc4_nir_splat(nir_builder *b, nir_ssa_def *src)
325 {
326         nir_ssa_def *or1 = nir_ior(b, src, nir_ishl(b, src, nir_imm_int(b, 8)));
327         return nir_ior(b, or1, nir_ishl(b, or1, nir_imm_int(b, 16)));
328 }
329 
330 static nir_ssa_def *
vc4_do_blending_i(struct vc4_compile * c,nir_builder * b,nir_ssa_def * src_color,nir_ssa_def * dst_color,nir_ssa_def * src_float_a)331 vc4_do_blending_i(struct vc4_compile *c, nir_builder *b,
332                   nir_ssa_def *src_color, nir_ssa_def *dst_color,
333                   nir_ssa_def *src_float_a)
334 {
335         struct pipe_rt_blend_state *blend = &c->fs_key->blend;
336 
337         if (!blend->blend_enable)
338                 return src_color;
339 
340         enum pipe_format color_format = c->fs_key->color_format;
341         const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
342         nir_ssa_def *imm_0xff = nir_imm_int(b, 0xff);
343         nir_ssa_def *src_a = nir_pack_unorm_4x8(b, src_float_a);
344         nir_ssa_def *dst_a;
345         int alpha_chan;
346         for (alpha_chan = 0; alpha_chan < 4; alpha_chan++) {
347                 if (format_swiz[alpha_chan] == 3)
348                         break;
349         }
350         if (alpha_chan != 4) {
351                 nir_ssa_def *shift = nir_imm_int(b, alpha_chan * 8);
352                 dst_a = vc4_nir_splat(b, nir_iand(b, nir_ushr(b, dst_color,
353                                                               shift), imm_0xff));
354         } else {
355                 dst_a = nir_imm_int(b, ~0);
356         }
357 
358         nir_ssa_def *src_factor = vc4_blend_channel_i(b,
359                                                       src_color, dst_color,
360                                                       src_a, dst_a,
361                                                       blend->rgb_src_factor,
362                                                       alpha_chan);
363         nir_ssa_def *dst_factor = vc4_blend_channel_i(b,
364                                                       src_color, dst_color,
365                                                       src_a, dst_a,
366                                                       blend->rgb_dst_factor,
367                                                       alpha_chan);
368 
369         if (alpha_chan != 4 &&
370             blend->alpha_src_factor != blend->rgb_src_factor) {
371                 nir_ssa_def *src_alpha_factor =
372                         vc4_blend_channel_i(b,
373                                             src_color, dst_color,
374                                             src_a, dst_a,
375                                             blend->alpha_src_factor,
376                                             alpha_chan);
377                 src_factor = vc4_nir_set_packed_chan(b, src_factor,
378                                                      src_alpha_factor,
379                                                      alpha_chan);
380         }
381         if (alpha_chan != 4 &&
382             blend->alpha_dst_factor != blend->rgb_dst_factor) {
383                 nir_ssa_def *dst_alpha_factor =
384                         vc4_blend_channel_i(b,
385                                             src_color, dst_color,
386                                             src_a, dst_a,
387                                             blend->alpha_dst_factor,
388                                             alpha_chan);
389                 dst_factor = vc4_nir_set_packed_chan(b, dst_factor,
390                                                      dst_alpha_factor,
391                                                      alpha_chan);
392         }
393         nir_ssa_def *src_blend = nir_umul_unorm_4x8(b, src_color, src_factor);
394         nir_ssa_def *dst_blend = nir_umul_unorm_4x8(b, dst_color, dst_factor);
395 
396         nir_ssa_def *result =
397                 vc4_blend_func_i(b, src_blend, dst_blend, blend->rgb_func);
398         if (alpha_chan != 4 && blend->alpha_func != blend->rgb_func) {
399                 nir_ssa_def *result_a = vc4_blend_func_i(b,
400                                                          src_blend,
401                                                          dst_blend,
402                                                          blend->alpha_func);
403                 result = vc4_nir_set_packed_chan(b, result, result_a,
404                                                  alpha_chan);
405         }
406         return result;
407 }
408 
409 static nir_ssa_def *
vc4_logicop(nir_builder * b,int logicop_func,nir_ssa_def * src,nir_ssa_def * dst)410 vc4_logicop(nir_builder *b, int logicop_func,
411             nir_ssa_def *src, nir_ssa_def *dst)
412 {
413         switch (logicop_func) {
414         case PIPE_LOGICOP_CLEAR:
415                 return nir_imm_int(b, 0);
416         case PIPE_LOGICOP_NOR:
417                 return nir_inot(b, nir_ior(b, src, dst));
418         case PIPE_LOGICOP_AND_INVERTED:
419                 return nir_iand(b, nir_inot(b, src), dst);
420         case PIPE_LOGICOP_COPY_INVERTED:
421                 return nir_inot(b, src);
422         case PIPE_LOGICOP_AND_REVERSE:
423                 return nir_iand(b, src, nir_inot(b, dst));
424         case PIPE_LOGICOP_INVERT:
425                 return nir_inot(b, dst);
426         case PIPE_LOGICOP_XOR:
427                 return nir_ixor(b, src, dst);
428         case PIPE_LOGICOP_NAND:
429                 return nir_inot(b, nir_iand(b, src, dst));
430         case PIPE_LOGICOP_AND:
431                 return nir_iand(b, src, dst);
432         case PIPE_LOGICOP_EQUIV:
433                 return nir_inot(b, nir_ixor(b, src, dst));
434         case PIPE_LOGICOP_NOOP:
435                 return dst;
436         case PIPE_LOGICOP_OR_INVERTED:
437                 return nir_ior(b, nir_inot(b, src), dst);
438         case PIPE_LOGICOP_OR_REVERSE:
439                 return nir_ior(b, src, nir_inot(b, dst));
440         case PIPE_LOGICOP_OR:
441                 return nir_ior(b, src, dst);
442         case PIPE_LOGICOP_SET:
443                 return nir_imm_int(b, ~0);
444         default:
445                 fprintf(stderr, "Unknown logic op %d\n", logicop_func);
446                 /* FALLTHROUGH */
447         case PIPE_LOGICOP_COPY:
448                 return src;
449         }
450 }
451 
452 static nir_ssa_def *
vc4_nir_pipe_compare_func(nir_builder * b,int func,nir_ssa_def * src0,nir_ssa_def * src1)453 vc4_nir_pipe_compare_func(nir_builder *b, int func,
454                           nir_ssa_def *src0, nir_ssa_def *src1)
455 {
456         switch (func) {
457         default:
458                 fprintf(stderr, "Unknown compare func %d\n", func);
459                 /* FALLTHROUGH */
460         case PIPE_FUNC_NEVER:
461                 return nir_imm_int(b, 0);
462         case PIPE_FUNC_ALWAYS:
463                 return nir_imm_int(b, ~0);
464         case PIPE_FUNC_EQUAL:
465                 return nir_feq(b, src0, src1);
466         case PIPE_FUNC_NOTEQUAL:
467                 return nir_fne(b, src0, src1);
468         case PIPE_FUNC_GREATER:
469                 return nir_flt(b, src1, src0);
470         case PIPE_FUNC_GEQUAL:
471                 return nir_fge(b, src0, src1);
472         case PIPE_FUNC_LESS:
473                 return nir_flt(b, src0, src1);
474         case PIPE_FUNC_LEQUAL:
475                 return nir_fge(b, src1, src0);
476         }
477 }
478 
479 static void
vc4_nir_emit_alpha_test_discard(struct vc4_compile * c,nir_builder * b,nir_ssa_def * alpha)480 vc4_nir_emit_alpha_test_discard(struct vc4_compile *c, nir_builder *b,
481                                 nir_ssa_def *alpha)
482 {
483         if (!c->fs_key->alpha_test)
484                 return;
485 
486         nir_ssa_def *condition =
487                 vc4_nir_pipe_compare_func(b, c->fs_key->alpha_test_func,
488                                           alpha,
489                                           nir_load_alpha_ref_float(b));
490 
491         nir_intrinsic_instr *discard =
492                 nir_intrinsic_instr_create(b->shader,
493                                            nir_intrinsic_discard_if);
494         discard->num_components = 1;
495         discard->src[0] = nir_src_for_ssa(nir_inot(b, condition));
496         nir_builder_instr_insert(b, &discard->instr);
497         c->s->info->fs.uses_discard = true;
498 }
499 
500 static nir_ssa_def *
vc4_nir_swizzle_and_pack(struct vc4_compile * c,nir_builder * b,nir_ssa_def ** colors)501 vc4_nir_swizzle_and_pack(struct vc4_compile *c, nir_builder *b,
502                          nir_ssa_def **colors)
503 {
504         enum pipe_format color_format = c->fs_key->color_format;
505         const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
506 
507         nir_ssa_def *swizzled[4];
508         for (int i = 0; i < 4; i++) {
509                 swizzled[i] = vc4_nir_get_swizzled_channel(b, colors,
510                                                            format_swiz[i]);
511         }
512 
513         return nir_pack_unorm_4x8(b,
514                                   nir_vec4(b,
515                                            swizzled[0], swizzled[1],
516                                            swizzled[2], swizzled[3]));
517 
518 }
519 
520 static nir_ssa_def *
vc4_nir_blend_pipeline(struct vc4_compile * c,nir_builder * b,nir_ssa_def * src,int sample)521 vc4_nir_blend_pipeline(struct vc4_compile *c, nir_builder *b, nir_ssa_def *src,
522                        int sample)
523 {
524         enum pipe_format color_format = c->fs_key->color_format;
525         const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
526         bool srgb = util_format_is_srgb(color_format);
527 
528         /* Pull out the float src/dst color components. */
529         nir_ssa_def *packed_dst_color = vc4_nir_get_dst_color(b, sample);
530         nir_ssa_def *dst_vec4 = nir_unpack_unorm_4x8(b, packed_dst_color);
531         nir_ssa_def *src_color[4], *unpacked_dst_color[4];
532         for (unsigned i = 0; i < 4; i++) {
533                 src_color[i] = nir_channel(b, src, i);
534                 unpacked_dst_color[i] = nir_channel(b, dst_vec4, i);
535         }
536 
537         if (c->fs_key->sample_alpha_to_one && c->fs_key->msaa)
538                 src_color[3] = nir_imm_float(b, 1.0);
539 
540         vc4_nir_emit_alpha_test_discard(c, b, src_color[3]);
541 
542         nir_ssa_def *packed_color;
543         if (srgb) {
544                 /* Unswizzle the destination color. */
545                 nir_ssa_def *dst_color[4];
546                 for (unsigned i = 0; i < 4; i++) {
547                         dst_color[i] = vc4_nir_get_swizzled_channel(b,
548                                                                     unpacked_dst_color,
549                                                                     format_swiz[i]);
550                 }
551 
552                 /* Turn dst color to linear. */
553                 for (int i = 0; i < 3; i++)
554                         dst_color[i] = vc4_nir_srgb_decode(b, dst_color[i]);
555 
556                 nir_ssa_def *blend_color[4];
557                 vc4_do_blending_f(c, b, blend_color, src_color, dst_color);
558 
559                 /* sRGB encode the output color */
560                 for (int i = 0; i < 3; i++)
561                         blend_color[i] = vc4_nir_srgb_encode(b, blend_color[i]);
562 
563                 packed_color = vc4_nir_swizzle_and_pack(c, b, blend_color);
564         } else {
565                 nir_ssa_def *packed_src_color =
566                         vc4_nir_swizzle_and_pack(c, b, src_color);
567 
568                 packed_color =
569                         vc4_do_blending_i(c, b,
570                                           packed_src_color, packed_dst_color,
571                                           src_color[3]);
572         }
573 
574         packed_color = vc4_logicop(b, c->fs_key->logicop_func,
575                                    packed_color, packed_dst_color);
576 
577         /* If the bit isn't set in the color mask, then just return the
578          * original dst color, instead.
579          */
580         uint32_t colormask = 0xffffffff;
581         for (int i = 0; i < 4; i++) {
582                 if (format_swiz[i] < 4 &&
583                     !(c->fs_key->blend.colormask & (1 << format_swiz[i]))) {
584                         colormask &= ~(0xff << (i * 8));
585                 }
586         }
587 
588         return nir_ior(b,
589                        nir_iand(b, packed_color,
590                                 nir_imm_int(b, colormask)),
591                        nir_iand(b, packed_dst_color,
592                                 nir_imm_int(b, ~colormask)));
593 }
594 
595 static int
vc4_nir_next_output_driver_location(nir_shader * s)596 vc4_nir_next_output_driver_location(nir_shader *s)
597 {
598         int maxloc = -1;
599 
600         nir_foreach_variable(var, &s->outputs)
601                 maxloc = MAX2(maxloc, (int)var->data.driver_location);
602 
603         return maxloc + 1;
604 }
605 
606 static void
vc4_nir_store_sample_mask(struct vc4_compile * c,nir_builder * b,nir_ssa_def * val)607 vc4_nir_store_sample_mask(struct vc4_compile *c, nir_builder *b,
608                           nir_ssa_def *val)
609 {
610         nir_variable *sample_mask = nir_variable_create(c->s, nir_var_shader_out,
611                                                         glsl_uint_type(),
612                                                         "sample_mask");
613         sample_mask->data.driver_location =
614                 vc4_nir_next_output_driver_location(c->s);
615         sample_mask->data.location = FRAG_RESULT_SAMPLE_MASK;
616 
617         nir_intrinsic_instr *intr =
618                 nir_intrinsic_instr_create(c->s, nir_intrinsic_store_output);
619         intr->num_components = 1;
620         nir_intrinsic_set_base(intr, sample_mask->data.driver_location);
621 
622         intr->src[0] = nir_src_for_ssa(val);
623         intr->src[1] = nir_src_for_ssa(nir_imm_int(b, 0));
624         nir_builder_instr_insert(b, &intr->instr);
625 }
626 
627 static void
vc4_nir_lower_blend_instr(struct vc4_compile * c,nir_builder * b,nir_intrinsic_instr * intr)628 vc4_nir_lower_blend_instr(struct vc4_compile *c, nir_builder *b,
629                           nir_intrinsic_instr *intr)
630 {
631         nir_ssa_def *frag_color = intr->src[0].ssa;
632 
633         if (c->fs_key->sample_coverage) {
634                 nir_intrinsic_instr *load =
635                         nir_intrinsic_instr_create(b->shader,
636                                                    nir_intrinsic_load_sample_mask_in);
637                 load->num_components = 1;
638                 nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, NULL);
639                 nir_builder_instr_insert(b, &load->instr);
640 
641                 nir_ssa_def *bitmask = &load->dest.ssa;
642 
643                 vc4_nir_store_sample_mask(c, b, bitmask);
644         } else if (c->fs_key->sample_alpha_to_coverage) {
645                 nir_ssa_def *a = nir_channel(b, frag_color, 3);
646 
647                 /* XXX: We should do a nice dither based on the fragment
648                  * coordinate, instead.
649                  */
650                 nir_ssa_def *num_samples = nir_imm_float(b, VC4_MAX_SAMPLES);
651                 nir_ssa_def *num_bits = nir_f2i(b, nir_fmul(b, a, num_samples));
652                 nir_ssa_def *bitmask = nir_isub(b,
653                                                 nir_ishl(b,
654                                                          nir_imm_int(b, 1),
655                                                          num_bits),
656                                                 nir_imm_int(b, 1));
657                 vc4_nir_store_sample_mask(c, b, bitmask);
658         }
659 
660         /* The TLB color read returns each sample in turn, so if our blending
661          * depends on the destination color, we're going to have to run the
662          * blending function separately for each destination sample value, and
663          * then output the per-sample color using TLB_COLOR_MS.
664          */
665         nir_ssa_def *blend_output;
666         if (c->fs_key->msaa && blend_depends_on_dst_color(c)) {
667                 c->msaa_per_sample_output = true;
668 
669                 nir_ssa_def *samples[4];
670                 for (int i = 0; i < VC4_MAX_SAMPLES; i++)
671                         samples[i] = vc4_nir_blend_pipeline(c, b, frag_color, i);
672                 blend_output = nir_vec4(b,
673                                         samples[0], samples[1],
674                                         samples[2], samples[3]);
675         } else {
676                 blend_output = vc4_nir_blend_pipeline(c, b, frag_color, 0);
677         }
678 
679         nir_instr_rewrite_src(&intr->instr, &intr->src[0],
680                               nir_src_for_ssa(blend_output));
681         intr->num_components = blend_output->num_components;
682 }
683 
684 static bool
vc4_nir_lower_blend_block(nir_block * block,struct vc4_compile * c)685 vc4_nir_lower_blend_block(nir_block *block, struct vc4_compile *c)
686 {
687         nir_foreach_instr_safe(instr, block) {
688                 if (instr->type != nir_instr_type_intrinsic)
689                         continue;
690                 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
691                 if (intr->intrinsic != nir_intrinsic_store_output)
692                         continue;
693 
694                 nir_variable *output_var = NULL;
695                 nir_foreach_variable(var, &c->s->outputs) {
696                         if (var->data.driver_location ==
697                             nir_intrinsic_base(intr)) {
698                                 output_var = var;
699                                 break;
700                         }
701                 }
702                 assert(output_var);
703 
704                 if (output_var->data.location != FRAG_RESULT_COLOR &&
705                     output_var->data.location != FRAG_RESULT_DATA0) {
706                         continue;
707                 }
708 
709                 nir_function_impl *impl =
710                         nir_cf_node_get_function(&block->cf_node);
711                 nir_builder b;
712                 nir_builder_init(&b, impl);
713                 b.cursor = nir_before_instr(&intr->instr);
714                 vc4_nir_lower_blend_instr(c, &b, intr);
715         }
716         return true;
717 }
718 
719 void
vc4_nir_lower_blend(nir_shader * s,struct vc4_compile * c)720 vc4_nir_lower_blend(nir_shader *s, struct vc4_compile *c)
721 {
722         nir_foreach_function(function, s) {
723                 if (function->impl) {
724                         nir_foreach_block(block, function->impl) {
725                                 vc4_nir_lower_blend_block(block, c);
726                         }
727 
728                         nir_metadata_preserve(function->impl,
729                                               nir_metadata_block_index |
730                                               nir_metadata_dominance);
731                 }
732         }
733 }
734