• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2019-2021 Collabora, Ltd.
3  * Copyright (C) 2019 Alyssa Rosenzweig
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22  * IN THE SOFTWARE.
23  */
24 
25 /**
26  * @file
27  *
28  * Implements the fragment pipeline (blending and writeout) in software, to be
29  * run as a dedicated "blend shader" stage on Midgard/Bifrost, or as a fragment
30  * shader variant on typical GPUs. This pass is useful if hardware lacks
31  * fixed-function blending in part or in full.
32  */
33 
34 #include "compiler/nir/nir.h"
35 #include "compiler/nir/nir_builder.h"
36 #include "compiler/nir/nir_format_convert.h"
37 #include "nir_lower_blend.h"
38 
39 /* Given processed factors, combine them per a blend function */
40 
41 static nir_ssa_def *
nir_blend_func(nir_builder * b,enum blend_func func,nir_ssa_def * src,nir_ssa_def * dst)42 nir_blend_func(
43    nir_builder *b,
44    enum blend_func func,
45    nir_ssa_def *src, nir_ssa_def *dst)
46 {
47    switch (func) {
48    case BLEND_FUNC_ADD:
49       return nir_fadd(b, src, dst);
50    case BLEND_FUNC_SUBTRACT:
51       return nir_fsub(b, src, dst);
52    case BLEND_FUNC_REVERSE_SUBTRACT:
53       return nir_fsub(b, dst, src);
54    case BLEND_FUNC_MIN:
55       return nir_fmin(b, src, dst);
56    case BLEND_FUNC_MAX:
57       return nir_fmax(b, src, dst);
58    }
59 
60    unreachable("Invalid blend function");
61 }
62 
63 /* Does this blend function multiply by a blend factor? */
64 
65 static bool
nir_blend_factored(enum blend_func func)66 nir_blend_factored(enum blend_func func)
67 {
68    switch (func) {
69    case BLEND_FUNC_ADD:
70    case BLEND_FUNC_SUBTRACT:
71    case BLEND_FUNC_REVERSE_SUBTRACT:
72       return true;
73    default:
74       return false;
75    }
76 }
77 
78 /* Compute a src_alpha_saturate factor */
79 static nir_ssa_def *
nir_alpha_saturate(nir_builder * b,nir_ssa_def * src,nir_ssa_def * dst,unsigned chan)80 nir_alpha_saturate(
81    nir_builder *b,
82    nir_ssa_def *src, nir_ssa_def *dst,
83    unsigned chan)
84 {
85    nir_ssa_def *Asrc = nir_channel(b, src, 3);
86    nir_ssa_def *Adst = nir_channel(b, dst, 3);
87    nir_ssa_def *one = nir_imm_floatN_t(b, 1.0, src->bit_size);
88    nir_ssa_def *Adsti = nir_fsub(b, one, Adst);
89 
90    return (chan < 3) ? nir_fmin(b, Asrc, Adsti) : one;
91 }
92 
93 /* Returns a scalar single factor, unmultiplied */
94 
95 static nir_ssa_def *
nir_blend_factor_value(nir_builder * b,nir_ssa_def * src,nir_ssa_def * src1,nir_ssa_def * dst,nir_ssa_def * bconst,unsigned chan,enum blend_factor factor)96 nir_blend_factor_value(
97    nir_builder *b,
98    nir_ssa_def *src, nir_ssa_def *src1, nir_ssa_def *dst, nir_ssa_def *bconst,
99    unsigned chan,
100    enum blend_factor factor)
101 {
102    switch (factor) {
103    case BLEND_FACTOR_ZERO:
104       return nir_imm_floatN_t(b, 0.0, src->bit_size);
105    case BLEND_FACTOR_SRC_COLOR:
106       return nir_channel(b, src, chan);
107    case BLEND_FACTOR_SRC1_COLOR:
108       return nir_channel(b, src1, chan);
109    case BLEND_FACTOR_DST_COLOR:
110       return nir_channel(b, dst, chan);
111    case BLEND_FACTOR_SRC_ALPHA:
112       return nir_channel(b, src, 3);
113    case BLEND_FACTOR_SRC1_ALPHA:
114       return nir_channel(b, src1, 3);
115    case BLEND_FACTOR_DST_ALPHA:
116       return nir_channel(b, dst, 3);
117    case BLEND_FACTOR_CONSTANT_COLOR:
118       return nir_channel(b, bconst, chan);
119    case BLEND_FACTOR_CONSTANT_ALPHA:
120       return nir_channel(b, bconst, 3);
121    case BLEND_FACTOR_SRC_ALPHA_SATURATE:
122       return nir_alpha_saturate(b, src, dst, chan);
123    }
124 
125    unreachable("Invalid blend factor");
126 }
127 
128 static nir_ssa_def *
nir_blend_factor(nir_builder * b,nir_ssa_def * raw_scalar,nir_ssa_def * src,nir_ssa_def * src1,nir_ssa_def * dst,nir_ssa_def * bconst,unsigned chan,enum blend_factor factor,bool inverted)129 nir_blend_factor(
130    nir_builder *b,
131    nir_ssa_def *raw_scalar,
132    nir_ssa_def *src, nir_ssa_def *src1, nir_ssa_def *dst, nir_ssa_def *bconst,
133    unsigned chan,
134    enum blend_factor factor,
135    bool inverted)
136 {
137    nir_ssa_def *f =
138       nir_blend_factor_value(b, src, src1, dst, bconst, chan, factor);
139 
140    if (inverted)
141       f = nir_fadd_imm(b, nir_fneg(b, f), 1.0);
142 
143    return nir_fmul(b, raw_scalar, f);
144 }
145 
146 /* Given a colormask, "blend" with the destination */
147 
148 static nir_ssa_def *
nir_color_mask(nir_builder * b,unsigned mask,nir_ssa_def * src,nir_ssa_def * dst)149 nir_color_mask(
150    nir_builder *b,
151    unsigned mask,
152    nir_ssa_def *src,
153    nir_ssa_def *dst)
154 {
155    return nir_vec4(b,
156          nir_channel(b, (mask & (1 << 0)) ? src : dst, 0),
157          nir_channel(b, (mask & (1 << 1)) ? src : dst, 1),
158          nir_channel(b, (mask & (1 << 2)) ? src : dst, 2),
159          nir_channel(b, (mask & (1 << 3)) ? src : dst, 3));
160 }
161 
162 static nir_ssa_def *
nir_logicop_func(nir_builder * b,unsigned func,nir_ssa_def * src,nir_ssa_def * dst)163 nir_logicop_func(
164    nir_builder *b,
165    unsigned func,
166    nir_ssa_def *src, nir_ssa_def *dst)
167 {
168    switch (func) {
169    case PIPE_LOGICOP_CLEAR:
170       return nir_imm_ivec4(b, 0, 0, 0, 0);
171    case PIPE_LOGICOP_NOR:
172       return nir_inot(b, nir_ior(b, src, dst));
173    case PIPE_LOGICOP_AND_INVERTED:
174       return nir_iand(b, nir_inot(b, src), dst);
175    case PIPE_LOGICOP_COPY_INVERTED:
176       return nir_inot(b, src);
177    case PIPE_LOGICOP_AND_REVERSE:
178       return nir_iand(b, src, nir_inot(b, dst));
179    case PIPE_LOGICOP_INVERT:
180       return nir_inot(b, dst);
181    case PIPE_LOGICOP_XOR:
182       return nir_ixor(b, src, dst);
183    case PIPE_LOGICOP_NAND:
184       return nir_inot(b, nir_iand(b, src, dst));
185    case PIPE_LOGICOP_AND:
186       return nir_iand(b, src, dst);
187    case PIPE_LOGICOP_EQUIV:
188       return nir_inot(b, nir_ixor(b, src, dst));
189    case PIPE_LOGICOP_NOOP:
190       return dst;
191    case PIPE_LOGICOP_OR_INVERTED:
192       return nir_ior(b, nir_inot(b, src), dst);
193    case PIPE_LOGICOP_COPY:
194       return src;
195    case PIPE_LOGICOP_OR_REVERSE:
196       return nir_ior(b, src, nir_inot(b, dst));
197    case PIPE_LOGICOP_OR:
198       return nir_ior(b, src, dst);
199    case PIPE_LOGICOP_SET:
200       return nir_imm_ivec4(b, ~0, ~0, ~0, ~0);
201    }
202 
203    unreachable("Invalid logciop function");
204 }
205 
/* Apply the configured logic op for render target `rt`.  Logic ops operate
 * on the integer framebuffer representation, so float colors are converted
 * to the format's (s)norm integer encoding, combined bitwise, masked to the
 * per-channel bit widths, and converted back to float.  Pure-integer formats
 * pass through the conversion steps unchanged (see the asserts).
 */
static nir_ssa_def *
nir_blend_logicop(
   nir_builder *b,
   const nir_lower_blend_options *options,
   unsigned rt,
   nir_ssa_def *src, nir_ssa_def *dst)
{
   /* Remember the incoming precision so we can restore it at the end */
   unsigned bit_size = src->bit_size;

   enum pipe_format format = options->format[rt];
   const struct util_format_description *format_desc =
      util_format_description(format);

   /* The integer conversion helpers below work at 32-bit */
   if (bit_size != 32) {
      src = nir_f2f32(b, src);
      dst = nir_f2f32(b, dst);
   }

   assert(src->num_components <= 4);
   assert(dst->num_components <= 4);

   /* Per-channel storage bit widths of the render-target format */
   unsigned bits[4];
   for (int i = 0; i < 4; ++i)
       bits[i] = format_desc->channel[i].size;

   /* Float -> integer encoding prior to the bitwise op */
   if (util_format_is_unorm(format)) {
      src = nir_format_float_to_unorm(b, src, bits);
      dst = nir_format_float_to_unorm(b, dst, bits);
   } else if (util_format_is_snorm(format)) {
      src = nir_format_float_to_snorm(b, src, bits);
      dst = nir_format_float_to_snorm(b, dst, bits);
   } else {
      assert(util_format_is_pure_integer(format));
   }

   nir_ssa_def *out = nir_logicop_func(b, options->logicop_func, src, dst);

   /* Mask off bits above each channel's storage width (e.g. after INVERT/
    * SET, which can set bits the format cannot hold) */
   if (bits[0] < 32) {
       nir_const_value mask[4];
       for (int i = 0; i < 4; ++i)
           mask[i] = nir_const_value_for_int((1u << bits[i]) - 1, 32);

       out = nir_iand(b, out, nir_build_imm(b, 4, 32, mask));
   }

   /* Integer encoding -> float, mirroring the conversion above */
   if (util_format_is_unorm(format)) {
      out = nir_format_unorm_to_float(b, out, bits);
   } else if (util_format_is_snorm(format)) {
      out = nir_format_snorm_to_float(b, out, bits);
   } else {
      assert(util_format_is_pure_integer(format));
   }

   /* Restore the caller's bit size */
   if (bit_size == 16)
      out = nir_f2f16(b, out);

   return out;
}
264 
265 static nir_ssa_def *
nir_fsat_signed(nir_builder * b,nir_ssa_def * x)266 nir_fsat_signed(nir_builder *b, nir_ssa_def *x)
267 {
268    return nir_fclamp(b, x, nir_imm_floatN_t(b, -1.0, x->bit_size),
269                            nir_imm_floatN_t(b, +1.0, x->bit_size));
270 }
271 
272 /* Given a blend state, the source color, and the destination color,
273  * return the blended color
274  */
275 
static nir_ssa_def *
nir_blend(
   nir_builder *b,
   const nir_lower_blend_options *options,
   unsigned rt,
   nir_ssa_def *src, nir_ssa_def *src1, nir_ssa_def *dst)
{
   /* Grab the blend constant ahead of time */
   nir_ssa_def *bconst;
   if (options->scalar_blend_const) {
      /* Some backends expose the blend color as four scalar sysvals */
      bconst = nir_vec4(b,
                        nir_load_blend_const_color_r_float(b),
                        nir_load_blend_const_color_g_float(b),
                        nir_load_blend_const_color_b_float(b),
                        nir_load_blend_const_color_a_float(b));
   } else {
      bconst = nir_load_blend_const_color_rgba(b);
   }

   /* Match the source precision so per-channel math is homogeneous */
   if (src->bit_size == 16)
      bconst = nir_f2f16(b, bconst);

   /* Fixed-point framebuffers require their inputs clamped. */
   enum pipe_format format = options->format[rt];

   /* From section 17.3.6 "Blending" of the OpenGL 4.5 spec:
    *
    *     If the color buffer is fixed-point, the components of the source and
    *     destination values and blend factors are each clamped to [0, 1] or
    *     [-1, 1] respectively for an unsigned normalized or signed normalized
    *     color buffer prior to evaluating the blend equation. If the color
    *     buffer is floating-point, no clamping occurs.
    */
   if (util_format_is_unorm(format))
      src = nir_fsat(b, src);
   else if (util_format_is_snorm(format))
      src = nir_fsat_signed(b, src);

   /* DST_ALPHA reads back 1.0 if there is no alpha channel */
   const struct util_format_description *desc =
      util_format_description(format);

   /* Pad missing color channels with 0.0 and a missing alpha with 1.0 so
    * the factor lookups below see well-defined values */
   if (desc->nr_channels < 4) {
      nir_ssa_def *zero = nir_imm_floatN_t(b, 0.0, dst->bit_size);
      nir_ssa_def *one = nir_imm_floatN_t(b, 1.0, dst->bit_size);

      dst = nir_vec4(b, nir_channel(b, dst, 0),
            desc->nr_channels > 1 ? nir_channel(b, dst, 1) : zero,
            desc->nr_channels > 2 ? nir_channel(b, dst, 2) : zero,
            desc->nr_channels > 3 ? nir_channel(b, dst, 3) : one);
   }

   /* We blend per channel and recombine later */
   nir_ssa_def *channels[4];

   for (unsigned c = 0; c < 4; ++c) {
      /* Decide properties based on channel: RGB share one equation, alpha
       * has its own */
      nir_lower_blend_channel chan =
         (c < 3) ? options->rt[rt].rgb : options->rt[rt].alpha;

      nir_ssa_def *psrc = nir_channel(b, src, c);
      nir_ssa_def *pdst = nir_channel(b, dst, c);

      /* MIN/MAX skip the factors (nir_blend_factored is false for them) */
      if (nir_blend_factored(chan.func)) {
         psrc = nir_blend_factor(
                   b, psrc,
                   src, src1, dst, bconst, c,
                   chan.src_factor, chan.invert_src_factor);

         pdst = nir_blend_factor(
                   b, pdst,
                   src, src1, dst, bconst, c,
                   chan.dst_factor, chan.invert_dst_factor);
      }

      channels[c] = nir_blend_func(b, chan.func, psrc, pdst);
   }

   return nir_vec(b, channels, 4);
}
356 
357 static int
color_index_for_var(const nir_variable * var)358 color_index_for_var(const nir_variable *var)
359 {
360    if (var->data.location != FRAG_RESULT_COLOR &&
361        var->data.location < FRAG_RESULT_DATA0)
362       return -1;
363 
364    return (var->data.location == FRAG_RESULT_COLOR) ? 0 :
365           (var->data.location - FRAG_RESULT_DATA0);
366 }
367 
/* Lower a store to a color output: fetch the prior framebuffer color via
 * framebuffer-fetch, apply logic ops or blending per `options`, apply the
 * colormask, then rewrite the store's source to the final color.  Returns
 * true if the store was lowered, false if this RT is not configured.
 */
static bool
nir_lower_blend_store(nir_builder *b, nir_intrinsic_instr *store,
                      const nir_lower_blend_options *options)
{
   assert(store->intrinsic == nir_intrinsic_store_deref);

   nir_variable *var = nir_intrinsic_get_var(store, 0);
   int rt = color_index_for_var(var);

   /* No blend lowering requested on this RT */
   if (rt < 0 || options->format[rt] == PIPE_FORMAT_NONE)
      return false;

   b->cursor = nir_before_instr(&store->instr);

   /* Grab the input color.  We always want 4 channels during blend.  Dead
    * code will clean up any channels we don't need.
    */
   assert(store->src[1].is_ssa);
   nir_ssa_def *src = nir_pad_vector(b, store->src[1].ssa, 4);

   /* Grab the previous fragment color: mark the output as FB-fetched and
    * record that in the shader info before loading it back */
   var->data.fb_fetch_output = true;
   b->shader->info.outputs_read |= BITFIELD64_BIT(var->data.location);
   b->shader->info.fs.uses_fbfetch_output = true;
   nir_ssa_def *dst = nir_pad_vector(b, nir_load_var(b, var), 4);

   /* Blend the two colors per the passed options */
   nir_ssa_def *blended = src;

   if (options->logicop_enable) {
      blended = nir_blend_logicop(b, options, rt, src, dst);
   } else if (!util_format_is_pure_integer(options->format[rt])) {
      /* Pure-integer RTs are never blended; scaled RTs are unsupported */
      assert(!util_format_is_scaled(options->format[rt]));
      blended = nir_blend(b, options, rt, src, options->src1, dst);
   }

   /* Apply a colormask */
   blended = nir_color_mask(b, options->rt[rt].colormask, blended, dst);

   const unsigned num_components = glsl_get_vector_elements(var->type);

   /* Shave off any components we don't want to store */
   blended = nir_trim_vector(b, blended, num_components);

   /* Grow or shrink the store destination as needed */
   assert(nir_intrinsic_write_mask(store) ==
          nir_component_mask(store->num_components));
   store->num_components = num_components;
   store->dest.ssa.num_components = num_components;
   nir_intrinsic_set_write_mask(store, nir_component_mask(num_components));

   /* Write out the final color instead of the input */
   nir_instr_rewrite_src_ssa(&store->instr, &store->src[1], blended);
   return true;
}
424 
425 static bool
nir_lower_blend_instr(nir_builder * b,nir_instr * instr,void * data)426 nir_lower_blend_instr(nir_builder *b, nir_instr *instr, void *data)
427 {
428    const nir_lower_blend_options *options = data;
429 
430    switch (instr->type) {
431    case nir_instr_type_deref: {
432       /* Fix up output deref types, as needed */
433       nir_deref_instr *deref = nir_instr_as_deref(instr);
434       if (!nir_deref_mode_is(deref, nir_var_shader_out))
435          return false;
436 
437       /* Indirects must be already lowered and output variables split */
438       assert(deref->deref_type == nir_deref_type_var);
439 
440       if (deref->type == deref->var->type)
441          return false;
442 
443       deref->type = deref->var->type;
444       return true;
445    }
446 
447    case nir_instr_type_intrinsic: {
448       nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
449       if (intrin->intrinsic != nir_intrinsic_load_deref &&
450           intrin->intrinsic != nir_intrinsic_store_deref)
451          return false;
452 
453       nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
454       if (!nir_deref_mode_is(deref, nir_var_shader_out))
455          return false;
456 
457       assert(glsl_type_is_vector_or_scalar(deref->type));
458 
459       if (intrin->intrinsic == nir_intrinsic_load_deref) {
460          /* We need to fix up framebuffer if num_components changed */
461          const unsigned num_components = glsl_get_vector_elements(deref->type);
462          if (intrin->num_components == num_components)
463             return false;
464 
465          b->cursor = nir_after_instr(&intrin->instr);
466 
467          assert(intrin->dest.is_ssa);
468          nir_ssa_def *val = nir_resize_vector(b, &intrin->dest.ssa,
469                                               num_components);
470          intrin->num_components = num_components,
471          nir_ssa_def_rewrite_uses_after(&intrin->dest.ssa, val,
472                                         val->parent_instr);
473          return true;
474       } else {
475          return nir_lower_blend_store(b, intrin, options);
476       }
477    }
478 
479    default:
480       return false;
481    }
482 }
483 
484 /** Lower blending to framebuffer fetch and some math
485  *
486  * This pass requires that indirects are lowered and output variables split
487  * so that we have a single output variable for each RT.  We could go to the
488  * effort of handling arrays (possibly of arrays) but, given that we need
489  * indirects lowered anyway (we need constant indices to look up blend
490  * functions and formats), we may as well require variables to be split.
491  * This can be done by calling nir_lower_io_arrays_to_elements_no_indirect().
492  */
void
nir_lower_blend(nir_shader *shader, const nir_lower_blend_options *options)
{
   assert(shader->info.stage == MESA_SHADER_FRAGMENT);

   /* Re-type any blended output variables to have the same number of
    * components as the image format.  The GL 4.6 Spec says:
    *
    *    "If a fragment shader writes to none of gl_FragColor, gl_FragData,
    *    nor any user-defined output variables, the values of the fragment
    *    colors following shader execution are undefined, and may differ for
    *    each fragment color.  If some, but not all elements of gl_FragData or
    *    of the user-defined output variables are written, the values of
    *    fragment colors corresponding to unwritten elements or variables are
    *    similarly undefined."
    *
    * Note the phrase "following shader execution".  Those color values are
    * then supposed to go into blending which may, depending on the blend
    * mode, apply constraints that result in well-defined rendering.  It's
    * fine if we have to pad out a value with undef but we then need to blend
    * that garbage value to ensure correct results.
    *
    * This may also, depending on output format, be a small optimization
    * allowing NIR to dead-code unused calculations.
    */
   nir_foreach_shader_out_variable(var, shader) {
      int rt = color_index_for_var(var);

      /* No blend lowering requested on this RT */
      if (rt < 0 || options->format[rt] == PIPE_FORMAT_NONE)
         continue;

      const unsigned num_format_components =
         util_format_get_nr_components(options->format[rt]);

      /* Indirects must be already lowered and output variables split */
      assert(glsl_type_is_vector_or_scalar(var->type));
      var->type = glsl_replace_vector_type(var->type, num_format_components);
   }

   /* Walk every instruction; block order and dominance are preserved */
   nir_shader_instructions_pass(shader, nir_lower_blend_instr,
                                nir_metadata_block_index |
                                nir_metadata_dominance,
                                (void *)options);
}
538