1 /*
2 * Copyright (C) 2019 Alyssa Rosenzweig
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 /**
25 * @file
26 *
27 * Implements the fragment pipeline (blending and writeout) in software, to be
28 * run as a dedicated "blend shader" stage on Midgard/Bifrost, or as a fragment
29 * shader variant on typical GPUs. This pass is useful if hardware lacks
30 * fixed-function blending in part or in full.
31 */
32
33 #include "compiler/nir/nir.h"
34 #include "compiler/nir/nir_builder.h"
35 #include "compiler/nir/nir_format_convert.h"
36 #include "nir_lower_blend.h"
37
38 /* Given processed factors, combine them per a blend function */
39
40 static nir_ssa_def *
nir_blend_func(nir_builder * b,enum blend_func func,nir_ssa_def * src,nir_ssa_def * dst)41 nir_blend_func(
42 nir_builder *b,
43 enum blend_func func,
44 nir_ssa_def *src, nir_ssa_def *dst)
45 {
46 switch (func) {
47 case BLEND_FUNC_ADD:
48 return nir_fadd(b, src, dst);
49 case BLEND_FUNC_SUBTRACT:
50 return nir_fsub(b, src, dst);
51 case BLEND_FUNC_REVERSE_SUBTRACT:
52 return nir_fsub(b, dst, src);
53 case BLEND_FUNC_MIN:
54 return nir_fmin(b, src, dst);
55 case BLEND_FUNC_MAX:
56 return nir_fmax(b, src, dst);
57 }
58
59 unreachable("Invalid blend function");
60 }
61
62 /* Does this blend function multiply by a blend factor? */
63
64 static bool
nir_blend_factored(enum blend_func func)65 nir_blend_factored(enum blend_func func)
66 {
67 switch (func) {
68 case BLEND_FUNC_ADD:
69 case BLEND_FUNC_SUBTRACT:
70 case BLEND_FUNC_REVERSE_SUBTRACT:
71 return true;
72 default:
73 return false;
74 }
75 }
76
77 /* Compute a src_alpha_saturate factor */
78 static nir_ssa_def *
nir_alpha_saturate(nir_builder * b,nir_ssa_def * src,nir_ssa_def * dst,unsigned chan,bool half)79 nir_alpha_saturate(
80 nir_builder *b,
81 nir_ssa_def *src, nir_ssa_def *dst,
82 unsigned chan,
83 bool half)
84 {
85 nir_ssa_def *Asrc = nir_channel(b, src, 3);
86 nir_ssa_def *Adst = nir_channel(b, dst, 3);
87 nir_ssa_def *one = half ? nir_imm_float16(b, 1.0) : nir_imm_float(b, 1.0);
88 nir_ssa_def *Adsti = nir_fsub(b, one, Adst);
89
90 return (chan < 3) ? nir_fmin(b, Asrc, Adsti) : one;
91 }
92
93 /* Returns a scalar single factor, unmultiplied */
94
95 static nir_ssa_def *
nir_blend_factor_value(nir_builder * b,nir_ssa_def * src,nir_ssa_def * src1,nir_ssa_def * dst,nir_ssa_def * bconst,unsigned chan,enum blend_factor factor,bool half)96 nir_blend_factor_value(
97 nir_builder *b,
98 nir_ssa_def *src, nir_ssa_def *src1, nir_ssa_def *dst, nir_ssa_def *bconst,
99 unsigned chan,
100 enum blend_factor factor,
101 bool half)
102 {
103 switch (factor) {
104 case BLEND_FACTOR_ZERO:
105 return half ? nir_imm_float16(b, 0.0) : nir_imm_float(b, 0.0);
106 case BLEND_FACTOR_SRC_COLOR:
107 return nir_channel(b, src, chan);
108 case BLEND_FACTOR_SRC1_COLOR:
109 return nir_channel(b, src1, chan);
110 case BLEND_FACTOR_DST_COLOR:
111 return nir_channel(b, dst, chan);
112 case BLEND_FACTOR_SRC_ALPHA:
113 return nir_channel(b, src, 3);
114 case BLEND_FACTOR_SRC1_ALPHA:
115 return nir_channel(b, src1, 3);
116 case BLEND_FACTOR_DST_ALPHA:
117 return nir_channel(b, dst, 3);
118 case BLEND_FACTOR_CONSTANT_COLOR:
119 return nir_channel(b, bconst, chan);
120 case BLEND_FACTOR_CONSTANT_ALPHA:
121 return nir_channel(b, bconst, 3);
122 case BLEND_FACTOR_SRC_ALPHA_SATURATE:
123 return nir_alpha_saturate(b, src, dst, chan, half);
124 }
125
126 unreachable("Invalid blend factor");
127 }
128
129 static nir_ssa_def *
nir_blend_factor(nir_builder * b,nir_ssa_def * raw_scalar,nir_ssa_def * src,nir_ssa_def * src1,nir_ssa_def * dst,nir_ssa_def * bconst,unsigned chan,enum blend_factor factor,bool inverted,bool half)130 nir_blend_factor(
131 nir_builder *b,
132 nir_ssa_def *raw_scalar,
133 nir_ssa_def *src, nir_ssa_def *src1, nir_ssa_def *dst, nir_ssa_def *bconst,
134 unsigned chan,
135 enum blend_factor factor,
136 bool inverted,
137 bool half)
138 {
139 nir_ssa_def *f =
140 nir_blend_factor_value(b, src, src1, dst, bconst, chan, factor, half);
141
142 nir_ssa_def *unity = half ? nir_imm_float16(b, 1.0) : nir_imm_float(b, 1.0);
143
144 if (inverted)
145 f = nir_fsub(b, unity, f);
146
147 return nir_fmul(b, raw_scalar, f);
148 }
149
150 /* Given a colormask, "blend" with the destination */
151
152 static nir_ssa_def *
nir_color_mask(nir_builder * b,unsigned mask,nir_ssa_def * src,nir_ssa_def * dst)153 nir_color_mask(
154 nir_builder *b,
155 unsigned mask,
156 nir_ssa_def *src,
157 nir_ssa_def *dst)
158 {
159 nir_ssa_def *masked[4];
160
161 for (unsigned c = 0; c < 4; ++c) {
162 bool enab = (mask & (1 << c));
163 masked[c] = enab ? nir_channel(b, src, c) : nir_channel(b, dst, c);
164 }
165
166 return nir_vec(b, masked, 4);
167 }
168
169 static nir_ssa_def *
nir_logicop_func(nir_builder * b,unsigned func,nir_ssa_def * src,nir_ssa_def * dst)170 nir_logicop_func(
171 nir_builder *b,
172 unsigned func,
173 nir_ssa_def *src, nir_ssa_def *dst)
174 {
175 switch (func) {
176 case PIPE_LOGICOP_CLEAR:
177 return nir_imm_ivec4(b, 0, 0, 0, 0);
178 case PIPE_LOGICOP_NOR:
179 return nir_inot(b, nir_ior(b, src, dst));
180 case PIPE_LOGICOP_AND_INVERTED:
181 return nir_iand(b, nir_inot(b, src), dst);
182 case PIPE_LOGICOP_COPY_INVERTED:
183 return nir_inot(b, src);
184 case PIPE_LOGICOP_AND_REVERSE:
185 return nir_iand(b, src, nir_inot(b, dst));
186 case PIPE_LOGICOP_INVERT:
187 return nir_inot(b, dst);
188 case PIPE_LOGICOP_XOR:
189 return nir_ixor(b, src, dst);
190 case PIPE_LOGICOP_NAND:
191 return nir_inot(b, nir_iand(b, src, dst));
192 case PIPE_LOGICOP_AND:
193 return nir_iand(b, src, dst);
194 case PIPE_LOGICOP_EQUIV:
195 return nir_inot(b, nir_ixor(b, src, dst));
196 case PIPE_LOGICOP_NOOP:
197 return dst;
198 case PIPE_LOGICOP_OR_INVERTED:
199 return nir_ior(b, nir_inot(b, src), dst);
200 case PIPE_LOGICOP_COPY:
201 return src;
202 case PIPE_LOGICOP_OR_REVERSE:
203 return nir_ior(b, src, nir_inot(b, dst));
204 case PIPE_LOGICOP_OR:
205 return nir_ior(b, src, dst);
206 case PIPE_LOGICOP_SET:
207 return nir_imm_ivec4(b, ~0, ~0, ~0, ~0);
208 }
209
210 unreachable("Invalid logciop function");
211 }
212
213 static nir_ssa_def *
nir_blend_logicop(nir_builder * b,nir_lower_blend_options options,nir_ssa_def * src,nir_ssa_def * dst)214 nir_blend_logicop(
215 nir_builder *b,
216 nir_lower_blend_options options,
217 nir_ssa_def *src, nir_ssa_def *dst)
218 {
219 const struct util_format_description *format_desc =
220 util_format_description(options.format);
221
222 if (options.half) {
223 src = nir_f2f32(b, src);
224 dst = nir_f2f32(b, dst);
225 }
226
227 assert(src->num_components <= 4);
228 assert(dst->num_components <= 4);
229
230 unsigned bits[4];
231 for (int i = 0; i < 4; ++i)
232 bits[i] = format_desc->channel[i].size;
233
234 src = nir_format_float_to_unorm(b, src, bits);
235 dst = nir_format_float_to_unorm(b, dst, bits);
236
237 nir_ssa_def *out = nir_logicop_func(b, options.logicop_func, src, dst);
238
239 if (bits[0] < 32) {
240 nir_const_value mask[4];
241 for (int i = 0; i < 4; ++i)
242 mask[i] = nir_const_value_for_int((1u << bits[i]) - 1, 32);
243
244 out = nir_iand(b, out, nir_build_imm(b, 4, 32, mask));
245 }
246
247 out = nir_format_unorm_to_float(b, out, bits);
248
249 if (options.half)
250 out = nir_f2f16(b, out);
251
252 return out;
253 }
254
255 /* Given a blend state, the source color, and the destination color,
256 * return the blended color
257 */
258
259 static nir_ssa_def *
nir_blend(nir_builder * b,nir_lower_blend_options options,nir_ssa_def * src,nir_ssa_def * src1,nir_ssa_def * dst)260 nir_blend(
261 nir_builder *b,
262 nir_lower_blend_options options,
263 nir_ssa_def *src, nir_ssa_def *src1, nir_ssa_def *dst)
264 {
265 if (options.logicop_enable)
266 return nir_blend_logicop(b, options, src, dst);
267
268 /* Grab the blend constant ahead of time */
269 nir_ssa_def *bconst;
270 if (options.is_bifrost) {
271 /* Bifrost is a scalar architecture, so let's split loads now to avoid a
272 * lowering pass.
273 */
274 bconst = nir_vec4(b,
275 nir_load_blend_const_color_r_float(b),
276 nir_load_blend_const_color_g_float(b),
277 nir_load_blend_const_color_b_float(b),
278 nir_load_blend_const_color_a_float(b));
279 } else {
280 bconst = nir_load_blend_const_color_rgba(b);
281 }
282
283 if (options.half)
284 bconst = nir_f2f16(b, bconst);
285
286 /* We blend per channel and recombine later */
287 nir_ssa_def *channels[4];
288
289 for (unsigned c = 0; c < 4; ++c) {
290 /* Decide properties based on channel */
291 nir_lower_blend_channel chan =
292 (c < 3) ? options.rgb : options.alpha;
293
294 nir_ssa_def *psrc = nir_channel(b, src, c);
295 nir_ssa_def *pdst = nir_channel(b, dst, c);
296
297 if (nir_blend_factored(chan.func)) {
298 psrc = nir_blend_factor(
299 b, psrc,
300 src, src1, dst, bconst, c,
301 chan.src_factor, chan.invert_src_factor, options.half);
302
303 pdst = nir_blend_factor(
304 b, pdst,
305 src, src1, dst, bconst, c,
306 chan.dst_factor, chan.invert_dst_factor, options.half);
307 }
308
309 channels[c] = nir_blend_func(b, chan.func, psrc, pdst);
310 }
311
312 /* Then just recombine with an applied colormask */
313 nir_ssa_def *blended = nir_vec(b, channels, 4);
314 return nir_color_mask(b, options.colormask, blended, dst);
315 }
316
317 static bool
nir_is_blend_channel_replace(nir_lower_blend_channel chan)318 nir_is_blend_channel_replace(nir_lower_blend_channel chan)
319 {
320 return
321 (chan.src_factor == BLEND_FACTOR_ZERO) &&
322 (chan.dst_factor == BLEND_FACTOR_ZERO) &&
323 (chan.invert_src_factor && !chan.invert_dst_factor) &&
324 (chan.func == BLEND_FUNC_ADD || chan.func == BLEND_FUNC_SUBTRACT || chan.func == BLEND_FUNC_MAX);
325 }
326
327 static bool
nir_is_blend_replace(nir_lower_blend_options options)328 nir_is_blend_replace(nir_lower_blend_options options)
329 {
330 return
331 nir_is_blend_channel_replace(options.rgb) &&
332 nir_is_blend_channel_replace(options.alpha);
333 }
334
335 void
nir_lower_blend(nir_shader * shader,nir_lower_blend_options options)336 nir_lower_blend(nir_shader *shader, nir_lower_blend_options options)
337 {
338 /* Blend shaders are represented as special fragment shaders */
339 assert(shader->info.stage == MESA_SHADER_FRAGMENT);
340
341 /* Special case replace, since there's nothing to do and we don't want to
342 * degrade intermediate precision (e.g. for non-blendable R32F targets) */
343 if (nir_is_blend_replace(options))
344 return;
345
346 nir_foreach_function(func, shader) {
347 nir_foreach_block(block, func->impl) {
348 nir_foreach_instr_safe(instr, block) {
349 if (instr->type != nir_instr_type_intrinsic)
350 continue;
351
352 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
353 if (intr->intrinsic != nir_intrinsic_store_deref)
354 continue;
355
356 /* TODO: Extending to MRT */
357 nir_variable *var = nir_intrinsic_get_var(intr, 0);
358 if (var->data.location != FRAG_RESULT_COLOR)
359 continue;
360
361 nir_builder b;
362 nir_builder_init(&b, func->impl);
363 b.cursor = nir_before_instr(instr);
364
365 /* Grab the input color */
366 nir_ssa_def *src = nir_ssa_for_src(&b, intr->src[1], 4);
367
368 /* Grab the dual-source input color */
369 nir_ssa_def *src1 = options.src1;
370
371 /* Grab the tilebuffer color - io lowered to load_output */
372 nir_ssa_def *dst = nir_load_var(&b, var);
373
374 /* Blend the two colors per the passed options */
375 nir_ssa_def *blended = nir_blend(&b, options, src, src1, dst);
376
377 /* Write out the final color instead of the input */
378 nir_instr_rewrite_src(instr, &intr->src[1],
379 nir_src_for_ssa(blended));
380
381 }
382 }
383
384 nir_metadata_preserve(func->impl, nir_metadata_block_index |
385 nir_metadata_dominance);
386 }
387 }
388