1 /*
2  * Copyright © 2023 Timothy Arceri <tarceri@itsqueeze.com>
3  * Copyright © 2016 Intel Corporation
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22  * DEALINGS IN THE SOFTWARE.
23  */
24 
25 
26 #include "nir.h"
27 #include "nir_builder.h"
28 #include "gl_nir.h"
29 #include "program/prog_instruction.h"
30 
31 #include "util/bitscan.h"
32 #include "main/shader_types.h"
33 
34 #define imm1(b, x) nir_imm_float(b, x)
35 #define imm3(b, x) nir_imm_vec3(b, x, x, x)
36 
37 static nir_def *
swizzle(nir_builder * b,nir_def * src,int swizzle,int components)38 swizzle(nir_builder *b, nir_def *src, int swizzle, int components)
39 {
40    unsigned swizzle_arr[4];
41    swizzle_arr[0] = GET_SWZ(swizzle, 0);
42    swizzle_arr[1] = GET_SWZ(swizzle, 1);
43    swizzle_arr[2] = GET_SWZ(swizzle, 2);
44    swizzle_arr[3] = GET_SWZ(swizzle, 3);
45 
46    return nir_swizzle(b, src, swizzle_arr, components);
47 }
48 
49 static nir_def *
swizzle_x(nir_builder * b,nir_def * src)50 swizzle_x(nir_builder *b, nir_def *src)
51 {
52    return nir_channel(b, src, 0);
53 }
54 
55 static nir_def *
swizzle_y(nir_builder * b,nir_def * src)56 swizzle_y(nir_builder *b, nir_def *src)
57 {
58    return nir_channel(b, src, 1);
59 }
60 
61 static nir_def *
swizzle_z(nir_builder * b,nir_def * src)62 swizzle_z(nir_builder *b, nir_def *src)
63 {
64    return nir_channel(b, src, 2);
65 }
66 
67 static nir_def *
swizzle_w(nir_builder * b,nir_def * src)68 swizzle_w(nir_builder *b, nir_def *src)
69 {
70    return nir_channel(b, src, 3);
71 }
72 
73 static nir_def *
blend_multiply(nir_builder * b,nir_def * src,nir_def * dst)74 blend_multiply(nir_builder *b, nir_def *src, nir_def *dst)
75 {
76    /* f(Cs,Cd) = Cs*Cd */
77    return nir_fmul(b, src, dst);
78 }
79 
80 static nir_def *
blend_screen(nir_builder * b,nir_def * src,nir_def * dst)81 blend_screen(nir_builder *b, nir_def *src, nir_def *dst)
82 {
83    /* f(Cs,Cd) = Cs+Cd-Cs*Cd */
84    return nir_fsub(b, nir_fadd(b, src, dst), nir_fmul(b, src, dst));
85 }
86 
87 static nir_def *
blend_overlay(nir_builder * b,nir_def * src,nir_def * dst)88 blend_overlay(nir_builder *b, nir_def *src, nir_def *dst)
89 {
90    /* f(Cs,Cd) = 2*Cs*Cd, if Cd <= 0.5
91     *            1-2*(1-Cs)*(1-Cd), otherwise
92     */
93    nir_def *rule_1 = nir_fmul(b, nir_fmul(b, src, dst), imm3(b, 2.0));
94    nir_def *rule_2 =
95       nir_fsub(b, imm3(b, 1.0), nir_fmul(b, nir_fmul(b, nir_fsub(b, imm3(b, 1.0), src), nir_fsub(b, imm3(b, 1.0), dst)), imm3(b, 2.0)));
96    return nir_bcsel(b, nir_fge(b, imm3(b, 0.5f), dst), rule_1, rule_2);
97 }
98 
99 static nir_def *
blend_darken(nir_builder * b,nir_def * src,nir_def * dst)100 blend_darken(nir_builder *b, nir_def *src, nir_def *dst)
101 {
102    /* f(Cs,Cd) = min(Cs,Cd) */
103    return nir_fmin(b, src, dst);
104 }
105 
106 static nir_def *
blend_lighten(nir_builder * b,nir_def * src,nir_def * dst)107 blend_lighten(nir_builder *b, nir_def *src, nir_def *dst)
108 {
109    /* f(Cs,Cd) = max(Cs,Cd) */
110    return nir_fmax(b, src, dst);
111 }
112 
113 static nir_def *
blend_colordodge(nir_builder * b,nir_def * src,nir_def * dst)114 blend_colordodge(nir_builder *b, nir_def *src, nir_def *dst)
115 {
116    /* f(Cs,Cd) =
117     *   0, if Cd <= 0
118     *   min(1,Cd/(1-Cs)), if Cd > 0 and Cs < 1
119     *   1, if Cd > 0 and Cs >= 1
120     */
121    return nir_bcsel(b, nir_fge(b, imm3(b, 0.0), dst), imm3(b, 0.0),
122                     nir_bcsel(b, nir_fge(b, src, imm3(b, 1.0)), imm3(b, 1.0),
123                               nir_fmin(b, imm3(b, 1.0), nir_fdiv(b, dst, nir_fsub(b, imm3(b, 1.0), src)))));
124 }
125 
126 static nir_def *
blend_colorburn(nir_builder * b,nir_def * src,nir_def * dst)127 blend_colorburn(nir_builder *b, nir_def *src, nir_def *dst)
128 {
129    /* f(Cs,Cd) =
130     *   1, if Cd >= 1
131     *   1 - min(1,(1-Cd)/Cs), if Cd < 1 and Cs > 0
132     *   0, if Cd < 1 and Cs <= 0
133     */
134    return nir_bcsel(b, nir_fge(b, dst, imm3(b, 1.0)), imm3(b, 1.0),
135                     nir_bcsel(b, nir_fge(b, imm3(b, 0.0), src), imm3(b, 0.0),
136                               nir_fsub(b, imm3(b, 1.0), nir_fmin(b, imm3(b, 1.0), nir_fdiv(b, nir_fsub(b, imm3(b, 1.0), dst), src)))));
137 }
138 
139 static nir_def *
blend_hardlight(nir_builder * b,nir_def * src,nir_def * dst)140 blend_hardlight(nir_builder *b, nir_def *src, nir_def *dst)
141 {
142    /* f(Cs,Cd) = 2*Cs*Cd, if Cs <= 0.5
143     *            1-2*(1-Cs)*(1-Cd), otherwise
144     */
145    nir_def *rule_1 = nir_fmul(b, imm3(b, 2.0), nir_fmul(b, src, dst));
146    nir_def *rule_2 =
147       nir_fsub(b, imm3(b, 1.0), nir_fmul(b, imm3(b, 2.0), nir_fmul(b, nir_fsub(b, imm3(b, 1.0), src), nir_fsub(b, imm3(b, 1.0), dst))));
148    return nir_bcsel(b, nir_fge(b, imm3(b, 0.5), src), rule_1, rule_2);
149 }
150 
151 static nir_def *
blend_softlight(nir_builder * b,nir_def * src,nir_def * dst)152 blend_softlight(nir_builder *b, nir_def *src, nir_def *dst)
153 {
154    /* f(Cs,Cd) =
155     *   Cd-(1-2*Cs)*Cd*(1-Cd),
156     *     if Cs <= 0.5
157     *   Cd+(2*Cs-1)*Cd*((16*Cd-12)*Cd+3),
158     *     if Cs > 0.5 and Cd <= 0.25
159     *   Cd+(2*Cs-1)*(sqrt(Cd)-Cd),
160     *     if Cs > 0.5 and Cd > 0.25
161     *
162     * We can simplify this to
163     *
164     * f(Cs,Cd) = Cd+(2*Cs-1)*g(Cs,Cd) where
165     * g(Cs,Cd) = Cd*Cd-Cd             if Cs <= 0.5
166     *            Cd*((16*Cd-12)*Cd+3) if Cs > 0.5 and Cd <= 0.25
167     *            sqrt(Cd)-Cd,         otherwise
168     */
169    nir_def *factor_1 = nir_fmul(b, dst, nir_fsub(b, imm3(b, 1.0), dst));
170    nir_def *factor_2 =
171       nir_fmul(b, dst, nir_fadd(b, nir_fmul(b, nir_fsub(b, nir_fmul(b, imm3(b, 16.0), dst), imm3(b, 12.0)), dst), imm3(b, 3.0)));
172    nir_def *factor_3 = nir_fsub(b, nir_fsqrt(b, dst), dst);
173    nir_def *factor = nir_bcsel(b, nir_fge(b, imm3(b, 0.5), src), factor_1,
174                                    nir_bcsel(b, nir_fge(b, imm3(b, 0.25), dst), factor_2, factor_3));
175    return nir_fadd(b, dst, nir_fmul(b, nir_fsub(b, nir_fmul(b, imm3(b, 2.0), src), imm3(b, 1.0)), factor));
176 }
177 
178 static nir_def *
blend_difference(nir_builder * b,nir_def * src,nir_def * dst)179 blend_difference(nir_builder *b, nir_def *src, nir_def *dst)
180 {
181    return nir_fabs(b, nir_fsub(b, dst, src));
182 }
183 
184 static nir_def *
blend_exclusion(nir_builder * b,nir_def * src,nir_def * dst)185 blend_exclusion(nir_builder *b, nir_def *src, nir_def *dst)
186 {
187    return nir_fadd(b, src, nir_fsub(b, dst, nir_fmul(b, imm3(b, 2.0), nir_fmul(b, src, dst))));
188 }
189 
190 /* Return the minimum of a vec3's components */
191 static nir_def *
minv3(nir_builder * b,nir_def * v)192 minv3(nir_builder *b, nir_def *v)
193 {
194    return nir_fmin(b, nir_fmin(b, swizzle_x(b, v), swizzle_y(b, v)), swizzle_z(b, v));
195 }
196 
197 /* Return the maximum of a vec3's components */
198 static nir_def *
maxv3(nir_builder * b,nir_def * v)199 maxv3(nir_builder *b, nir_def *v)
200 {
201    return nir_fmax(b, nir_fmax(b, swizzle_x(b, v), swizzle_y(b, v)), swizzle_z(b, v));
202 }
203 
204 static nir_def *
lumv3(nir_builder * b,nir_def * c)205 lumv3(nir_builder *b, nir_def *c)
206 {
207    return nir_fdot(b, c, nir_imm_vec3(b, 0.30, 0.59, 0.11));
208 }
209 
210 static nir_def *
satv3(nir_builder * b,nir_def * c)211 satv3(nir_builder *b, nir_def *c)
212 {
213    return nir_fsub(b, maxv3(b, c), minv3(b, c));
214 }
215 
216 static nir_variable *
add_temp_var(nir_builder * b,char * name,const struct glsl_type * type)217 add_temp_var(nir_builder *b, char *name, const struct glsl_type *type)
218 {
219    nir_variable *var = rzalloc(b->shader, nir_variable);
220    var->type = type;
221    var->name = ralloc_strdup(var, name);
222    var->data.mode = nir_var_function_temp;
223    nir_function_impl_add_variable(b->impl, var);
224 
225   return var;
226 }
227 
228 /* Take the base RGB color <cbase> and override its luminosity with that
229  * of the RGB color <clum>.
230  *
231  * This follows the equations given in the ES 3.2 (June 15th, 2016)
232  * specification.  Revision 16 of GL_KHR_blend_equation_advanced and
233  * revision 9 of GL_NV_blend_equation_advanced specify a different set
234  * of equations.  Older revisions match ES 3.2's text, and dEQP expects
235  * the ES 3.2 rules implemented here.
236  */
237 static void
set_lum(nir_builder * b,nir_variable * color,nir_variable * cbase,nir_variable * clum)238 set_lum(nir_builder *b,
239         nir_variable *color,
240         nir_variable *cbase,
241         nir_variable *clum)
242 {
243    nir_def *cbase_def = nir_load_var(b, cbase);
244    nir_store_var(b, color, nir_fadd(b, cbase_def, nir_fsub(b, lumv3(b, nir_load_var(b, clum)), lumv3(b, cbase_def))), ~0);
245 
246    nir_variable *llum = add_temp_var(b, "__blend_lum", glsl_float_type());
247    nir_variable *mincol = add_temp_var(b, "__blend_mincol", glsl_float_type());
248    nir_variable *maxcol = add_temp_var(b, "__blend_maxcol", glsl_float_type());
249 
250    nir_def *color_def = nir_load_var(b, color);
251    nir_store_var(b, llum, lumv3(b, color_def), ~0);
252    nir_store_var(b, mincol, minv3(b, color_def), ~0);
253    nir_store_var(b, maxcol, maxv3(b, color_def), ~0);
254 
255    nir_def *mincol_def = nir_load_var(b, mincol);
256    nir_def *llum_def = nir_load_var(b, llum);
257    nir_if *nif = nir_push_if(b, nir_flt(b, mincol_def, imm1(b, 0.0)));
258 
259    /* Add then block */
260    nir_store_var(b, color, nir_fadd(b, llum_def, nir_fdiv(b, nir_fmul(b, nir_fsub(b, color_def, llum_def), llum_def), nir_fsub(b, llum_def, mincol_def))), ~0);
261 
262    /* Add else block */
263    nir_push_else(b, nif);
264    nir_def *maxcol_def = nir_load_var(b, maxcol);
265    nir_if *nif2 = nir_push_if(b, nir_flt(b, imm1(b, 1.0), maxcol_def));
266    nir_store_var(b, color, nir_fadd(b, llum_def, nir_fdiv(b, nir_fmul(b, nir_fsub(b, color_def, llum_def), nir_fsub(b, imm3(b, 1.0), llum_def)), nir_fsub(b, maxcol_def, llum_def))), ~0);
267    nir_pop_if(b, nif2);
268    nir_pop_if(b, nif);
269 }
270 
271 /* Take the base RGB color <cbase> and override its saturation with
272  * that of the RGB color <csat>.  The override the luminosity of the
273  * result with that of the RGB color <clum>.
274  */
275 static void
set_lum_sat(nir_builder * b,nir_variable * color,nir_variable * cbase,nir_variable * csat,nir_variable * clum)276 set_lum_sat(nir_builder *b,
277             nir_variable *color,
278             nir_variable *cbase,
279             nir_variable *csat,
280             nir_variable *clum)
281 {
282    nir_def *cbase_def = nir_load_var(b, cbase);
283    nir_def *csat_def = nir_load_var(b, csat);
284 
285    nir_variable *sbase = add_temp_var(b, "__blend_sbase", glsl_float_type());
286    nir_store_var(b, sbase, satv3(b, cbase_def), ~0);
287 
288    /* Equivalent (modulo rounding errors) to setting the
289     * smallest (R,G,B) component to 0, the largest to <ssat>,
290     * and interpolating the "middle" component based on its
291     * original value relative to the smallest/largest.
292     */
293    nir_def *sbase_def = nir_load_var(b, sbase);
294    nir_if *nif = nir_push_if(b, nir_flt(b, imm1(b, 0.0), sbase_def));
295    nir_def *ssat = satv3(b, csat_def);
296    nir_def *minbase = minv3(b, cbase_def);
297    nir_store_var(b, color, nir_fdiv(b, nir_fmul(b, nir_fsub(b, cbase_def, minbase), ssat), sbase_def), ~0);
298    nir_push_else(b, nif);
299    nir_store_var(b, color, imm3(b, 0.0), ~0);
300    nir_pop_if(b, nif);
301 
302    set_lum(b, color, color, clum);
303 }
304 
305 static nir_def *
is_mode(nir_builder * b,nir_variable * mode,enum gl_advanced_blend_mode q)306 is_mode(nir_builder *b, nir_variable *mode, enum gl_advanced_blend_mode q)
307 {
308    return nir_ieq_imm(b, nir_load_var(b, mode), (unsigned) q);
309 }
310 
311 static nir_variable *
calc_blend_result(nir_builder * b,nir_variable * mode,nir_variable * fb,nir_def * blend_src,GLbitfield blend_qualifiers)312 calc_blend_result(nir_builder *b,
313                   nir_variable *mode,
314                   nir_variable *fb,
315                   nir_def *blend_src,
316                   GLbitfield blend_qualifiers)
317 {
318    nir_variable *result = add_temp_var(b, "__blend_result", glsl_vec4_type());
319 
320    /* If we're not doing advanced blending, just write the original value. */
321    nir_if *if_blending = nir_push_if(b, is_mode(b, mode, BLEND_NONE));
322    nir_store_var(b, result, blend_src, ~0);
323 
324    nir_push_else(b, if_blending);
325 
326    /* (Rs', Gs', Bs') =
327     *   (0, 0, 0),              if As == 0
328     *   (Rs/As, Gs/As, Bs/As),  otherwise
329     */
330    nir_variable *src_rgb = add_temp_var(b, "__blend_src_rgb", glsl_vec_type(3));
331    nir_variable *src_alpha = add_temp_var(b, "__blend_src_a", glsl_float_type());
332 
333    /* (Rd', Gd', Bd') =
334     *   (0, 0, 0),              if Ad == 0
335     *   (Rd/Ad, Gd/Ad, Bd/Ad),  otherwise
336     */
337    nir_variable *dst_rgb = add_temp_var(b, "__blend_dst_rgb", glsl_vec_type(3));
338    nir_variable *dst_alpha = add_temp_var(b, "__blend_dst_a", glsl_float_type());
339 
340    nir_def *fb_def = nir_load_var(b, fb);
341    nir_store_var(b, dst_alpha, swizzle_w(b, fb_def), ~0);
342 
343    nir_def *dst_alpha_def = nir_load_var(b, dst_alpha);
344    nir_if *nif = nir_push_if(b, nir_feq(b, dst_alpha_def, imm1(b, 0.0)));
345    nir_store_var(b, dst_rgb, imm3(b, 0.0), ~0);
346    nir_push_else(b, nif);
347    nir_store_var(b, dst_rgb, nir_bcsel(b, nir_feq(b, nir_trim_vector(b, fb_def, 3), swizzle(b, fb_def, SWIZZLE_WWWW, 3)), imm3(b, 1.0), nir_fdiv(b, nir_trim_vector(b, fb_def, 3), dst_alpha_def)), ~0);
348    nir_pop_if(b, nif);
349 
350    nir_store_var(b, src_alpha, swizzle_w(b, blend_src), ~0);
351    nir_def *src_alpha_def = nir_load_var(b, src_alpha);
352    nif = nir_push_if(b, nir_feq(b, src_alpha_def, imm1(b, 0.0)));
353    nir_store_var(b, src_rgb, imm3(b, 0.0), ~0);
354    nir_push_else(b, nif);
355    nir_store_var(b, src_rgb, nir_bcsel(b, nir_feq(b, nir_trim_vector(b, blend_src, 3), swizzle(b, blend_src, SWIZZLE_WWWW, 3)), imm3(b, 1.0), nir_fdiv(b, nir_trim_vector(b, blend_src, 3), src_alpha_def)), ~0);
356    nir_pop_if(b, nif);
357 
358    nir_variable *factor = add_temp_var(b, "__blend_factor", glsl_vec_type(3));
359 
360    nir_def *src_rgb_def = nir_load_var(b, src_rgb);
361    nir_def *dst_rgb_def = nir_load_var(b, dst_rgb);
362 
363    unsigned choices = blend_qualifiers;
364    while (choices) {
365       enum gl_advanced_blend_mode choice = (enum gl_advanced_blend_mode)u_bit_scan(&choices);
366 
367       nir_if *iff = nir_push_if(b, is_mode(b, mode, choice));
368       nir_def *val = NULL;
369 
370       switch (choice) {
371       case BLEND_MULTIPLY:
372          val = blend_multiply(b, src_rgb_def, dst_rgb_def);
373          break;
374       case BLEND_SCREEN:
375          val = blend_screen(b, src_rgb_def, dst_rgb_def);
376          break;
377       case BLEND_OVERLAY:
378          val = blend_overlay(b, src_rgb_def, dst_rgb_def);
379          break;
380       case BLEND_DARKEN:
381          val = blend_darken(b, src_rgb_def, dst_rgb_def);
382          break;
383       case BLEND_LIGHTEN:
384          val = blend_lighten(b, src_rgb_def, dst_rgb_def);
385          break;
386       case BLEND_COLORDODGE:
387          val = blend_colordodge(b, src_rgb_def, dst_rgb_def);
388          break;
389       case BLEND_COLORBURN:
390          val = blend_colorburn(b, src_rgb_def, dst_rgb_def);
391          break;
392       case BLEND_HARDLIGHT:
393          val = blend_hardlight(b, src_rgb_def, dst_rgb_def);
394          break;
395       case BLEND_SOFTLIGHT:
396          val = blend_softlight(b, src_rgb_def, dst_rgb_def);
397          break;
398       case BLEND_DIFFERENCE:
399          val = blend_difference(b, src_rgb_def, dst_rgb_def);
400          break;
401       case BLEND_EXCLUSION:
402          val = blend_exclusion(b, src_rgb_def, dst_rgb_def);
403          break;
404       case BLEND_HSL_HUE:
405          set_lum_sat(b, factor, src_rgb, dst_rgb, dst_rgb);
406          break;
407       case BLEND_HSL_SATURATION:
408          set_lum_sat(b, factor, dst_rgb, src_rgb, dst_rgb);
409          break;
410       case BLEND_HSL_COLOR:
411          set_lum(b, factor, src_rgb, dst_rgb);
412          break;
413       case BLEND_HSL_LUMINOSITY:
414          set_lum(b, factor, dst_rgb, src_rgb);
415          break;
416       case BLEND_NONE:
417          unreachable("not real cases");
418       }
419 
420       if (val)
421          nir_store_var(b, factor, val, ~0);
422 
423       nir_push_else(b, iff);
424    }
425 
426    /* reset cursor to the outtermost if-statements else block */
427    b->cursor = nir_after_block(nir_if_last_else_block(if_blending));
428 
429    /* p0(As,Ad) = As*Ad
430     * p1(As,Ad) = As*(1-Ad)
431     * p2(As,Ad) = Ad*(1-As)
432     */
433    nir_variable *p0 = add_temp_var(b, "__blend_p0", glsl_float_type());
434    nir_variable *p1 = add_temp_var(b, "__blend_p1", glsl_float_type());
435    nir_variable *p2 = add_temp_var(b, "__blend_p2", glsl_float_type());
436 
437    nir_store_var(b, p0, nir_fmul(b, src_alpha_def, dst_alpha_def), ~0);
438    nir_store_var(b, p1, nir_fmul(b, src_alpha_def, nir_fsub(b, imm1(b, 1.0), dst_alpha_def)), ~0);
439    nir_store_var(b, p2, nir_fmul(b, dst_alpha_def, nir_fsub(b, imm1(b, 1.0), src_alpha_def)), ~0);
440 
441    /* R = f(Rs',Rd')*p0(As,Ad) + Y*Rs'*p1(As,Ad) + Z*Rd'*p2(As,Ad)
442     * G = f(Gs',Gd')*p0(As,Ad) + Y*Gs'*p1(As,Ad) + Z*Gd'*p2(As,Ad)
443     * B = f(Bs',Bd')*p0(As,Ad) + Y*Bs'*p1(As,Ad) + Z*Bd'*p2(As,Ad)
444     * A =          X*p0(As,Ad) +     Y*p1(As,Ad) +     Z*p2(As,Ad)
445     *
446     * <X, Y, Z> is always <1, 1, 1>, so we can ignore it.
447     *
448     * In vector form, this is:
449     * RGB = factor * p0 + Cs * p1 + Cd * p2
450     *   A = p0 + p1 + p2
451     */
452    src_rgb_def = nir_load_var(b, src_rgb);
453    dst_rgb_def = nir_load_var(b, dst_rgb);
454    /* WRITEMASK_XYZ */
455    nir_store_var(b, result, nir_pad_vec4(b, nir_fadd(b, nir_fadd(b, nir_fmul(b, nir_load_var(b, factor), nir_load_var(b, p0)), nir_fmul(b, src_rgb_def, nir_load_var(b, p1))), nir_fmul(b, dst_rgb_def, nir_load_var(b, p2)))), 0x7);
456    /* WRITEMASK_W */
457    nir_def *val = nir_fadd(b, nir_fadd(b, nir_load_var(b, p0), nir_load_var(b, p1)), nir_load_var(b, p2));
458    nir_store_var(b, result, nir_vec4(b, val, val, val, val), 0x8);
459 
460    /* reset cursor to the end of the main function */
461    b->cursor = nir_after_impl(b->impl);
462 
463    return result;
464 }
465 
466 /**
467  * Dereference var, or var[0] if it's an array.
468  */
469 static nir_def *
load_output(nir_builder * b,nir_variable * var)470 load_output(nir_builder *b, nir_variable *var)
471 {
472    nir_def *var_def;
473    if (glsl_type_is_array(var->type)) {
474       var_def = nir_load_array_var_imm(b, var, 0);
475    } else {
476       var_def = nir_load_var(b, var);
477    }
478 
479    return var_def;
480 }
481 
482 bool
gl_nir_lower_blend_equation_advanced(nir_shader * sh,bool coherent)483 gl_nir_lower_blend_equation_advanced(nir_shader *sh, bool coherent)
484 {
485    assert(sh->info.stage == MESA_SHADER_FRAGMENT);
486 
487    /* All functions should have been inlined at this point */
488    assert(exec_list_length(&sh->functions) == 1);
489    nir_function_impl *impl = nir_shader_get_entrypoint(sh);
490 
491    if (sh->info.fs.advanced_blend_modes == 0) {
492       nir_metadata_preserve(impl, nir_metadata_all);
493       return false;
494    }
495 
496    sh->info.fs.uses_sample_shading = true;
497 
498    nir_builder b = nir_builder_at(nir_after_impl(impl));
499 
500    nir_variable *fb = nir_variable_create(sh, nir_var_shader_out,
501                                           glsl_vec4_type(),
502                                           "__blend_fb_fetch");
503    fb->data.location = -1; /* We will set the location at the end of this pass */
504    fb->data.read_only = 1;
505    fb->data.fb_fetch_output = 1;
506    if (coherent)
507       fb->data.access = ACCESS_COHERENT;
508    fb->data.how_declared = nir_var_hidden;
509 
510    nir_variable *mode = nir_variable_create(sh, nir_var_uniform,
511                                             glsl_uint_type(),
512                                             "gl_AdvancedBlendModeMESA");
513    mode->data.how_declared = nir_var_hidden;
514    mode->state_slots = rzalloc_array(mode, nir_state_slot, 1);
515    mode->num_state_slots = 1;
516    mode->state_slots[0].tokens[0] = STATE_ADVANCED_BLENDING_MODE;
517 
518    /* Gather any output variables referring to render target 0.
519     *
520     * ARB_enhanced_layouts irritatingly allows the shader to specify
521     * multiple output variables for the same render target, each of
522     * which writes a subset of the components, starting at location_frac.
523     * The variables can't overlap, thankfully.
524     */
525    nir_variable *outputs[4] = { NULL, NULL, NULL, NULL };
526    nir_foreach_variable_with_modes(var, sh, nir_var_shader_out) {
527       if (var->data.location == FRAG_RESULT_DATA0 ||
528           var->data.location == FRAG_RESULT_COLOR) {
529          const int components =
530             glsl_get_vector_elements(glsl_without_array(var->type));
531 
532          for (int i = 0; i < components; i++) {
533             if (outputs[var->data.location_frac + i] == NULL)
534                outputs[var->data.location_frac + i] = var;
535          }
536       }
537    }
538 
539    /* Combine values written to outputs into a single RGBA blend source.
540     * We assign <0, 0, 0, 1> to any components with no corresponding output.
541     */
542    nir_def *blend_source;
543    if (outputs[0] &&
544        glsl_get_vector_elements(glsl_without_array(outputs[0]->type)) == 4) {
545       blend_source = load_output(&b, outputs[0]);
546    } else {
547       nir_def *blend_comps[4];
548       for (int i = 0; i < 4; i++) {
549          nir_variable *var = outputs[i];
550          if (var) {
551             blend_comps[i] = swizzle(&b, load_output(&b, outputs[i]),
552                                      i - outputs[i]->data.location_frac, 1);
553          } else {
554             blend_comps[i] = nir_imm_float(&b, i < 3 ? 0.0f : 1.0f);
555          }
556       }
557 
558       blend_source = nir_vec(&b, blend_comps, 4);
559    }
560 
561    nir_variable *result_dest =
562       calc_blend_result(&b, mode, fb, blend_source,
563                         sh->info.fs.advanced_blend_modes);
564 
565    /* Copy the result back to the original values. */
566    for (int i = 0; i < 4; i++) {
567       if (!outputs[i])
568          continue;
569 
570       if (glsl_type_is_array(outputs[i]->type)) {
571          nir_store_array_var_imm(&b, outputs[i], 0, nir_load_var(&b, result_dest), 1 << i);
572       } else {
573          nir_def *val = swizzle(&b, nir_load_var(&b, result_dest), i, 1);
574          nir_store_var(&b, outputs[i], nir_vec4(&b, val, val, val, val), 1 << i);
575       }
576    }
577 
578    nir_metadata_preserve(impl, nir_metadata_none);
579 
580    /* Remove any dead writes before assigning location to __blend_fb_fetch
581     * otherwise they will be unable to be removed.
582     */
583    NIR_PASS(_, sh, nir_split_var_copies);
584    NIR_PASS(_, sh, nir_opt_dead_write_vars);
585 
586    nir_foreach_variable_with_modes(var, sh, nir_var_shader_out) {
587       if (strcmp(var->name, "__blend_fb_fetch") == 0) {
588          var->data.location = FRAG_RESULT_DATA0;
589          break;
590       }
591    }
592 
593    nir_validate_shader(sh, "after lower blend equation advanced");
594    return true;
595 }
596