• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**************************************************************************
2  *
3  * Copyright 2007 VMware, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial portions
16  * of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  **************************************************************************/
27 
28 /**
29  * quad blending
30  * \author Brian Paul
31  */
32 
33 #include "pipe/p_defines.h"
34 #include "util/u_math.h"
35 #include "util/u_memory.h"
36 #include "util/u_format.h"
37 #include "util/u_dual_blend.h"
38 #include "sp_context.h"
39 #include "sp_state.h"
40 #include "sp_quad.h"
41 #include "sp_tile_cache.h"
42 #include "sp_quad_pipe.h"
43 
44 
/**
 * Base color layout of a color buffer.  rebase_colors() switches on this
 * value to decide which channels of a quad must be replicated or forced
 * before the colors are stored.
 */
enum format
{
   RGBA,             /**< all four channels kept as-is */
   RGB,              /**< alpha is forced to 1 */
   LUMINANCE,        /**< G and B replicate R, alpha is forced to 1 */
   LUMINANCE_ALPHA,  /**< G and B replicate R, alpha kept */
   INTENSITY         /**< G, B and A all replicate R */
};
53 
54 
55 /** Subclass of quad_stage */
56 struct blend_quad_stage
57 {
58    struct quad_stage base;
59    boolean clamp[PIPE_MAX_COLOR_BUFS];  /**< clamp colors to [0,1]? */
60    enum format base_format[PIPE_MAX_COLOR_BUFS];
61    enum util_format_type format_type[PIPE_MAX_COLOR_BUFS];
62 };
63 
64 
/**
 * Downcast a generic quad_stage pointer to the blend stage embedding it.
 * This is an unchecked cast: the caller must pass a stage that really is
 * the 'base' member of a struct blend_quad_stage.
 */
static inline struct blend_quad_stage *
blend_quad_stage(struct quad_stage *stage)
{
   struct blend_quad_stage *bqs = (struct blend_quad_stage *) stage;

   return bqs;
}
71 
72 
/**
 * Copy the four elements of SRC into DST.
 * Arguments are parenthesized so that expressions such as *(p + 1) index
 * correctly; each argument is evaluated four times.
 */
#define VEC4_COPY(DST, SRC) \
do { \
   (DST)[0] = (SRC)[0]; \
   (DST)[1] = (SRC)[1]; \
   (DST)[2] = (SRC)[2]; \
   (DST)[3] = (SRC)[3]; \
} while (0)
80 
/**
 * Set all four elements of DST to the scalar SRC.
 * Arguments are parenthesized for safe expansion; SRC is evaluated four
 * times, so it should be free of side effects.
 */
#define VEC4_SCALAR(DST, SRC) \
do { \
   (DST)[0] = (SRC); \
   (DST)[1] = (SRC); \
   (DST)[2] = (SRC); \
   (DST)[3] = (SRC); \
} while (0)
88 
/**
 * Element-wise sum: R[i] = A[i] + B[i] for i in 0..3.
 * Arguments are parenthesized for safe expansion; each is evaluated
 * four times.
 */
#define VEC4_ADD(R, A, B) \
do { \
   (R)[0] = (A)[0] + (B)[0]; \
   (R)[1] = (A)[1] + (B)[1]; \
   (R)[2] = (A)[2] + (B)[2]; \
   (R)[3] = (A)[3] + (B)[3]; \
} while (0)
96 
/**
 * Element-wise difference: R[i] = A[i] - B[i] for i in 0..3.
 * Arguments are parenthesized for safe expansion; each is evaluated
 * four times.
 */
#define VEC4_SUB(R, A, B) \
do { \
   (R)[0] = (A)[0] - (B)[0]; \
   (R)[1] = (A)[1] - (B)[1]; \
   (R)[2] = (A)[2] - (B)[2]; \
   (R)[3] = (A)[3] - (B)[3]; \
} while (0)
104 
/**
 * Add and limit result to ceiling of 1.0: R[i] = min(A[i] + B[i], 1).
 * Only the upper bound is applied.  Arguments are parenthesized for safe
 * expansion and are evaluated several times.
 */
#define VEC4_ADD_SAT(R, A, B) \
do { \
   (R)[0] = (A)[0] + (B)[0];  if ((R)[0] > 1.0f) (R)[0] = 1.0f; \
   (R)[1] = (A)[1] + (B)[1];  if ((R)[1] > 1.0f) (R)[1] = 1.0f; \
   (R)[2] = (A)[2] + (B)[2];  if ((R)[2] > 1.0f) (R)[2] = 1.0f; \
   (R)[3] = (A)[3] + (B)[3];  if ((R)[3] > 1.0f) (R)[3] = 1.0f; \
} while (0)
113 
/**
 * Subtract and limit result to floor of 0.0: R[i] = max(A[i] - B[i], 0).
 * Only the lower bound is applied.  Arguments are parenthesized for safe
 * expansion and are evaluated several times.
 */
#define VEC4_SUB_SAT(R, A, B) \
do { \
   (R)[0] = (A)[0] - (B)[0];  if ((R)[0] < 0.0f) (R)[0] = 0.0f; \
   (R)[1] = (A)[1] - (B)[1];  if ((R)[1] < 0.0f) (R)[1] = 0.0f; \
   (R)[2] = (A)[2] - (B)[2];  if ((R)[2] < 0.0f) (R)[2] = 0.0f; \
   (R)[3] = (A)[3] - (B)[3];  if ((R)[3] < 0.0f) (R)[3] = 0.0f; \
} while (0)
122 
/**
 * Element-wise product: R[i] = A[i] * B[i] for i in 0..3.
 * R may be the same array as A or B.  Arguments are parenthesized for
 * safe expansion; each is evaluated four times.
 */
#define VEC4_MUL(R, A, B) \
do { \
   (R)[0] = (A)[0] * (B)[0]; \
   (R)[1] = (A)[1] * (B)[1]; \
   (R)[2] = (A)[2] * (B)[2]; \
   (R)[3] = (A)[3] * (B)[3]; \
} while (0)
130 
/**
 * Element-wise minimum: R[i] = (A[i] < B[i]) ? A[i] : B[i].
 * When the comparison is false (including equal values) B[i] is taken.
 * Arguments are parenthesized for safe expansion and are evaluated
 * several times.
 */
#define VEC4_MIN(R, A, B) \
do { \
   (R)[0] = ((A)[0] < (B)[0]) ? (A)[0] : (B)[0]; \
   (R)[1] = ((A)[1] < (B)[1]) ? (A)[1] : (B)[1]; \
   (R)[2] = ((A)[2] < (B)[2]) ? (A)[2] : (B)[2]; \
   (R)[3] = ((A)[3] < (B)[3]) ? (A)[3] : (B)[3]; \
} while (0)
138 
/**
 * Element-wise maximum: R[i] = (A[i] > B[i]) ? A[i] : B[i].
 * When the comparison is false (including equal values) B[i] is taken.
 * Arguments are parenthesized for safe expansion and are evaluated
 * several times.
 */
#define VEC4_MAX(R, A, B) \
do { \
   (R)[0] = ((A)[0] > (B)[0]) ? (A)[0] : (B)[0]; \
   (R)[1] = ((A)[1] > (B)[1]) ? (A)[1] : (B)[1]; \
   (R)[2] = ((A)[2] > (B)[2]) ? (A)[2] : (B)[2]; \
   (R)[3] = ((A)[3] > (B)[3]) ? (A)[3] : (B)[3]; \
} while (0)
146 
147 
148 
149 static void
logicop_quad(struct quad_stage * qs,float (* quadColor)[4],float (* dest)[4])150 logicop_quad(struct quad_stage *qs,
151              float (*quadColor)[4],
152              float (*dest)[4])
153 {
154    struct softpipe_context *softpipe = qs->softpipe;
155    ubyte src[4][4], dst[4][4], res[4][4];
156    uint *src4 = (uint *) src;
157    uint *dst4 = (uint *) dst;
158    uint *res4 = (uint *) res;
159    uint j;
160 
161 
162    /* convert to ubyte */
163    for (j = 0; j < 4; j++) { /* loop over R,G,B,A channels */
164       dst[j][0] = float_to_ubyte(dest[j][0]); /* P0 */
165       dst[j][1] = float_to_ubyte(dest[j][1]); /* P1 */
166       dst[j][2] = float_to_ubyte(dest[j][2]); /* P2 */
167       dst[j][3] = float_to_ubyte(dest[j][3]); /* P3 */
168 
169       src[j][0] = float_to_ubyte(quadColor[j][0]); /* P0 */
170       src[j][1] = float_to_ubyte(quadColor[j][1]); /* P1 */
171       src[j][2] = float_to_ubyte(quadColor[j][2]); /* P2 */
172       src[j][3] = float_to_ubyte(quadColor[j][3]); /* P3 */
173    }
174 
175    switch (softpipe->blend->logicop_func) {
176    case PIPE_LOGICOP_CLEAR:
177       for (j = 0; j < 4; j++)
178          res4[j] = 0;
179       break;
180    case PIPE_LOGICOP_NOR:
181       for (j = 0; j < 4; j++)
182          res4[j] = ~(src4[j] | dst4[j]);
183       break;
184    case PIPE_LOGICOP_AND_INVERTED:
185       for (j = 0; j < 4; j++)
186          res4[j] = ~src4[j] & dst4[j];
187       break;
188    case PIPE_LOGICOP_COPY_INVERTED:
189       for (j = 0; j < 4; j++)
190          res4[j] = ~src4[j];
191       break;
192    case PIPE_LOGICOP_AND_REVERSE:
193       for (j = 0; j < 4; j++)
194          res4[j] = src4[j] & ~dst4[j];
195       break;
196    case PIPE_LOGICOP_INVERT:
197       for (j = 0; j < 4; j++)
198          res4[j] = ~dst4[j];
199       break;
200    case PIPE_LOGICOP_XOR:
201       for (j = 0; j < 4; j++)
202          res4[j] = dst4[j] ^ src4[j];
203       break;
204    case PIPE_LOGICOP_NAND:
205       for (j = 0; j < 4; j++)
206          res4[j] = ~(src4[j] & dst4[j]);
207       break;
208    case PIPE_LOGICOP_AND:
209       for (j = 0; j < 4; j++)
210          res4[j] = src4[j] & dst4[j];
211       break;
212    case PIPE_LOGICOP_EQUIV:
213       for (j = 0; j < 4; j++)
214          res4[j] = ~(src4[j] ^ dst4[j]);
215       break;
216    case PIPE_LOGICOP_NOOP:
217       for (j = 0; j < 4; j++)
218          res4[j] = dst4[j];
219       break;
220    case PIPE_LOGICOP_OR_INVERTED:
221       for (j = 0; j < 4; j++)
222          res4[j] = ~src4[j] | dst4[j];
223       break;
224    case PIPE_LOGICOP_COPY:
225       for (j = 0; j < 4; j++)
226          res4[j] = src4[j];
227       break;
228    case PIPE_LOGICOP_OR_REVERSE:
229       for (j = 0; j < 4; j++)
230          res4[j] = src4[j] | ~dst4[j];
231       break;
232    case PIPE_LOGICOP_OR:
233       for (j = 0; j < 4; j++)
234          res4[j] = src4[j] | dst4[j];
235       break;
236    case PIPE_LOGICOP_SET:
237       for (j = 0; j < 4; j++)
238          res4[j] = ~0;
239       break;
240    default:
241       assert(0 && "invalid logicop mode");
242    }
243 
244    for (j = 0; j < 4; j++) {
245       quadColor[j][0] = ubyte_to_float(res[j][0]);
246       quadColor[j][1] = ubyte_to_float(res[j][1]);
247       quadColor[j][2] = ubyte_to_float(res[j][2]);
248       quadColor[j][3] = ubyte_to_float(res[j][3]);
249    }
250 }
251 
252 
253 
254 /**
255  * Do blending for a 2x2 quad for one color buffer.
256  * \param quadColor  the incoming quad colors
257  * \param dest  the destination/framebuffer quad colors
258  * \param const_blend_color  the constant blend color
259  * \param blend_index  which set of blending terms to use
260  */
261 static void
blend_quad(struct quad_stage * qs,float (* quadColor)[4],float (* quadColor2)[4],float (* dest)[4],const float const_blend_color[4],unsigned blend_index)262 blend_quad(struct quad_stage *qs,
263            float (*quadColor)[4],
264            float (*quadColor2)[4],
265            float (*dest)[4],
266            const float const_blend_color[4],
267            unsigned blend_index)
268 {
269    static const float zero[4] = { 0, 0, 0, 0 };
270    static const float one[4] = { 1, 1, 1, 1 };
271    struct softpipe_context *softpipe = qs->softpipe;
272    float source[4][TGSI_QUAD_SIZE] = { { 0 } };
273    float blend_dest[4][TGSI_QUAD_SIZE];
274 
275    /*
276     * Compute src/first term RGB
277     */
278    switch (softpipe->blend->rt[blend_index].rgb_src_factor) {
279    case PIPE_BLENDFACTOR_ONE:
280       VEC4_COPY(source[0], quadColor[0]); /* R */
281       VEC4_COPY(source[1], quadColor[1]); /* G */
282       VEC4_COPY(source[2], quadColor[2]); /* B */
283       break;
284    case PIPE_BLENDFACTOR_SRC_COLOR:
285       VEC4_MUL(source[0], quadColor[0], quadColor[0]); /* R */
286       VEC4_MUL(source[1], quadColor[1], quadColor[1]); /* G */
287       VEC4_MUL(source[2], quadColor[2], quadColor[2]); /* B */
288       break;
289    case PIPE_BLENDFACTOR_SRC_ALPHA:
290       {
291          const float *alpha = quadColor[3];
292          VEC4_MUL(source[0], quadColor[0], alpha); /* R */
293          VEC4_MUL(source[1], quadColor[1], alpha); /* G */
294          VEC4_MUL(source[2], quadColor[2], alpha); /* B */
295       }
296       break;
297    case PIPE_BLENDFACTOR_DST_COLOR:
298       VEC4_MUL(source[0], quadColor[0], dest[0]); /* R */
299       VEC4_MUL(source[1], quadColor[1], dest[1]); /* G */
300       VEC4_MUL(source[2], quadColor[2], dest[2]); /* B */
301       break;
302    case PIPE_BLENDFACTOR_DST_ALPHA:
303       {
304          const float *alpha = dest[3];
305          VEC4_MUL(source[0], quadColor[0], alpha); /* R */
306          VEC4_MUL(source[1], quadColor[1], alpha); /* G */
307          VEC4_MUL(source[2], quadColor[2], alpha); /* B */
308       }
309       break;
310    case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
311       {
312          const float *alpha = quadColor[3];
313          float diff[4], temp[4];
314          VEC4_SUB(diff, one, dest[3]);
315          VEC4_MIN(temp, alpha, diff);
316          VEC4_MUL(source[0], quadColor[0], temp); /* R */
317          VEC4_MUL(source[1], quadColor[1], temp); /* G */
318          VEC4_MUL(source[2], quadColor[2], temp); /* B */
319       }
320       break;
321    case PIPE_BLENDFACTOR_CONST_COLOR:
322       {
323          float comp[4];
324          VEC4_SCALAR(comp, const_blend_color[0]); /* R */
325          VEC4_MUL(source[0], quadColor[0], comp); /* R */
326          VEC4_SCALAR(comp, const_blend_color[1]); /* G */
327          VEC4_MUL(source[1], quadColor[1], comp); /* G */
328          VEC4_SCALAR(comp, const_blend_color[2]); /* B */
329          VEC4_MUL(source[2], quadColor[2], comp); /* B */
330       }
331       break;
332    case PIPE_BLENDFACTOR_CONST_ALPHA:
333       {
334          float alpha[4];
335          VEC4_SCALAR(alpha, const_blend_color[3]);
336          VEC4_MUL(source[0], quadColor[0], alpha); /* R */
337          VEC4_MUL(source[1], quadColor[1], alpha); /* G */
338          VEC4_MUL(source[2], quadColor[2], alpha); /* B */
339       }
340       break;
341    case PIPE_BLENDFACTOR_SRC1_COLOR:
342       VEC4_MUL(source[0], quadColor[0], quadColor2[0]); /* R */
343       VEC4_MUL(source[1], quadColor[1], quadColor2[1]); /* G */
344       VEC4_MUL(source[2], quadColor[2], quadColor2[2]); /* B */
345       break;
346    case PIPE_BLENDFACTOR_SRC1_ALPHA:
347       {
348          const float *alpha = quadColor2[3];
349          VEC4_MUL(source[0], quadColor[0], alpha); /* R */
350          VEC4_MUL(source[1], quadColor[1], alpha); /* G */
351          VEC4_MUL(source[2], quadColor[2], alpha); /* B */
352       }
353       break;
354    case PIPE_BLENDFACTOR_ZERO:
355       VEC4_COPY(source[0], zero); /* R */
356       VEC4_COPY(source[1], zero); /* G */
357       VEC4_COPY(source[2], zero); /* B */
358       break;
359    case PIPE_BLENDFACTOR_INV_SRC_COLOR:
360       {
361          float inv_comp[4];
362          VEC4_SUB(inv_comp, one, quadColor[0]); /* R */
363          VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */
364          VEC4_SUB(inv_comp, one, quadColor[1]); /* G */
365          VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */
366          VEC4_SUB(inv_comp, one, quadColor[2]); /* B */
367          VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */
368       }
369       break;
370    case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
371       {
372          float inv_alpha[4];
373          VEC4_SUB(inv_alpha, one, quadColor[3]);
374          VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
375          VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
376          VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
377       }
378       break;
379    case PIPE_BLENDFACTOR_INV_DST_ALPHA:
380       {
381          float inv_alpha[4];
382          VEC4_SUB(inv_alpha, one, dest[3]);
383          VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
384          VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
385          VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
386       }
387       break;
388    case PIPE_BLENDFACTOR_INV_DST_COLOR:
389       {
390          float inv_comp[4];
391          VEC4_SUB(inv_comp, one, dest[0]); /* R */
392          VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */
393          VEC4_SUB(inv_comp, one, dest[1]); /* G */
394          VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */
395          VEC4_SUB(inv_comp, one, dest[2]); /* B */
396          VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */
397       }
398       break;
399    case PIPE_BLENDFACTOR_INV_CONST_COLOR:
400       {
401          float inv_comp[4];
402          /* R */
403          VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[0]);
404          VEC4_MUL(source[0], quadColor[0], inv_comp);
405          /* G */
406          VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[1]);
407          VEC4_MUL(source[1], quadColor[1], inv_comp);
408          /* B */
409          VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[2]);
410          VEC4_MUL(source[2], quadColor[2], inv_comp);
411       }
412       break;
413    case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
414       {
415          float inv_alpha[4];
416          VEC4_SCALAR(inv_alpha, 1.0f - const_blend_color[3]);
417          VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
418          VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
419          VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
420       }
421       break;
422    case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
423       {
424          float inv_comp[4];
425          VEC4_SUB(inv_comp, one, quadColor2[0]); /* R */
426          VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */
427          VEC4_SUB(inv_comp, one, quadColor2[1]); /* G */
428          VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */
429          VEC4_SUB(inv_comp, one, quadColor2[2]); /* B */
430          VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */
431       }
432       break;
433    case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
434       {
435          float inv_alpha[4];
436          VEC4_SUB(inv_alpha, one, quadColor2[3]);
437          VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
438          VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
439          VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
440       }
441       break;
442    default:
443       assert(0 && "invalid rgb src factor");
444    }
445 
446    /*
447     * Compute src/first term A
448     */
449    switch (softpipe->blend->rt[blend_index].alpha_src_factor) {
450    case PIPE_BLENDFACTOR_ONE:
451       VEC4_COPY(source[3], quadColor[3]); /* A */
452       break;
453    case PIPE_BLENDFACTOR_SRC_COLOR:
454       /* fall-through */
455    case PIPE_BLENDFACTOR_SRC_ALPHA:
456       {
457          const float *alpha = quadColor[3];
458          VEC4_MUL(source[3], quadColor[3], alpha); /* A */
459       }
460       break;
461    case PIPE_BLENDFACTOR_DST_COLOR:
462       /* fall-through */
463    case PIPE_BLENDFACTOR_DST_ALPHA:
464       VEC4_MUL(source[3], quadColor[3], dest[3]); /* A */
465       break;
466    case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
467       /* multiply alpha by 1.0 */
468       VEC4_COPY(source[3], quadColor[3]); /* A */
469       break;
470    case PIPE_BLENDFACTOR_CONST_COLOR:
471       /* fall-through */
472    case PIPE_BLENDFACTOR_CONST_ALPHA:
473       {
474          float comp[4];
475          VEC4_SCALAR(comp, const_blend_color[3]); /* A */
476          VEC4_MUL(source[3], quadColor[3], comp); /* A */
477       }
478       break;
479    case PIPE_BLENDFACTOR_ZERO:
480       VEC4_COPY(source[3], zero); /* A */
481       break;
482    case PIPE_BLENDFACTOR_INV_SRC_COLOR:
483       /* fall-through */
484    case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
485       {
486          float inv_alpha[4];
487          VEC4_SUB(inv_alpha, one, quadColor[3]);
488          VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */
489       }
490       break;
491    case PIPE_BLENDFACTOR_INV_DST_COLOR:
492       /* fall-through */
493    case PIPE_BLENDFACTOR_INV_DST_ALPHA:
494       {
495          float inv_alpha[4];
496          VEC4_SUB(inv_alpha, one, dest[3]);
497          VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */
498       }
499       break;
500    case PIPE_BLENDFACTOR_INV_CONST_COLOR:
501       /* fall-through */
502    case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
503       {
504          float inv_comp[4];
505          /* A */
506          VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[3]);
507          VEC4_MUL(source[3], quadColor[3], inv_comp);
508       }
509       break;
510    case PIPE_BLENDFACTOR_SRC1_COLOR:
511       /* fall-through */
512    case PIPE_BLENDFACTOR_SRC1_ALPHA:
513       {
514          const float *alpha = quadColor2[3];
515          VEC4_MUL(source[3], quadColor[3], alpha); /* A */
516       }
517       break;
518    case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
519       /* fall-through */
520    case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
521       {
522          float inv_alpha[4];
523          VEC4_SUB(inv_alpha, one, quadColor2[3]);
524          VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */
525       }
526       break;
527    default:
528       assert(0 && "invalid alpha src factor");
529    }
530 
531    /* Save the original dest for use in masking */
532    VEC4_COPY(blend_dest[0], dest[0]);
533    VEC4_COPY(blend_dest[1], dest[1]);
534    VEC4_COPY(blend_dest[2], dest[2]);
535    VEC4_COPY(blend_dest[3], dest[3]);
536 
537 
538    /*
539     * Compute blend_dest/second term RGB
540     */
541    switch (softpipe->blend->rt[blend_index].rgb_dst_factor) {
542    case PIPE_BLENDFACTOR_ONE:
543       /* blend_dest = blend_dest * 1   NO-OP, leave blend_dest as-is */
544       break;
545    case PIPE_BLENDFACTOR_SRC_COLOR:
546       VEC4_MUL(blend_dest[0], blend_dest[0], quadColor[0]); /* R */
547       VEC4_MUL(blend_dest[1], blend_dest[1], quadColor[1]); /* G */
548       VEC4_MUL(blend_dest[2], blend_dest[2], quadColor[2]); /* B */
549       break;
550    case PIPE_BLENDFACTOR_SRC_ALPHA:
551       VEC4_MUL(blend_dest[0], blend_dest[0], quadColor[3]); /* R * A */
552       VEC4_MUL(blend_dest[1], blend_dest[1], quadColor[3]); /* G * A */
553       VEC4_MUL(blend_dest[2], blend_dest[2], quadColor[3]); /* B * A */
554       break;
555    case PIPE_BLENDFACTOR_DST_ALPHA:
556       VEC4_MUL(blend_dest[0], blend_dest[0], blend_dest[3]); /* R * A */
557       VEC4_MUL(blend_dest[1], blend_dest[1], blend_dest[3]); /* G * A */
558       VEC4_MUL(blend_dest[2], blend_dest[2], blend_dest[3]); /* B * A */
559       break;
560    case PIPE_BLENDFACTOR_DST_COLOR:
561       VEC4_MUL(blend_dest[0], blend_dest[0], blend_dest[0]); /* R */
562       VEC4_MUL(blend_dest[1], blend_dest[1], blend_dest[1]); /* G */
563       VEC4_MUL(blend_dest[2], blend_dest[2], blend_dest[2]); /* B */
564       break;
565    case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
566       {
567          const float *alpha = quadColor[3];
568          float diff[4], temp[4];
569          VEC4_SUB(diff, one, blend_dest[3]);
570          VEC4_MIN(temp, alpha, diff);
571          VEC4_MUL(blend_dest[0], blend_dest[0], temp); /* R */
572          VEC4_MUL(blend_dest[1], blend_dest[1], temp); /* G */
573          VEC4_MUL(blend_dest[2], blend_dest[2], temp); /* B */
574       }
575       break;
576    case PIPE_BLENDFACTOR_CONST_COLOR:
577       {
578          float comp[4];
579          VEC4_SCALAR(comp, const_blend_color[0]); /* R */
580          VEC4_MUL(blend_dest[0], blend_dest[0], comp); /* R */
581          VEC4_SCALAR(comp, const_blend_color[1]); /* G */
582          VEC4_MUL(blend_dest[1], blend_dest[1], comp); /* G */
583          VEC4_SCALAR(comp, const_blend_color[2]); /* B */
584          VEC4_MUL(blend_dest[2], blend_dest[2], comp); /* B */
585       }
586       break;
587    case PIPE_BLENDFACTOR_CONST_ALPHA:
588       {
589          float comp[4];
590          VEC4_SCALAR(comp, const_blend_color[3]); /* A */
591          VEC4_MUL(blend_dest[0], blend_dest[0], comp); /* R */
592          VEC4_MUL(blend_dest[1], blend_dest[1], comp); /* G */
593          VEC4_MUL(blend_dest[2], blend_dest[2], comp); /* B */
594       }
595       break;
596    case PIPE_BLENDFACTOR_ZERO:
597       VEC4_COPY(blend_dest[0], zero); /* R */
598       VEC4_COPY(blend_dest[1], zero); /* G */
599       VEC4_COPY(blend_dest[2], zero); /* B */
600       break;
601    case PIPE_BLENDFACTOR_SRC1_COLOR:
602       VEC4_MUL(blend_dest[0], blend_dest[0], quadColor2[0]); /* R */
603       VEC4_MUL(blend_dest[1], blend_dest[1], quadColor2[1]); /* G */
604       VEC4_MUL(blend_dest[2], blend_dest[2], quadColor2[2]); /* B */
605       break;
606    case PIPE_BLENDFACTOR_SRC1_ALPHA:
607       VEC4_MUL(blend_dest[0], blend_dest[0], quadColor2[3]); /* R * A */
608       VEC4_MUL(blend_dest[1], blend_dest[1], quadColor2[3]); /* G * A */
609       VEC4_MUL(blend_dest[2], blend_dest[2], quadColor2[3]); /* B * A */
610       break;
611    case PIPE_BLENDFACTOR_INV_SRC_COLOR:
612       {
613          float inv_comp[4];
614          VEC4_SUB(inv_comp, one, quadColor[0]); /* R */
615          VEC4_MUL(blend_dest[0], inv_comp, blend_dest[0]); /* R */
616          VEC4_SUB(inv_comp, one, quadColor[1]); /* G */
617          VEC4_MUL(blend_dest[1], inv_comp, blend_dest[1]); /* G */
618          VEC4_SUB(inv_comp, one, quadColor[2]); /* B */
619          VEC4_MUL(blend_dest[2], inv_comp, blend_dest[2]); /* B */
620       }
621       break;
622    case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
623       {
624          float one_minus_alpha[TGSI_QUAD_SIZE];
625          VEC4_SUB(one_minus_alpha, one, quadColor[3]);
626          VEC4_MUL(blend_dest[0], blend_dest[0], one_minus_alpha); /* R */
627          VEC4_MUL(blend_dest[1], blend_dest[1], one_minus_alpha); /* G */
628          VEC4_MUL(blend_dest[2], blend_dest[2], one_minus_alpha); /* B */
629       }
630       break;
631    case PIPE_BLENDFACTOR_INV_DST_ALPHA:
632       {
633          float inv_comp[4];
634          VEC4_SUB(inv_comp, one, blend_dest[3]); /* A */
635          VEC4_MUL(blend_dest[0], inv_comp, blend_dest[0]); /* R */
636          VEC4_MUL(blend_dest[1], inv_comp, blend_dest[1]); /* G */
637          VEC4_MUL(blend_dest[2], inv_comp, blend_dest[2]); /* B */
638       }
639       break;
640    case PIPE_BLENDFACTOR_INV_DST_COLOR:
641       {
642          float inv_comp[4];
643          VEC4_SUB(inv_comp, one, blend_dest[0]); /* R */
644          VEC4_MUL(blend_dest[0], blend_dest[0], inv_comp); /* R */
645          VEC4_SUB(inv_comp, one, blend_dest[1]); /* G */
646          VEC4_MUL(blend_dest[1], blend_dest[1], inv_comp); /* G */
647          VEC4_SUB(inv_comp, one, blend_dest[2]); /* B */
648          VEC4_MUL(blend_dest[2], blend_dest[2], inv_comp); /* B */
649       }
650       break;
651    case PIPE_BLENDFACTOR_INV_CONST_COLOR:
652       {
653          float inv_comp[4];
654          /* R */
655          VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[0]);
656          VEC4_MUL(blend_dest[0], blend_dest[0], inv_comp);
657          /* G */
658          VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[1]);
659          VEC4_MUL(blend_dest[1], blend_dest[1], inv_comp);
660          /* B */
661          VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[2]);
662          VEC4_MUL(blend_dest[2], blend_dest[2], inv_comp);
663       }
664       break;
665    case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
666       {
667          float inv_comp[4];
668          VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[3]);
669          VEC4_MUL(blend_dest[0], blend_dest[0], inv_comp);
670          VEC4_MUL(blend_dest[1], blend_dest[1], inv_comp);
671          VEC4_MUL(blend_dest[2], blend_dest[2], inv_comp);
672       }
673       break;
674    case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
675       {
676          float inv_comp[4];
677          VEC4_SUB(inv_comp, one, quadColor2[0]); /* R */
678          VEC4_MUL(blend_dest[0], inv_comp, blend_dest[0]); /* R */
679          VEC4_SUB(inv_comp, one, quadColor2[1]); /* G */
680          VEC4_MUL(blend_dest[1], inv_comp, blend_dest[1]); /* G */
681          VEC4_SUB(inv_comp, one, quadColor2[2]); /* B */
682          VEC4_MUL(blend_dest[2], inv_comp, blend_dest[2]); /* B */
683       }
684       break;
685    case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
686       {
687          float one_minus_alpha[TGSI_QUAD_SIZE];
688          VEC4_SUB(one_minus_alpha, one, quadColor2[3]);
689          VEC4_MUL(blend_dest[0], blend_dest[0], one_minus_alpha); /* R */
690          VEC4_MUL(blend_dest[1], blend_dest[1], one_minus_alpha); /* G */
691          VEC4_MUL(blend_dest[2], blend_dest[2], one_minus_alpha); /* B */
692       }
693       break;
694    default:
695       assert(0 && "invalid rgb dst factor");
696    }
697 
698    /*
699     * Compute blend_dest/second term A
700     */
701    switch (softpipe->blend->rt[blend_index].alpha_dst_factor) {
702    case PIPE_BLENDFACTOR_ONE:
703       /* blend_dest = blend_dest * 1   NO-OP, leave blend_dest as-is */
704       break;
705    case PIPE_BLENDFACTOR_SRC_COLOR:
706       /* fall-through */
707    case PIPE_BLENDFACTOR_SRC_ALPHA:
708       VEC4_MUL(blend_dest[3], blend_dest[3], quadColor[3]); /* A * A */
709       break;
710    case PIPE_BLENDFACTOR_DST_COLOR:
711       /* fall-through */
712    case PIPE_BLENDFACTOR_DST_ALPHA:
713       VEC4_MUL(blend_dest[3], blend_dest[3], blend_dest[3]); /* A */
714       break;
715    case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
716       /* blend_dest = blend_dest * 1   NO-OP, leave blend_dest as-is */
717       break;
718    case PIPE_BLENDFACTOR_CONST_COLOR:
719       /* fall-through */
720    case PIPE_BLENDFACTOR_CONST_ALPHA:
721       {
722          float comp[4];
723          VEC4_SCALAR(comp, const_blend_color[3]); /* A */
724          VEC4_MUL(blend_dest[3], blend_dest[3], comp); /* A */
725       }
726       break;
727    case PIPE_BLENDFACTOR_ZERO:
728       VEC4_COPY(blend_dest[3], zero); /* A */
729       break;
730    case PIPE_BLENDFACTOR_INV_SRC_COLOR:
731       /* fall-through */
732    case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
733       {
734          float one_minus_alpha[TGSI_QUAD_SIZE];
735          VEC4_SUB(one_minus_alpha, one, quadColor[3]);
736          VEC4_MUL(blend_dest[3], blend_dest[3], one_minus_alpha); /* A */
737       }
738       break;
739    case PIPE_BLENDFACTOR_INV_DST_COLOR:
740       /* fall-through */
741    case PIPE_BLENDFACTOR_INV_DST_ALPHA:
742       {
743          float inv_comp[4];
744          VEC4_SUB(inv_comp, one, blend_dest[3]); /* A */
745          VEC4_MUL(blend_dest[3], inv_comp, blend_dest[3]); /* A */
746       }
747       break;
748    case PIPE_BLENDFACTOR_INV_CONST_COLOR:
749       /* fall-through */
750    case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
751       {
752          float inv_comp[4];
753          VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[3]);
754          VEC4_MUL(blend_dest[3], blend_dest[3], inv_comp);
755       }
756       break;
757    case PIPE_BLENDFACTOR_SRC1_COLOR:
758       /* fall-through */
759    case PIPE_BLENDFACTOR_SRC1_ALPHA:
760       VEC4_MUL(blend_dest[3], blend_dest[3], quadColor2[3]); /* A * A */
761       break;
762    case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
763       /* fall-through */
764    case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
765       {
766          float one_minus_alpha[TGSI_QUAD_SIZE];
767          VEC4_SUB(one_minus_alpha, one, quadColor2[3]);
768          VEC4_MUL(blend_dest[3], blend_dest[3], one_minus_alpha); /* A */
769       }
770       break;
771    default:
772       assert(0 && "invalid alpha dst factor");
773    }
774 
775    /*
776     * Combine RGB terms
777     */
778    switch (softpipe->blend->rt[blend_index].rgb_func) {
779    case PIPE_BLEND_ADD:
780       VEC4_ADD(quadColor[0], source[0], blend_dest[0]); /* R */
781       VEC4_ADD(quadColor[1], source[1], blend_dest[1]); /* G */
782       VEC4_ADD(quadColor[2], source[2], blend_dest[2]); /* B */
783       break;
784    case PIPE_BLEND_SUBTRACT:
785       VEC4_SUB(quadColor[0], source[0], blend_dest[0]); /* R */
786       VEC4_SUB(quadColor[1], source[1], blend_dest[1]); /* G */
787       VEC4_SUB(quadColor[2], source[2], blend_dest[2]); /* B */
788       break;
789    case PIPE_BLEND_REVERSE_SUBTRACT:
790       VEC4_SUB(quadColor[0], blend_dest[0], source[0]); /* R */
791       VEC4_SUB(quadColor[1], blend_dest[1], source[1]); /* G */
792       VEC4_SUB(quadColor[2], blend_dest[2], source[2]); /* B */
793       break;
794    case PIPE_BLEND_MIN:
795       VEC4_MIN(quadColor[0], source[0], blend_dest[0]); /* R */
796       VEC4_MIN(quadColor[1], source[1], blend_dest[1]); /* G */
797       VEC4_MIN(quadColor[2], source[2], blend_dest[2]); /* B */
798       break;
799    case PIPE_BLEND_MAX:
800       VEC4_MAX(quadColor[0], source[0], blend_dest[0]); /* R */
801       VEC4_MAX(quadColor[1], source[1], blend_dest[1]); /* G */
802       VEC4_MAX(quadColor[2], source[2], blend_dest[2]); /* B */
803       break;
804    default:
805       assert(0 && "invalid rgb blend func");
806    }
807 
808    /*
809     * Combine A terms
810     */
811    switch (softpipe->blend->rt[blend_index].alpha_func) {
812    case PIPE_BLEND_ADD:
813       VEC4_ADD(quadColor[3], source[3], blend_dest[3]); /* A */
814       break;
815    case PIPE_BLEND_SUBTRACT:
816       VEC4_SUB(quadColor[3], source[3], blend_dest[3]); /* A */
817       break;
818    case PIPE_BLEND_REVERSE_SUBTRACT:
819       VEC4_SUB(quadColor[3], blend_dest[3], source[3]); /* A */
820       break;
821    case PIPE_BLEND_MIN:
822       VEC4_MIN(quadColor[3], source[3], blend_dest[3]); /* A */
823       break;
824    case PIPE_BLEND_MAX:
825       VEC4_MAX(quadColor[3], source[3], blend_dest[3]); /* A */
826       break;
827    default:
828       assert(0 && "invalid alpha blend func");
829    }
830 }
831 
832 static void
colormask_quad(unsigned colormask,float (* quadColor)[4],float (* dest)[4])833 colormask_quad(unsigned colormask,
834                float (*quadColor)[4],
835                float (*dest)[4])
836 {
837    /* R */
838    if (!(colormask & PIPE_MASK_R))
839       COPY_4V(quadColor[0], dest[0]);
840 
841    /* G */
842    if (!(colormask & PIPE_MASK_G))
843       COPY_4V(quadColor[1], dest[1]);
844 
845    /* B */
846    if (!(colormask & PIPE_MASK_B))
847       COPY_4V(quadColor[2], dest[2]);
848 
849    /* A */
850    if (!(colormask & PIPE_MASK_A))
851       COPY_4V(quadColor[3], dest[3]);
852 }
853 
854 
855 /**
856  * Clamp all colors in a quad to [0, 1]
857  */
858 static void
clamp_colors(float (* quadColor)[4])859 clamp_colors(float (*quadColor)[4])
860 {
861    unsigned i, j;
862 
863    for (i = 0; i < 4; i++) {
864       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
865          quadColor[i][j] = CLAMP(quadColor[i][j], 0.0F, 1.0F);
866       }
867    }
868 }
869 
870 
871 /**
872  * If we're drawing to a luminance, luminance/alpha or intensity surface
873  * we have to adjust (rebase) the fragment/quad colors before writing them
874  * to the tile cache.  The tile cache always stores RGBA colors but if
875  * we're caching a L/A surface (for example) we need to be sure that R=G=B
876  * so that subsequent reads from the surface cache appear to return L/A
877  * values.
878  * The piglit fbo-blending-formats test will exercise this.
879  */
880 static void
rebase_colors(enum format base_format,float (* quadColor)[4])881 rebase_colors(enum format base_format, float (*quadColor)[4])
882 {
883    unsigned i;
884 
885    switch (base_format) {
886    case RGB:
887       for (i = 0; i < 4; i++) {
888          /* A = 1 */
889          quadColor[3][i] = 1.0F;
890       }
891       break;
892    case LUMINANCE:
893       for (i = 0; i < 4; i++) {
894          /* B = G = R */
895          quadColor[2][i] = quadColor[1][i] = quadColor[0][i];
896          /* A = 1 */
897          quadColor[3][i] = 1.0F;
898       }
899       break;
900    case LUMINANCE_ALPHA:
901       for (i = 0; i < 4; i++) {
902          /* B = G = R */
903          quadColor[2][i] = quadColor[1][i] = quadColor[0][i];
904       }
905       break;
906    case INTENSITY:
907       for (i = 0; i < 4; i++) {
908          /* A = B = G = R */
909          quadColor[3][i] = quadColor[2][i] = quadColor[1][i] = quadColor[0][i];
910       }
911       break;
912    default:
913       ; /* nothing */
914    }
915 }
916 
917 static void
blend_fallback(struct quad_stage * qs,struct quad_header * quads[],unsigned nr)918 blend_fallback(struct quad_stage *qs,
919                struct quad_header *quads[],
920                unsigned nr)
921 {
922    const struct blend_quad_stage *bqs = blend_quad_stage(qs);
923    struct softpipe_context *softpipe = qs->softpipe;
924    const struct pipe_blend_state *blend = softpipe->blend;
925    unsigned cbuf;
926    boolean write_all =
927       softpipe->fs_variant->info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS];
928 
929    for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) {
930       if (softpipe->framebuffer.cbufs[cbuf]) {
931          /* which blend/mask state index to use: */
932          const uint blend_buf = blend->independent_blend_enable ? cbuf : 0;
933          float dest[4][TGSI_QUAD_SIZE];
934          struct softpipe_cached_tile *tile
935             = sp_get_cached_tile(softpipe->cbuf_cache[cbuf],
936                                  quads[0]->input.x0,
937                                  quads[0]->input.y0, quads[0]->input.layer);
938          const boolean clamp = bqs->clamp[cbuf];
939          const float *blend_color;
940          const boolean dual_source_blend = util_blend_state_is_dual(blend, cbuf);
941          uint q, i, j;
942 
943          if (clamp)
944             blend_color = softpipe->blend_color_clamped.color;
945          else
946             blend_color = softpipe->blend_color.color;
947 
948          for (q = 0; q < nr; q++) {
949             struct quad_header *quad = quads[q];
950             float (*quadColor)[4];
951             float (*quadColor2)[4] = NULL;
952             float temp_quad_color[TGSI_QUAD_SIZE][4];
953             const int itx = (quad->input.x0 & (TILE_SIZE-1));
954             const int ity = (quad->input.y0 & (TILE_SIZE-1));
955 
956             if (write_all) {
957                for (j = 0; j < TGSI_QUAD_SIZE; j++) {
958                   for (i = 0; i < 4; i++) {
959                      temp_quad_color[i][j] = quad->output.color[0][i][j];
960                   }
961                }
962                quadColor = temp_quad_color;
963             } else {
964                quadColor = quad->output.color[cbuf];
965                if (dual_source_blend)
966                   quadColor2 = quad->output.color[cbuf + 1];
967             }
968 
969             /* If fixed-point dest color buffer, need to clamp the incoming
970              * fragment colors now.
971              */
972             if (clamp || softpipe->rasterizer->clamp_fragment_color) {
973                clamp_colors(quadColor);
974             }
975 
976             /* get/swizzle dest colors
977              */
978             for (j = 0; j < TGSI_QUAD_SIZE; j++) {
979                int x = itx + (j & 1);
980                int y = ity + (j >> 1);
981                for (i = 0; i < 4; i++) {
982                   dest[i][j] = tile->data.color[y][x][i];
983                }
984             }
985 
986 
987             if (blend->logicop_enable) {
988                if (bqs->format_type[cbuf] != UTIL_FORMAT_TYPE_FLOAT) {
989                   logicop_quad( qs, quadColor, dest );
990                }
991             }
992             else if (blend->rt[blend_buf].blend_enable) {
993                blend_quad(qs, quadColor, quadColor2, dest, blend_color, blend_buf);
994 
995                /* If fixed-point dest color buffer, need to clamp the outgoing
996                 * fragment colors now.
997                 */
998                if (clamp) {
999                   clamp_colors(quadColor);
1000                }
1001             }
1002 
1003             rebase_colors(bqs->base_format[cbuf], quadColor);
1004 
1005             if (blend->rt[blend_buf].colormask != 0xf)
1006                colormask_quad( blend->rt[cbuf].colormask, quadColor, dest);
1007 
1008             /* Output color values
1009              */
1010             for (j = 0; j < TGSI_QUAD_SIZE; j++) {
1011                if (quad->inout.mask & (1 << j)) {
1012                   int x = itx + (j & 1);
1013                   int y = ity + (j >> 1);
1014                   for (i = 0; i < 4; i++) { /* loop over color chans */
1015                      tile->data.color[y][x][i] = quadColor[i][j];
1016                   }
1017                }
1018             }
1019          }
1020       }
1021    }
1022 }
1023 
1024 
1025 static void
blend_single_add_src_alpha_inv_src_alpha(struct quad_stage * qs,struct quad_header * quads[],unsigned nr)1026 blend_single_add_src_alpha_inv_src_alpha(struct quad_stage *qs,
1027                                          struct quad_header *quads[],
1028                                          unsigned nr)
1029 {
1030    const struct blend_quad_stage *bqs = blend_quad_stage(qs);
1031    static const float one[4] = { 1, 1, 1, 1 };
1032    float one_minus_alpha[TGSI_QUAD_SIZE];
1033    float dest[4][TGSI_QUAD_SIZE];
1034    float source[4][TGSI_QUAD_SIZE];
1035    uint i, j, q;
1036 
1037    struct softpipe_cached_tile *tile
1038       = sp_get_cached_tile(qs->softpipe->cbuf_cache[0],
1039                            quads[0]->input.x0,
1040                            quads[0]->input.y0, quads[0]->input.layer);
1041 
1042    for (q = 0; q < nr; q++) {
1043       struct quad_header *quad = quads[q];
1044       float (*quadColor)[4] = quad->output.color[0];
1045       const float *alpha = quadColor[3];
1046       const int itx = (quad->input.x0 & (TILE_SIZE-1));
1047       const int ity = (quad->input.y0 & (TILE_SIZE-1));
1048 
1049       /* get/swizzle dest colors */
1050       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
1051          int x = itx + (j & 1);
1052          int y = ity + (j >> 1);
1053          for (i = 0; i < 4; i++) {
1054             dest[i][j] = tile->data.color[y][x][i];
1055          }
1056       }
1057 
1058       /* If fixed-point dest color buffer, need to clamp the incoming
1059        * fragment colors now.
1060        */
1061       if (bqs->clamp[0] || qs->softpipe->rasterizer->clamp_fragment_color) {
1062          clamp_colors(quadColor);
1063       }
1064 
1065       VEC4_MUL(source[0], quadColor[0], alpha); /* R */
1066       VEC4_MUL(source[1], quadColor[1], alpha); /* G */
1067       VEC4_MUL(source[2], quadColor[2], alpha); /* B */
1068       VEC4_MUL(source[3], quadColor[3], alpha); /* A */
1069 
1070       VEC4_SUB(one_minus_alpha, one, alpha);
1071       VEC4_MUL(dest[0], dest[0], one_minus_alpha); /* R */
1072       VEC4_MUL(dest[1], dest[1], one_minus_alpha); /* G */
1073       VEC4_MUL(dest[2], dest[2], one_minus_alpha); /* B */
1074       VEC4_MUL(dest[3], dest[3], one_minus_alpha); /* A */
1075 
1076       VEC4_ADD(quadColor[0], source[0], dest[0]); /* R */
1077       VEC4_ADD(quadColor[1], source[1], dest[1]); /* G */
1078       VEC4_ADD(quadColor[2], source[2], dest[2]); /* B */
1079       VEC4_ADD(quadColor[3], source[3], dest[3]); /* A */
1080 
1081       /* If fixed-point dest color buffer, need to clamp the outgoing
1082        * fragment colors now.
1083        */
1084       if (bqs->clamp[0]) {
1085          clamp_colors(quadColor);
1086       }
1087 
1088       rebase_colors(bqs->base_format[0], quadColor);
1089 
1090       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
1091          if (quad->inout.mask & (1 << j)) {
1092             int x = itx + (j & 1);
1093             int y = ity + (j >> 1);
1094             for (i = 0; i < 4; i++) { /* loop over color chans */
1095                tile->data.color[y][x][i] = quadColor[i][j];
1096             }
1097          }
1098       }
1099    }
1100 }
1101 
1102 static void
blend_single_add_one_one(struct quad_stage * qs,struct quad_header * quads[],unsigned nr)1103 blend_single_add_one_one(struct quad_stage *qs,
1104                          struct quad_header *quads[],
1105                          unsigned nr)
1106 {
1107    const struct blend_quad_stage *bqs = blend_quad_stage(qs);
1108    float dest[4][TGSI_QUAD_SIZE];
1109    uint i, j, q;
1110 
1111    struct softpipe_cached_tile *tile
1112       = sp_get_cached_tile(qs->softpipe->cbuf_cache[0],
1113                            quads[0]->input.x0,
1114                            quads[0]->input.y0, quads[0]->input.layer);
1115 
1116    for (q = 0; q < nr; q++) {
1117       struct quad_header *quad = quads[q];
1118       float (*quadColor)[4] = quad->output.color[0];
1119       const int itx = (quad->input.x0 & (TILE_SIZE-1));
1120       const int ity = (quad->input.y0 & (TILE_SIZE-1));
1121 
1122       /* get/swizzle dest colors */
1123       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
1124          int x = itx + (j & 1);
1125          int y = ity + (j >> 1);
1126          for (i = 0; i < 4; i++) {
1127             dest[i][j] = tile->data.color[y][x][i];
1128          }
1129       }
1130 
1131       /* If fixed-point dest color buffer, need to clamp the incoming
1132        * fragment colors now.
1133        */
1134       if (bqs->clamp[0] || qs->softpipe->rasterizer->clamp_fragment_color) {
1135          clamp_colors(quadColor);
1136       }
1137 
1138       VEC4_ADD(quadColor[0], quadColor[0], dest[0]); /* R */
1139       VEC4_ADD(quadColor[1], quadColor[1], dest[1]); /* G */
1140       VEC4_ADD(quadColor[2], quadColor[2], dest[2]); /* B */
1141       VEC4_ADD(quadColor[3], quadColor[3], dest[3]); /* A */
1142 
1143       /* If fixed-point dest color buffer, need to clamp the outgoing
1144        * fragment colors now.
1145        */
1146       if (bqs->clamp[0]) {
1147          clamp_colors(quadColor);
1148       }
1149 
1150       rebase_colors(bqs->base_format[0], quadColor);
1151 
1152       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
1153          if (quad->inout.mask & (1 << j)) {
1154             int x = itx + (j & 1);
1155             int y = ity + (j >> 1);
1156             for (i = 0; i < 4; i++) { /* loop over color chans */
1157                tile->data.color[y][x][i] = quadColor[i][j];
1158             }
1159          }
1160       }
1161    }
1162 }
1163 
1164 
1165 /**
1166  * Just copy the quad color to the framebuffer tile (respecting the writemask),
1167  * for one color buffer.
1168  * Clamping will be done, if needed (depending on the color buffer's
1169  * datatype) when we write/pack the colors later.
1170  */
1171 static void
single_output_color(struct quad_stage * qs,struct quad_header * quads[],unsigned nr)1172 single_output_color(struct quad_stage *qs,
1173                     struct quad_header *quads[],
1174                     unsigned nr)
1175 {
1176    const struct blend_quad_stage *bqs = blend_quad_stage(qs);
1177    uint i, j, q;
1178 
1179    struct softpipe_cached_tile *tile
1180       = sp_get_cached_tile(qs->softpipe->cbuf_cache[0],
1181                            quads[0]->input.x0,
1182                            quads[0]->input.y0, quads[0]->input.layer);
1183 
1184    for (q = 0; q < nr; q++) {
1185       struct quad_header *quad = quads[q];
1186       float (*quadColor)[4] = quad->output.color[0];
1187       const int itx = (quad->input.x0 & (TILE_SIZE-1));
1188       const int ity = (quad->input.y0 & (TILE_SIZE-1));
1189 
1190       if (qs->softpipe->rasterizer->clamp_fragment_color)
1191          clamp_colors(quadColor);
1192 
1193       rebase_colors(bqs->base_format[0], quadColor);
1194 
1195       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
1196          if (quad->inout.mask & (1 << j)) {
1197             int x = itx + (j & 1);
1198             int y = ity + (j >> 1);
1199             for (i = 0; i < 4; i++) { /* loop over color chans */
1200                tile->data.color[y][x][i] = quadColor[i][j];
1201             }
1202          }
1203       }
1204    }
1205 }
1206 
/**
 * Run function that discards the quads without touching any color buffer.
 * All parameters are ignored.
 */
static void
blend_noop(struct quad_stage *qs,
           struct quad_header *quads[],
           unsigned nr)
{
   (void) qs;
   (void) quads;
   (void) nr;
}
1213 
1214 
1215 static void
choose_blend_quad(struct quad_stage * qs,struct quad_header * quads[],unsigned nr)1216 choose_blend_quad(struct quad_stage *qs,
1217                   struct quad_header *quads[],
1218                   unsigned nr)
1219 {
1220    struct blend_quad_stage *bqs = blend_quad_stage(qs);
1221    struct softpipe_context *softpipe = qs->softpipe;
1222    const struct pipe_blend_state *blend = softpipe->blend;
1223    unsigned i;
1224 
1225    qs->run = blend_fallback;
1226 
1227    if (softpipe->framebuffer.nr_cbufs == 0) {
1228       qs->run = blend_noop;
1229    }
1230    else if (!softpipe->blend->logicop_enable &&
1231             softpipe->blend->rt[0].colormask == 0xf &&
1232             softpipe->framebuffer.nr_cbufs == 1)
1233    {
1234       if (softpipe->framebuffer.cbufs[0] == NULL) {
1235          qs->run = blend_noop;
1236       }
1237       else if (!blend->rt[0].blend_enable) {
1238          qs->run = single_output_color;
1239       }
1240       else if (blend->rt[0].rgb_src_factor == blend->rt[0].alpha_src_factor &&
1241                blend->rt[0].rgb_dst_factor == blend->rt[0].alpha_dst_factor &&
1242                blend->rt[0].rgb_func == blend->rt[0].alpha_func)
1243       {
1244          if (blend->rt[0].alpha_func == PIPE_BLEND_ADD) {
1245             if (blend->rt[0].rgb_src_factor == PIPE_BLENDFACTOR_ONE &&
1246                 blend->rt[0].rgb_dst_factor == PIPE_BLENDFACTOR_ONE) {
1247                qs->run = blend_single_add_one_one;
1248             }
1249             else if (blend->rt[0].rgb_src_factor == PIPE_BLENDFACTOR_SRC_ALPHA &&
1250                 blend->rt[0].rgb_dst_factor == PIPE_BLENDFACTOR_INV_SRC_ALPHA)
1251                qs->run = blend_single_add_src_alpha_inv_src_alpha;
1252 
1253          }
1254       }
1255    }
1256 
1257    /* For each color buffer, determine if the buffer has destination alpha and
1258     * whether color clamping is needed.
1259     */
1260    for (i = 0; i < softpipe->framebuffer.nr_cbufs; i++) {
1261       if (softpipe->framebuffer.cbufs[i]) {
1262          const enum pipe_format format = softpipe->framebuffer.cbufs[i]->format;
1263          const struct util_format_description *desc =
1264             util_format_description(format);
1265          /* assuming all or no color channels are normalized: */
1266          bqs->clamp[i] = desc->channel[0].normalized;
1267          bqs->format_type[i] = desc->channel[0].type;
1268 
1269          if (util_format_is_intensity(format))
1270             bqs->base_format[i] = INTENSITY;
1271          else if (util_format_is_luminance(format))
1272             bqs->base_format[i] = LUMINANCE;
1273          else if (util_format_is_luminance_alpha(format))
1274             bqs->base_format[i] = LUMINANCE_ALPHA;
1275          else if (!util_format_has_alpha(format))
1276             bqs->base_format[i] = RGB;
1277          else
1278             bqs->base_format[i] = RGBA;
1279       }
1280    }
1281 
1282    qs->run(qs, quads, nr);
1283 }
1284 
1285 
blend_begin(struct quad_stage * qs)1286 static void blend_begin(struct quad_stage *qs)
1287 {
1288    qs->run = choose_blend_quad;
1289 }
1290 
1291 
/**
 * Stage 'destroy' hook: release the memory of the stage object.
 */
static void
blend_destroy(struct quad_stage *qs)
{
   FREE(qs);
}
1296 
1297 
sp_quad_blend_stage(struct softpipe_context * softpipe)1298 struct quad_stage *sp_quad_blend_stage( struct softpipe_context *softpipe )
1299 {
1300    struct blend_quad_stage *stage = CALLOC_STRUCT(blend_quad_stage);
1301 
1302    if (!stage)
1303       return NULL;
1304 
1305    stage->base.softpipe = softpipe;
1306    stage->base.begin = blend_begin;
1307    stage->base.run = choose_blend_quad;
1308    stage->base.destroy = blend_destroy;
1309 
1310    return &stage->base;
1311 }
1312