• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**************************************************************************
2  *
3  * Copyright 2007 VMware, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial portions
16  * of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  **************************************************************************/
27 
28 /**
29  * quad blending
30  * \author Brian Paul
31  */
32 
33 #include "pipe/p_defines.h"
34 #include "util/u_math.h"
35 #include "util/u_memory.h"
36 #include "util/format/u_format.h"
37 #include "util/u_dual_blend.h"
38 #include "sp_context.h"
39 #include "sp_state.h"
40 #include "sp_quad.h"
41 #include "sp_tile_cache.h"
42 #include "sp_quad_pipe.h"
43 
44 
/**
 * Base format classes of a color buffer, as consumed by rebase_colors().
 * The numeric values are spelled out but match the implicit numbering.
 */
enum format
{
   RGBA            = 0,  /**< all four channels kept as-is */
   RGB             = 1,  /**< alpha is rebased to 1 */
   LUMINANCE       = 2,  /**< G and B follow R, alpha is rebased to 1 */
   LUMINANCE_ALPHA = 3,  /**< G and B follow R, alpha kept */
   INTENSITY       = 4   /**< G, B and A all follow R */
};
53 
54 
55 /** Subclass of quad_stage */
56 struct blend_quad_stage
57 {
58    struct quad_stage base;
59    boolean clamp[PIPE_MAX_COLOR_BUFS];  /**< clamp colors to [0,1]? */
60    enum format base_format[PIPE_MAX_COLOR_BUFS];
61    enum util_format_type format_type[PIPE_MAX_COLOR_BUFS];
62 };
63 
64 
/**
 * Downcast a generic quad_stage pointer to the blend stage that embeds it.
 * No checking is done; the caller must pass a stage that really is a
 * blend_quad_stage.
 */
static inline struct blend_quad_stage *
blend_quad_stage(struct quad_stage *stage)
{
   struct blend_quad_stage *bqs = (struct blend_quad_stage *) stage;

   return bqs;
}
71 
72 
/*
 * Four-element vector helpers.
 *
 * Every macro works component-wise on operands that can be subscripted
 * with [0]..[3] (arrays or pointers).  All arguments are parenthesized in
 * the expansion so that expressions such as "buf + 4" may be passed.
 * Arguments are still evaluated several times, so they must be free of
 * side effects.
 */

/** DST = SRC */
#define VEC4_COPY(DST, SRC) \
do { \
    (DST)[0] = (SRC)[0]; \
    (DST)[1] = (SRC)[1]; \
    (DST)[2] = (SRC)[2]; \
    (DST)[3] = (SRC)[3]; \
} while(0)

/** Broadcast the scalar SRC into all four components of DST */
#define VEC4_SCALAR(DST, SRC) \
do { \
    (DST)[0] = (SRC); \
    (DST)[1] = (SRC); \
    (DST)[2] = (SRC); \
    (DST)[3] = (SRC); \
} while(0)

/** R = A + B */
#define VEC4_ADD(R, A, B) \
do { \
   (R)[0] = (A)[0] + (B)[0]; \
   (R)[1] = (A)[1] + (B)[1]; \
   (R)[2] = (A)[2] + (B)[2]; \
   (R)[3] = (A)[3] + (B)[3]; \
} while (0)

/** R = A - B */
#define VEC4_SUB(R, A, B) \
do { \
   (R)[0] = (A)[0] - (B)[0]; \
   (R)[1] = (A)[1] - (B)[1]; \
   (R)[2] = (A)[2] - (B)[2]; \
   (R)[3] = (A)[3] - (B)[3]; \
} while (0)

/** Add and limit result to ceiling of 1.0 */
#define VEC4_ADD_SAT(R, A, B) \
do { \
   (R)[0] = (A)[0] + (B)[0];  if ((R)[0] > 1.0f) (R)[0] = 1.0f; \
   (R)[1] = (A)[1] + (B)[1];  if ((R)[1] > 1.0f) (R)[1] = 1.0f; \
   (R)[2] = (A)[2] + (B)[2];  if ((R)[2] > 1.0f) (R)[2] = 1.0f; \
   (R)[3] = (A)[3] + (B)[3];  if ((R)[3] > 1.0f) (R)[3] = 1.0f; \
} while (0)

/** Subtract and limit result to floor of 0.0 */
#define VEC4_SUB_SAT(R, A, B) \
do { \
   (R)[0] = (A)[0] - (B)[0];  if ((R)[0] < 0.0f) (R)[0] = 0.0f; \
   (R)[1] = (A)[1] - (B)[1];  if ((R)[1] < 0.0f) (R)[1] = 0.0f; \
   (R)[2] = (A)[2] - (B)[2];  if ((R)[2] < 0.0f) (R)[2] = 0.0f; \
   (R)[3] = (A)[3] - (B)[3];  if ((R)[3] < 0.0f) (R)[3] = 0.0f; \
} while (0)

/** R = A * B */
#define VEC4_MUL(R, A, B) \
do { \
   (R)[0] = (A)[0] * (B)[0]; \
   (R)[1] = (A)[1] * (B)[1]; \
   (R)[2] = (A)[2] * (B)[2]; \
   (R)[3] = (A)[3] * (B)[3]; \
} while (0)

/** R = min(A, B); B is chosen when the comparison A < B is false */
#define VEC4_MIN(R, A, B) \
do { \
   (R)[0] = ((A)[0] < (B)[0]) ? (A)[0] : (B)[0]; \
   (R)[1] = ((A)[1] < (B)[1]) ? (A)[1] : (B)[1]; \
   (R)[2] = ((A)[2] < (B)[2]) ? (A)[2] : (B)[2]; \
   (R)[3] = ((A)[3] < (B)[3]) ? (A)[3] : (B)[3]; \
} while (0)

/** R = max(A, B); B is chosen when the comparison A > B is false */
#define VEC4_MAX(R, A, B) \
do { \
   (R)[0] = ((A)[0] > (B)[0]) ? (A)[0] : (B)[0]; \
   (R)[1] = ((A)[1] > (B)[1]) ? (A)[1] : (B)[1]; \
   (R)[2] = ((A)[2] > (B)[2]) ? (A)[2] : (B)[2]; \
   (R)[3] = ((A)[3] > (B)[3]) ? (A)[3] : (B)[3]; \
} while (0)
146 
147 
148 
149 static void
logicop_quad(struct quad_stage * qs,float (* quadColor)[4],float (* dest)[4])150 logicop_quad(struct quad_stage *qs,
151              float (*quadColor)[4],
152              float (*dest)[4])
153 {
154    struct softpipe_context *softpipe = qs->softpipe;
155    ubyte src[4][4], dst[4][4], res[4][4];
156    uint *src4 = (uint *) src;
157    uint *dst4 = (uint *) dst;
158    uint *res4 = (uint *) res;
159    uint j;
160 
161 
162    /* convert to ubyte */
163    for (j = 0; j < 4; j++) { /* loop over R,G,B,A channels */
164       dst[j][0] = float_to_ubyte(dest[j][0]); /* P0 */
165       dst[j][1] = float_to_ubyte(dest[j][1]); /* P1 */
166       dst[j][2] = float_to_ubyte(dest[j][2]); /* P2 */
167       dst[j][3] = float_to_ubyte(dest[j][3]); /* P3 */
168 
169       src[j][0] = float_to_ubyte(quadColor[j][0]); /* P0 */
170       src[j][1] = float_to_ubyte(quadColor[j][1]); /* P1 */
171       src[j][2] = float_to_ubyte(quadColor[j][2]); /* P2 */
172       src[j][3] = float_to_ubyte(quadColor[j][3]); /* P3 */
173 
174       res[j][0] = 0;
175    }
176 
177    switch (softpipe->blend->logicop_func) {
178    case PIPE_LOGICOP_CLEAR:
179       for (j = 0; j < 4; j++)
180          res4[j] = 0;
181       break;
182    case PIPE_LOGICOP_NOR:
183       for (j = 0; j < 4; j++)
184          res4[j] = ~(src4[j] | dst4[j]);
185       break;
186    case PIPE_LOGICOP_AND_INVERTED:
187       for (j = 0; j < 4; j++)
188          res4[j] = ~src4[j] & dst4[j];
189       break;
190    case PIPE_LOGICOP_COPY_INVERTED:
191       for (j = 0; j < 4; j++)
192          res4[j] = ~src4[j];
193       break;
194    case PIPE_LOGICOP_AND_REVERSE:
195       for (j = 0; j < 4; j++)
196          res4[j] = src4[j] & ~dst4[j];
197       break;
198    case PIPE_LOGICOP_INVERT:
199       for (j = 0; j < 4; j++)
200          res4[j] = ~dst4[j];
201       break;
202    case PIPE_LOGICOP_XOR:
203       for (j = 0; j < 4; j++)
204          res4[j] = dst4[j] ^ src4[j];
205       break;
206    case PIPE_LOGICOP_NAND:
207       for (j = 0; j < 4; j++)
208          res4[j] = ~(src4[j] & dst4[j]);
209       break;
210    case PIPE_LOGICOP_AND:
211       for (j = 0; j < 4; j++)
212          res4[j] = src4[j] & dst4[j];
213       break;
214    case PIPE_LOGICOP_EQUIV:
215       for (j = 0; j < 4; j++)
216          res4[j] = ~(src4[j] ^ dst4[j]);
217       break;
218    case PIPE_LOGICOP_NOOP:
219       for (j = 0; j < 4; j++)
220          res4[j] = dst4[j];
221       break;
222    case PIPE_LOGICOP_OR_INVERTED:
223       for (j = 0; j < 4; j++)
224          res4[j] = ~src4[j] | dst4[j];
225       break;
226    case PIPE_LOGICOP_COPY:
227       for (j = 0; j < 4; j++)
228          res4[j] = src4[j];
229       break;
230    case PIPE_LOGICOP_OR_REVERSE:
231       for (j = 0; j < 4; j++)
232          res4[j] = src4[j] | ~dst4[j];
233       break;
234    case PIPE_LOGICOP_OR:
235       for (j = 0; j < 4; j++)
236          res4[j] = src4[j] | dst4[j];
237       break;
238    case PIPE_LOGICOP_SET:
239       for (j = 0; j < 4; j++)
240          res4[j] = ~0;
241       break;
242    default:
243       assert(0 && "invalid logicop mode");
244    }
245 
246    for (j = 0; j < 4; j++) {
247       quadColor[j][0] = ubyte_to_float(res[j][0]);
248       quadColor[j][1] = ubyte_to_float(res[j][1]);
249       quadColor[j][2] = ubyte_to_float(res[j][2]);
250       quadColor[j][3] = ubyte_to_float(res[j][3]);
251    }
252 }
253 
254 
255 
256 /**
257  * Do blending for a 2x2 quad for one color buffer.
258  * \param quadColor  the incoming quad colors
259  * \param dest  the destination/framebuffer quad colors
260  * \param const_blend_color  the constant blend color
261  * \param blend_index  which set of blending terms to use
262  */
263 static void
blend_quad(struct quad_stage * qs,float (* quadColor)[4],float (* quadColor2)[4],float (* dest)[4],const float const_blend_color[4],unsigned blend_index)264 blend_quad(struct quad_stage *qs,
265            float (*quadColor)[4],
266            float (*quadColor2)[4],
267            float (*dest)[4],
268            const float const_blend_color[4],
269            unsigned blend_index)
270 {
271    static const float zero[4] = { 0, 0, 0, 0 };
272    static const float one[4] = { 1, 1, 1, 1 };
273    struct softpipe_context *softpipe = qs->softpipe;
274    float source[4][TGSI_QUAD_SIZE] = { { 0 } };
275    float blend_dest[4][TGSI_QUAD_SIZE];
276 
277    /*
278     * Compute src/first term RGB
279     */
280    switch (softpipe->blend->rt[blend_index].rgb_src_factor) {
281    case PIPE_BLENDFACTOR_ONE:
282       VEC4_COPY(source[0], quadColor[0]); /* R */
283       VEC4_COPY(source[1], quadColor[1]); /* G */
284       VEC4_COPY(source[2], quadColor[2]); /* B */
285       break;
286    case PIPE_BLENDFACTOR_SRC_COLOR:
287       VEC4_MUL(source[0], quadColor[0], quadColor[0]); /* R */
288       VEC4_MUL(source[1], quadColor[1], quadColor[1]); /* G */
289       VEC4_MUL(source[2], quadColor[2], quadColor[2]); /* B */
290       break;
291    case PIPE_BLENDFACTOR_SRC_ALPHA:
292       {
293          const float *alpha = quadColor[3];
294          VEC4_MUL(source[0], quadColor[0], alpha); /* R */
295          VEC4_MUL(source[1], quadColor[1], alpha); /* G */
296          VEC4_MUL(source[2], quadColor[2], alpha); /* B */
297       }
298       break;
299    case PIPE_BLENDFACTOR_DST_COLOR:
300       VEC4_MUL(source[0], quadColor[0], dest[0]); /* R */
301       VEC4_MUL(source[1], quadColor[1], dest[1]); /* G */
302       VEC4_MUL(source[2], quadColor[2], dest[2]); /* B */
303       break;
304    case PIPE_BLENDFACTOR_DST_ALPHA:
305       {
306          const float *alpha = dest[3];
307          VEC4_MUL(source[0], quadColor[0], alpha); /* R */
308          VEC4_MUL(source[1], quadColor[1], alpha); /* G */
309          VEC4_MUL(source[2], quadColor[2], alpha); /* B */
310       }
311       break;
312    case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
313       {
314          const float *alpha = quadColor[3];
315          float diff[4], temp[4];
316          VEC4_SUB(diff, one, dest[3]);
317          VEC4_MIN(temp, alpha, diff);
318          VEC4_MUL(source[0], quadColor[0], temp); /* R */
319          VEC4_MUL(source[1], quadColor[1], temp); /* G */
320          VEC4_MUL(source[2], quadColor[2], temp); /* B */
321       }
322       break;
323    case PIPE_BLENDFACTOR_CONST_COLOR:
324       {
325          float comp[4];
326          VEC4_SCALAR(comp, const_blend_color[0]); /* R */
327          VEC4_MUL(source[0], quadColor[0], comp); /* R */
328          VEC4_SCALAR(comp, const_blend_color[1]); /* G */
329          VEC4_MUL(source[1], quadColor[1], comp); /* G */
330          VEC4_SCALAR(comp, const_blend_color[2]); /* B */
331          VEC4_MUL(source[2], quadColor[2], comp); /* B */
332       }
333       break;
334    case PIPE_BLENDFACTOR_CONST_ALPHA:
335       {
336          float alpha[4];
337          VEC4_SCALAR(alpha, const_blend_color[3]);
338          VEC4_MUL(source[0], quadColor[0], alpha); /* R */
339          VEC4_MUL(source[1], quadColor[1], alpha); /* G */
340          VEC4_MUL(source[2], quadColor[2], alpha); /* B */
341       }
342       break;
343    case PIPE_BLENDFACTOR_SRC1_COLOR:
344       VEC4_MUL(source[0], quadColor[0], quadColor2[0]); /* R */
345       VEC4_MUL(source[1], quadColor[1], quadColor2[1]); /* G */
346       VEC4_MUL(source[2], quadColor[2], quadColor2[2]); /* B */
347       break;
348    case PIPE_BLENDFACTOR_SRC1_ALPHA:
349       {
350          const float *alpha = quadColor2[3];
351          VEC4_MUL(source[0], quadColor[0], alpha); /* R */
352          VEC4_MUL(source[1], quadColor[1], alpha); /* G */
353          VEC4_MUL(source[2], quadColor[2], alpha); /* B */
354       }
355       break;
356    case PIPE_BLENDFACTOR_ZERO:
357       VEC4_COPY(source[0], zero); /* R */
358       VEC4_COPY(source[1], zero); /* G */
359       VEC4_COPY(source[2], zero); /* B */
360       break;
361    case PIPE_BLENDFACTOR_INV_SRC_COLOR:
362       {
363          float inv_comp[4];
364          VEC4_SUB(inv_comp, one, quadColor[0]); /* R */
365          VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */
366          VEC4_SUB(inv_comp, one, quadColor[1]); /* G */
367          VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */
368          VEC4_SUB(inv_comp, one, quadColor[2]); /* B */
369          VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */
370       }
371       break;
372    case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
373       {
374          float inv_alpha[4];
375          VEC4_SUB(inv_alpha, one, quadColor[3]);
376          VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
377          VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
378          VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
379       }
380       break;
381    case PIPE_BLENDFACTOR_INV_DST_ALPHA:
382       {
383          float inv_alpha[4];
384          VEC4_SUB(inv_alpha, one, dest[3]);
385          VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
386          VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
387          VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
388       }
389       break;
390    case PIPE_BLENDFACTOR_INV_DST_COLOR:
391       {
392          float inv_comp[4];
393          VEC4_SUB(inv_comp, one, dest[0]); /* R */
394          VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */
395          VEC4_SUB(inv_comp, one, dest[1]); /* G */
396          VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */
397          VEC4_SUB(inv_comp, one, dest[2]); /* B */
398          VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */
399       }
400       break;
401    case PIPE_BLENDFACTOR_INV_CONST_COLOR:
402       {
403          float inv_comp[4];
404          /* R */
405          VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[0]);
406          VEC4_MUL(source[0], quadColor[0], inv_comp);
407          /* G */
408          VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[1]);
409          VEC4_MUL(source[1], quadColor[1], inv_comp);
410          /* B */
411          VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[2]);
412          VEC4_MUL(source[2], quadColor[2], inv_comp);
413       }
414       break;
415    case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
416       {
417          float inv_alpha[4];
418          VEC4_SCALAR(inv_alpha, 1.0f - const_blend_color[3]);
419          VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
420          VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
421          VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
422       }
423       break;
424    case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
425       {
426          float inv_comp[4];
427          VEC4_SUB(inv_comp, one, quadColor2[0]); /* R */
428          VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */
429          VEC4_SUB(inv_comp, one, quadColor2[1]); /* G */
430          VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */
431          VEC4_SUB(inv_comp, one, quadColor2[2]); /* B */
432          VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */
433       }
434       break;
435    case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
436       {
437          float inv_alpha[4];
438          VEC4_SUB(inv_alpha, one, quadColor2[3]);
439          VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
440          VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
441          VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
442       }
443       break;
444    default:
445       assert(0 && "invalid rgb src factor");
446    }
447 
448    /*
449     * Compute src/first term A
450     */
451    switch (softpipe->blend->rt[blend_index].alpha_src_factor) {
452    case PIPE_BLENDFACTOR_ONE:
453       VEC4_COPY(source[3], quadColor[3]); /* A */
454       break;
455    case PIPE_BLENDFACTOR_SRC_COLOR:
456       FALLTHROUGH;
457    case PIPE_BLENDFACTOR_SRC_ALPHA:
458       {
459          const float *alpha = quadColor[3];
460          VEC4_MUL(source[3], quadColor[3], alpha); /* A */
461       }
462       break;
463    case PIPE_BLENDFACTOR_DST_COLOR:
464       FALLTHROUGH;
465    case PIPE_BLENDFACTOR_DST_ALPHA:
466       VEC4_MUL(source[3], quadColor[3], dest[3]); /* A */
467       break;
468    case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
469       /* multiply alpha by 1.0 */
470       VEC4_COPY(source[3], quadColor[3]); /* A */
471       break;
472    case PIPE_BLENDFACTOR_CONST_COLOR:
473       FALLTHROUGH;
474    case PIPE_BLENDFACTOR_CONST_ALPHA:
475       {
476          float comp[4];
477          VEC4_SCALAR(comp, const_blend_color[3]); /* A */
478          VEC4_MUL(source[3], quadColor[3], comp); /* A */
479       }
480       break;
481    case PIPE_BLENDFACTOR_ZERO:
482       VEC4_COPY(source[3], zero); /* A */
483       break;
484    case PIPE_BLENDFACTOR_INV_SRC_COLOR:
485       FALLTHROUGH;
486    case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
487       {
488          float inv_alpha[4];
489          VEC4_SUB(inv_alpha, one, quadColor[3]);
490          VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */
491       }
492       break;
493    case PIPE_BLENDFACTOR_INV_DST_COLOR:
494       FALLTHROUGH;
495    case PIPE_BLENDFACTOR_INV_DST_ALPHA:
496       {
497          float inv_alpha[4];
498          VEC4_SUB(inv_alpha, one, dest[3]);
499          VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */
500       }
501       break;
502    case PIPE_BLENDFACTOR_INV_CONST_COLOR:
503       FALLTHROUGH;
504    case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
505       {
506          float inv_comp[4];
507          /* A */
508          VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[3]);
509          VEC4_MUL(source[3], quadColor[3], inv_comp);
510       }
511       break;
512    case PIPE_BLENDFACTOR_SRC1_COLOR:
513       FALLTHROUGH;
514    case PIPE_BLENDFACTOR_SRC1_ALPHA:
515       {
516          const float *alpha = quadColor2[3];
517          VEC4_MUL(source[3], quadColor[3], alpha); /* A */
518       }
519       break;
520    case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
521       FALLTHROUGH;
522    case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
523       {
524          float inv_alpha[4];
525          VEC4_SUB(inv_alpha, one, quadColor2[3]);
526          VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */
527       }
528       break;
529    default:
530       assert(0 && "invalid alpha src factor");
531    }
532 
533    /* Save the original dest for use in masking */
534    VEC4_COPY(blend_dest[0], dest[0]);
535    VEC4_COPY(blend_dest[1], dest[1]);
536    VEC4_COPY(blend_dest[2], dest[2]);
537    VEC4_COPY(blend_dest[3], dest[3]);
538 
539 
540    /*
541     * Compute blend_dest/second term RGB
542     */
543    switch (softpipe->blend->rt[blend_index].rgb_dst_factor) {
544    case PIPE_BLENDFACTOR_ONE:
545       /* blend_dest = blend_dest * 1   NO-OP, leave blend_dest as-is */
546       break;
547    case PIPE_BLENDFACTOR_SRC_COLOR:
548       VEC4_MUL(blend_dest[0], blend_dest[0], quadColor[0]); /* R */
549       VEC4_MUL(blend_dest[1], blend_dest[1], quadColor[1]); /* G */
550       VEC4_MUL(blend_dest[2], blend_dest[2], quadColor[2]); /* B */
551       break;
552    case PIPE_BLENDFACTOR_SRC_ALPHA:
553       VEC4_MUL(blend_dest[0], blend_dest[0], quadColor[3]); /* R * A */
554       VEC4_MUL(blend_dest[1], blend_dest[1], quadColor[3]); /* G * A */
555       VEC4_MUL(blend_dest[2], blend_dest[2], quadColor[3]); /* B * A */
556       break;
557    case PIPE_BLENDFACTOR_DST_ALPHA:
558       VEC4_MUL(blend_dest[0], blend_dest[0], blend_dest[3]); /* R * A */
559       VEC4_MUL(blend_dest[1], blend_dest[1], blend_dest[3]); /* G * A */
560       VEC4_MUL(blend_dest[2], blend_dest[2], blend_dest[3]); /* B * A */
561       break;
562    case PIPE_BLENDFACTOR_DST_COLOR:
563       VEC4_MUL(blend_dest[0], blend_dest[0], blend_dest[0]); /* R */
564       VEC4_MUL(blend_dest[1], blend_dest[1], blend_dest[1]); /* G */
565       VEC4_MUL(blend_dest[2], blend_dest[2], blend_dest[2]); /* B */
566       break;
567    case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
568       {
569          const float *alpha = quadColor[3];
570          float diff[4], temp[4];
571          VEC4_SUB(diff, one, blend_dest[3]);
572          VEC4_MIN(temp, alpha, diff);
573          VEC4_MUL(blend_dest[0], blend_dest[0], temp); /* R */
574          VEC4_MUL(blend_dest[1], blend_dest[1], temp); /* G */
575          VEC4_MUL(blend_dest[2], blend_dest[2], temp); /* B */
576       }
577       break;
578    case PIPE_BLENDFACTOR_CONST_COLOR:
579       {
580          float comp[4];
581          VEC4_SCALAR(comp, const_blend_color[0]); /* R */
582          VEC4_MUL(blend_dest[0], blend_dest[0], comp); /* R */
583          VEC4_SCALAR(comp, const_blend_color[1]); /* G */
584          VEC4_MUL(blend_dest[1], blend_dest[1], comp); /* G */
585          VEC4_SCALAR(comp, const_blend_color[2]); /* B */
586          VEC4_MUL(blend_dest[2], blend_dest[2], comp); /* B */
587       }
588       break;
589    case PIPE_BLENDFACTOR_CONST_ALPHA:
590       {
591          float comp[4];
592          VEC4_SCALAR(comp, const_blend_color[3]); /* A */
593          VEC4_MUL(blend_dest[0], blend_dest[0], comp); /* R */
594          VEC4_MUL(blend_dest[1], blend_dest[1], comp); /* G */
595          VEC4_MUL(blend_dest[2], blend_dest[2], comp); /* B */
596       }
597       break;
598    case PIPE_BLENDFACTOR_ZERO:
599       VEC4_COPY(blend_dest[0], zero); /* R */
600       VEC4_COPY(blend_dest[1], zero); /* G */
601       VEC4_COPY(blend_dest[2], zero); /* B */
602       break;
603    case PIPE_BLENDFACTOR_SRC1_COLOR:
604       VEC4_MUL(blend_dest[0], blend_dest[0], quadColor2[0]); /* R */
605       VEC4_MUL(blend_dest[1], blend_dest[1], quadColor2[1]); /* G */
606       VEC4_MUL(blend_dest[2], blend_dest[2], quadColor2[2]); /* B */
607       break;
608    case PIPE_BLENDFACTOR_SRC1_ALPHA:
609       VEC4_MUL(blend_dest[0], blend_dest[0], quadColor2[3]); /* R * A */
610       VEC4_MUL(blend_dest[1], blend_dest[1], quadColor2[3]); /* G * A */
611       VEC4_MUL(blend_dest[2], blend_dest[2], quadColor2[3]); /* B * A */
612       break;
613    case PIPE_BLENDFACTOR_INV_SRC_COLOR:
614       {
615          float inv_comp[4];
616          VEC4_SUB(inv_comp, one, quadColor[0]); /* R */
617          VEC4_MUL(blend_dest[0], inv_comp, blend_dest[0]); /* R */
618          VEC4_SUB(inv_comp, one, quadColor[1]); /* G */
619          VEC4_MUL(blend_dest[1], inv_comp, blend_dest[1]); /* G */
620          VEC4_SUB(inv_comp, one, quadColor[2]); /* B */
621          VEC4_MUL(blend_dest[2], inv_comp, blend_dest[2]); /* B */
622       }
623       break;
624    case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
625       {
626          float one_minus_alpha[TGSI_QUAD_SIZE];
627          VEC4_SUB(one_minus_alpha, one, quadColor[3]);
628          VEC4_MUL(blend_dest[0], blend_dest[0], one_minus_alpha); /* R */
629          VEC4_MUL(blend_dest[1], blend_dest[1], one_minus_alpha); /* G */
630          VEC4_MUL(blend_dest[2], blend_dest[2], one_minus_alpha); /* B */
631       }
632       break;
633    case PIPE_BLENDFACTOR_INV_DST_ALPHA:
634       {
635          float inv_comp[4];
636          VEC4_SUB(inv_comp, one, blend_dest[3]); /* A */
637          VEC4_MUL(blend_dest[0], inv_comp, blend_dest[0]); /* R */
638          VEC4_MUL(blend_dest[1], inv_comp, blend_dest[1]); /* G */
639          VEC4_MUL(blend_dest[2], inv_comp, blend_dest[2]); /* B */
640       }
641       break;
642    case PIPE_BLENDFACTOR_INV_DST_COLOR:
643       {
644          float inv_comp[4];
645          VEC4_SUB(inv_comp, one, blend_dest[0]); /* R */
646          VEC4_MUL(blend_dest[0], blend_dest[0], inv_comp); /* R */
647          VEC4_SUB(inv_comp, one, blend_dest[1]); /* G */
648          VEC4_MUL(blend_dest[1], blend_dest[1], inv_comp); /* G */
649          VEC4_SUB(inv_comp, one, blend_dest[2]); /* B */
650          VEC4_MUL(blend_dest[2], blend_dest[2], inv_comp); /* B */
651       }
652       break;
653    case PIPE_BLENDFACTOR_INV_CONST_COLOR:
654       {
655          float inv_comp[4];
656          /* R */
657          VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[0]);
658          VEC4_MUL(blend_dest[0], blend_dest[0], inv_comp);
659          /* G */
660          VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[1]);
661          VEC4_MUL(blend_dest[1], blend_dest[1], inv_comp);
662          /* B */
663          VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[2]);
664          VEC4_MUL(blend_dest[2], blend_dest[2], inv_comp);
665       }
666       break;
667    case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
668       {
669          float inv_comp[4];
670          VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[3]);
671          VEC4_MUL(blend_dest[0], blend_dest[0], inv_comp);
672          VEC4_MUL(blend_dest[1], blend_dest[1], inv_comp);
673          VEC4_MUL(blend_dest[2], blend_dest[2], inv_comp);
674       }
675       break;
676    case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
677       {
678          float inv_comp[4];
679          VEC4_SUB(inv_comp, one, quadColor2[0]); /* R */
680          VEC4_MUL(blend_dest[0], inv_comp, blend_dest[0]); /* R */
681          VEC4_SUB(inv_comp, one, quadColor2[1]); /* G */
682          VEC4_MUL(blend_dest[1], inv_comp, blend_dest[1]); /* G */
683          VEC4_SUB(inv_comp, one, quadColor2[2]); /* B */
684          VEC4_MUL(blend_dest[2], inv_comp, blend_dest[2]); /* B */
685       }
686       break;
687    case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
688       {
689          float one_minus_alpha[TGSI_QUAD_SIZE];
690          VEC4_SUB(one_minus_alpha, one, quadColor2[3]);
691          VEC4_MUL(blend_dest[0], blend_dest[0], one_minus_alpha); /* R */
692          VEC4_MUL(blend_dest[1], blend_dest[1], one_minus_alpha); /* G */
693          VEC4_MUL(blend_dest[2], blend_dest[2], one_minus_alpha); /* B */
694       }
695       break;
696    default:
697       assert(0 && "invalid rgb dst factor");
698    }
699 
700    /*
701     * Compute blend_dest/second term A
702     */
703    switch (softpipe->blend->rt[blend_index].alpha_dst_factor) {
704    case PIPE_BLENDFACTOR_ONE:
705       /* blend_dest = blend_dest * 1   NO-OP, leave blend_dest as-is */
706       break;
707    case PIPE_BLENDFACTOR_SRC_COLOR:
708       FALLTHROUGH;
709    case PIPE_BLENDFACTOR_SRC_ALPHA:
710       VEC4_MUL(blend_dest[3], blend_dest[3], quadColor[3]); /* A * A */
711       break;
712    case PIPE_BLENDFACTOR_DST_COLOR:
713       FALLTHROUGH;
714    case PIPE_BLENDFACTOR_DST_ALPHA:
715       VEC4_MUL(blend_dest[3], blend_dest[3], blend_dest[3]); /* A */
716       break;
717    case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
718       /* blend_dest = blend_dest * 1   NO-OP, leave blend_dest as-is */
719       break;
720    case PIPE_BLENDFACTOR_CONST_COLOR:
721       FALLTHROUGH;
722    case PIPE_BLENDFACTOR_CONST_ALPHA:
723       {
724          float comp[4];
725          VEC4_SCALAR(comp, const_blend_color[3]); /* A */
726          VEC4_MUL(blend_dest[3], blend_dest[3], comp); /* A */
727       }
728       break;
729    case PIPE_BLENDFACTOR_ZERO:
730       VEC4_COPY(blend_dest[3], zero); /* A */
731       break;
732    case PIPE_BLENDFACTOR_INV_SRC_COLOR:
733       FALLTHROUGH;
734    case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
735       {
736          float one_minus_alpha[TGSI_QUAD_SIZE];
737          VEC4_SUB(one_minus_alpha, one, quadColor[3]);
738          VEC4_MUL(blend_dest[3], blend_dest[3], one_minus_alpha); /* A */
739       }
740       break;
741    case PIPE_BLENDFACTOR_INV_DST_COLOR:
742       FALLTHROUGH;
743    case PIPE_BLENDFACTOR_INV_DST_ALPHA:
744       {
745          float inv_comp[4];
746          VEC4_SUB(inv_comp, one, blend_dest[3]); /* A */
747          VEC4_MUL(blend_dest[3], inv_comp, blend_dest[3]); /* A */
748       }
749       break;
750    case PIPE_BLENDFACTOR_INV_CONST_COLOR:
751       FALLTHROUGH;
752    case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
753       {
754          float inv_comp[4];
755          VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[3]);
756          VEC4_MUL(blend_dest[3], blend_dest[3], inv_comp);
757       }
758       break;
759    case PIPE_BLENDFACTOR_SRC1_COLOR:
760       FALLTHROUGH;
761    case PIPE_BLENDFACTOR_SRC1_ALPHA:
762       VEC4_MUL(blend_dest[3], blend_dest[3], quadColor2[3]); /* A * A */
763       break;
764    case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
765       FALLTHROUGH;
766    case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
767       {
768          float one_minus_alpha[TGSI_QUAD_SIZE];
769          VEC4_SUB(one_minus_alpha, one, quadColor2[3]);
770          VEC4_MUL(blend_dest[3], blend_dest[3], one_minus_alpha); /* A */
771       }
772       break;
773    default:
774       assert(0 && "invalid alpha dst factor");
775    }
776 
777    /*
778     * Combine RGB terms
779     */
780    switch (softpipe->blend->rt[blend_index].rgb_func) {
781    case PIPE_BLEND_ADD:
782       VEC4_ADD(quadColor[0], source[0], blend_dest[0]); /* R */
783       VEC4_ADD(quadColor[1], source[1], blend_dest[1]); /* G */
784       VEC4_ADD(quadColor[2], source[2], blend_dest[2]); /* B */
785       break;
786    case PIPE_BLEND_SUBTRACT:
787       VEC4_SUB(quadColor[0], source[0], blend_dest[0]); /* R */
788       VEC4_SUB(quadColor[1], source[1], blend_dest[1]); /* G */
789       VEC4_SUB(quadColor[2], source[2], blend_dest[2]); /* B */
790       break;
791    case PIPE_BLEND_REVERSE_SUBTRACT:
792       VEC4_SUB(quadColor[0], blend_dest[0], source[0]); /* R */
793       VEC4_SUB(quadColor[1], blend_dest[1], source[1]); /* G */
794       VEC4_SUB(quadColor[2], blend_dest[2], source[2]); /* B */
795       break;
796    case PIPE_BLEND_MIN:
797       VEC4_MIN(quadColor[0], source[0], blend_dest[0]); /* R */
798       VEC4_MIN(quadColor[1], source[1], blend_dest[1]); /* G */
799       VEC4_MIN(quadColor[2], source[2], blend_dest[2]); /* B */
800       break;
801    case PIPE_BLEND_MAX:
802       VEC4_MAX(quadColor[0], source[0], blend_dest[0]); /* R */
803       VEC4_MAX(quadColor[1], source[1], blend_dest[1]); /* G */
804       VEC4_MAX(quadColor[2], source[2], blend_dest[2]); /* B */
805       break;
806    default:
807       assert(0 && "invalid rgb blend func");
808    }
809 
810    /*
811     * Combine A terms
812     */
813    switch (softpipe->blend->rt[blend_index].alpha_func) {
814    case PIPE_BLEND_ADD:
815       VEC4_ADD(quadColor[3], source[3], blend_dest[3]); /* A */
816       break;
817    case PIPE_BLEND_SUBTRACT:
818       VEC4_SUB(quadColor[3], source[3], blend_dest[3]); /* A */
819       break;
820    case PIPE_BLEND_REVERSE_SUBTRACT:
821       VEC4_SUB(quadColor[3], blend_dest[3], source[3]); /* A */
822       break;
823    case PIPE_BLEND_MIN:
824       VEC4_MIN(quadColor[3], source[3], blend_dest[3]); /* A */
825       break;
826    case PIPE_BLEND_MAX:
827       VEC4_MAX(quadColor[3], source[3], blend_dest[3]); /* A */
828       break;
829    default:
830       assert(0 && "invalid alpha blend func");
831    }
832 }
833 
834 static void
colormask_quad(unsigned colormask,float (* quadColor)[4],float (* dest)[4])835 colormask_quad(unsigned colormask,
836                float (*quadColor)[4],
837                float (*dest)[4])
838 {
839    /* R */
840    if (!(colormask & PIPE_MASK_R))
841       COPY_4V(quadColor[0], dest[0]);
842 
843    /* G */
844    if (!(colormask & PIPE_MASK_G))
845       COPY_4V(quadColor[1], dest[1]);
846 
847    /* B */
848    if (!(colormask & PIPE_MASK_B))
849       COPY_4V(quadColor[2], dest[2]);
850 
851    /* A */
852    if (!(colormask & PIPE_MASK_A))
853       COPY_4V(quadColor[3], dest[3]);
854 }
855 
856 
857 /**
858  * Clamp all colors in a quad to [0, 1]
859  */
860 static void
clamp_colors(float (* quadColor)[4])861 clamp_colors(float (*quadColor)[4])
862 {
863    unsigned i, j;
864 
865    for (i = 0; i < 4; i++) {
866       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
867          quadColor[i][j] = CLAMP(quadColor[i][j], 0.0F, 1.0F);
868       }
869    }
870 }
871 
872 
873 /**
874  * If we're drawing to a luminance, luminance/alpha or intensity surface
875  * we have to adjust (rebase) the fragment/quad colors before writing them
876  * to the tile cache.  The tile cache always stores RGBA colors but if
877  * we're caching a L/A surface (for example) we need to be sure that R=G=B
878  * so that subsequent reads from the surface cache appear to return L/A
879  * values.
880  * The piglit fbo-blending-formats test will exercise this.
881  */
882 static void
rebase_colors(enum format base_format,float (* quadColor)[4])883 rebase_colors(enum format base_format, float (*quadColor)[4])
884 {
885    unsigned i;
886 
887    switch (base_format) {
888    case RGB:
889       for (i = 0; i < 4; i++) {
890          /* A = 1 */
891          quadColor[3][i] = 1.0F;
892       }
893       break;
894    case LUMINANCE:
895       for (i = 0; i < 4; i++) {
896          /* B = G = R */
897          quadColor[2][i] = quadColor[1][i] = quadColor[0][i];
898          /* A = 1 */
899          quadColor[3][i] = 1.0F;
900       }
901       break;
902    case LUMINANCE_ALPHA:
903       for (i = 0; i < 4; i++) {
904          /* B = G = R */
905          quadColor[2][i] = quadColor[1][i] = quadColor[0][i];
906       }
907       break;
908    case INTENSITY:
909       for (i = 0; i < 4; i++) {
910          /* A = B = G = R */
911          quadColor[3][i] = quadColor[2][i] = quadColor[1][i] = quadColor[0][i];
912       }
913       break;
914    default:
915       ; /* nothing */
916    }
917 }
918 
919 static void
blend_fallback(struct quad_stage * qs,struct quad_header * quads[],unsigned nr)920 blend_fallback(struct quad_stage *qs,
921                struct quad_header *quads[],
922                unsigned nr)
923 {
924    const struct blend_quad_stage *bqs = blend_quad_stage(qs);
925    struct softpipe_context *softpipe = qs->softpipe;
926    const struct pipe_blend_state *blend = softpipe->blend;
927    unsigned cbuf;
928    boolean write_all =
929       softpipe->fs_variant->info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS];
930 
931    for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) {
932       if (softpipe->framebuffer.cbufs[cbuf]) {
933          /* which blend/mask state index to use: */
934          const uint blend_buf = blend->independent_blend_enable ? cbuf : 0;
935          float dest[4][TGSI_QUAD_SIZE];
936          struct softpipe_cached_tile *tile
937             = sp_get_cached_tile(softpipe->cbuf_cache[cbuf],
938                                  quads[0]->input.x0,
939                                  quads[0]->input.y0, quads[0]->input.layer);
940          const boolean clamp = bqs->clamp[cbuf];
941          const float *blend_color;
942          const boolean dual_source_blend = util_blend_state_is_dual(blend, cbuf);
943          uint q, i, j;
944 
945          if (clamp)
946             blend_color = softpipe->blend_color_clamped.color;
947          else
948             blend_color = softpipe->blend_color.color;
949 
950          for (q = 0; q < nr; q++) {
951             struct quad_header *quad = quads[q];
952             float (*quadColor)[4];
953             float (*quadColor2)[4] = NULL;
954             float temp_quad_color[TGSI_QUAD_SIZE][4];
955             const int itx = (quad->input.x0 & (TILE_SIZE-1));
956             const int ity = (quad->input.y0 & (TILE_SIZE-1));
957 
958             if (write_all) {
959                for (j = 0; j < TGSI_QUAD_SIZE; j++) {
960                   for (i = 0; i < 4; i++) {
961                      temp_quad_color[i][j] = quad->output.color[0][i][j];
962                   }
963                }
964                quadColor = temp_quad_color;
965             } else {
966                quadColor = quad->output.color[cbuf];
967                if (dual_source_blend)
968                   quadColor2 = quad->output.color[cbuf + 1];
969             }
970 
971             /* If fixed-point dest color buffer, need to clamp the incoming
972              * fragment colors now.
973              */
974             if (clamp || softpipe->rasterizer->clamp_fragment_color) {
975                clamp_colors(quadColor);
976             }
977 
978             /* get/swizzle dest colors
979              */
980             for (j = 0; j < TGSI_QUAD_SIZE; j++) {
981                int x = itx + (j & 1);
982                int y = ity + (j >> 1);
983                for (i = 0; i < 4; i++) {
984                   dest[i][j] = tile->data.color[y][x][i];
985                }
986             }
987 
988 
989             if (blend->logicop_enable) {
990                if (bqs->format_type[cbuf] != UTIL_FORMAT_TYPE_FLOAT) {
991                   logicop_quad( qs, quadColor, dest );
992                }
993             }
994             else if (blend->rt[blend_buf].blend_enable) {
995                blend_quad(qs, quadColor, quadColor2, dest, blend_color, blend_buf);
996 
997                /* If fixed-point dest color buffer, need to clamp the outgoing
998                 * fragment colors now.
999                 */
1000                if (clamp) {
1001                   clamp_colors(quadColor);
1002                }
1003             }
1004 
1005             rebase_colors(bqs->base_format[cbuf], quadColor);
1006 
1007             if (blend->rt[blend_buf].colormask != 0xf)
1008                colormask_quad( blend->rt[blend_buf].colormask, quadColor, dest);
1009 
1010             /* Output color values
1011              */
1012             for (j = 0; j < TGSI_QUAD_SIZE; j++) {
1013                if (quad->inout.mask & (1 << j)) {
1014                   int x = itx + (j & 1);
1015                   int y = ity + (j >> 1);
1016                   for (i = 0; i < 4; i++) { /* loop over color chans */
1017                      tile->data.color[y][x][i] = quadColor[i][j];
1018                   }
1019                }
1020             }
1021          }
1022       }
1023    }
1024 }
1025 
1026 
1027 static void
blend_single_add_src_alpha_inv_src_alpha(struct quad_stage * qs,struct quad_header * quads[],unsigned nr)1028 blend_single_add_src_alpha_inv_src_alpha(struct quad_stage *qs,
1029                                          struct quad_header *quads[],
1030                                          unsigned nr)
1031 {
1032    const struct blend_quad_stage *bqs = blend_quad_stage(qs);
1033    static const float one[4] = { 1, 1, 1, 1 };
1034    float one_minus_alpha[TGSI_QUAD_SIZE];
1035    float dest[4][TGSI_QUAD_SIZE];
1036    float source[4][TGSI_QUAD_SIZE];
1037    uint i, j, q;
1038 
1039    struct softpipe_cached_tile *tile
1040       = sp_get_cached_tile(qs->softpipe->cbuf_cache[0],
1041                            quads[0]->input.x0,
1042                            quads[0]->input.y0, quads[0]->input.layer);
1043 
1044    for (q = 0; q < nr; q++) {
1045       struct quad_header *quad = quads[q];
1046       float (*quadColor)[4] = quad->output.color[0];
1047       const float *alpha = quadColor[3];
1048       const int itx = (quad->input.x0 & (TILE_SIZE-1));
1049       const int ity = (quad->input.y0 & (TILE_SIZE-1));
1050 
1051       /* get/swizzle dest colors */
1052       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
1053          int x = itx + (j & 1);
1054          int y = ity + (j >> 1);
1055          for (i = 0; i < 4; i++) {
1056             dest[i][j] = tile->data.color[y][x][i];
1057          }
1058       }
1059 
1060       /* If fixed-point dest color buffer, need to clamp the incoming
1061        * fragment colors now.
1062        */
1063       if (bqs->clamp[0] || qs->softpipe->rasterizer->clamp_fragment_color) {
1064          clamp_colors(quadColor);
1065       }
1066 
1067       VEC4_MUL(source[0], quadColor[0], alpha); /* R */
1068       VEC4_MUL(source[1], quadColor[1], alpha); /* G */
1069       VEC4_MUL(source[2], quadColor[2], alpha); /* B */
1070       VEC4_MUL(source[3], quadColor[3], alpha); /* A */
1071 
1072       VEC4_SUB(one_minus_alpha, one, alpha);
1073       VEC4_MUL(dest[0], dest[0], one_minus_alpha); /* R */
1074       VEC4_MUL(dest[1], dest[1], one_minus_alpha); /* G */
1075       VEC4_MUL(dest[2], dest[2], one_minus_alpha); /* B */
1076       VEC4_MUL(dest[3], dest[3], one_minus_alpha); /* A */
1077 
1078       VEC4_ADD(quadColor[0], source[0], dest[0]); /* R */
1079       VEC4_ADD(quadColor[1], source[1], dest[1]); /* G */
1080       VEC4_ADD(quadColor[2], source[2], dest[2]); /* B */
1081       VEC4_ADD(quadColor[3], source[3], dest[3]); /* A */
1082 
1083       /* If fixed-point dest color buffer, need to clamp the outgoing
1084        * fragment colors now.
1085        */
1086       if (bqs->clamp[0]) {
1087          clamp_colors(quadColor);
1088       }
1089 
1090       rebase_colors(bqs->base_format[0], quadColor);
1091 
1092       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
1093          if (quad->inout.mask & (1 << j)) {
1094             int x = itx + (j & 1);
1095             int y = ity + (j >> 1);
1096             for (i = 0; i < 4; i++) { /* loop over color chans */
1097                tile->data.color[y][x][i] = quadColor[i][j];
1098             }
1099          }
1100       }
1101    }
1102 }
1103 
1104 static void
blend_single_add_one_one(struct quad_stage * qs,struct quad_header * quads[],unsigned nr)1105 blend_single_add_one_one(struct quad_stage *qs,
1106                          struct quad_header *quads[],
1107                          unsigned nr)
1108 {
1109    const struct blend_quad_stage *bqs = blend_quad_stage(qs);
1110    float dest[4][TGSI_QUAD_SIZE];
1111    uint i, j, q;
1112 
1113    struct softpipe_cached_tile *tile
1114       = sp_get_cached_tile(qs->softpipe->cbuf_cache[0],
1115                            quads[0]->input.x0,
1116                            quads[0]->input.y0, quads[0]->input.layer);
1117 
1118    for (q = 0; q < nr; q++) {
1119       struct quad_header *quad = quads[q];
1120       float (*quadColor)[4] = quad->output.color[0];
1121       const int itx = (quad->input.x0 & (TILE_SIZE-1));
1122       const int ity = (quad->input.y0 & (TILE_SIZE-1));
1123 
1124       /* get/swizzle dest colors */
1125       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
1126          int x = itx + (j & 1);
1127          int y = ity + (j >> 1);
1128          for (i = 0; i < 4; i++) {
1129             dest[i][j] = tile->data.color[y][x][i];
1130          }
1131       }
1132 
1133       /* If fixed-point dest color buffer, need to clamp the incoming
1134        * fragment colors now.
1135        */
1136       if (bqs->clamp[0] || qs->softpipe->rasterizer->clamp_fragment_color) {
1137          clamp_colors(quadColor);
1138       }
1139 
1140       VEC4_ADD(quadColor[0], quadColor[0], dest[0]); /* R */
1141       VEC4_ADD(quadColor[1], quadColor[1], dest[1]); /* G */
1142       VEC4_ADD(quadColor[2], quadColor[2], dest[2]); /* B */
1143       VEC4_ADD(quadColor[3], quadColor[3], dest[3]); /* A */
1144 
1145       /* If fixed-point dest color buffer, need to clamp the outgoing
1146        * fragment colors now.
1147        */
1148       if (bqs->clamp[0]) {
1149          clamp_colors(quadColor);
1150       }
1151 
1152       rebase_colors(bqs->base_format[0], quadColor);
1153 
1154       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
1155          if (quad->inout.mask & (1 << j)) {
1156             int x = itx + (j & 1);
1157             int y = ity + (j >> 1);
1158             for (i = 0; i < 4; i++) { /* loop over color chans */
1159                tile->data.color[y][x][i] = quadColor[i][j];
1160             }
1161          }
1162       }
1163    }
1164 }
1165 
1166 
1167 /**
1168  * Just copy the quad color to the framebuffer tile (respecting the writemask),
1169  * for one color buffer.
1170  * Clamping will be done, if needed (depending on the color buffer's
1171  * datatype) when we write/pack the colors later.
1172  */
1173 static void
single_output_color(struct quad_stage * qs,struct quad_header * quads[],unsigned nr)1174 single_output_color(struct quad_stage *qs,
1175                     struct quad_header *quads[],
1176                     unsigned nr)
1177 {
1178    const struct blend_quad_stage *bqs = blend_quad_stage(qs);
1179    uint i, j, q;
1180 
1181    struct softpipe_cached_tile *tile
1182       = sp_get_cached_tile(qs->softpipe->cbuf_cache[0],
1183                            quads[0]->input.x0,
1184                            quads[0]->input.y0, quads[0]->input.layer);
1185 
1186    for (q = 0; q < nr; q++) {
1187       struct quad_header *quad = quads[q];
1188       float (*quadColor)[4] = quad->output.color[0];
1189       const int itx = (quad->input.x0 & (TILE_SIZE-1));
1190       const int ity = (quad->input.y0 & (TILE_SIZE-1));
1191 
1192       if (qs->softpipe->rasterizer->clamp_fragment_color)
1193          clamp_colors(quadColor);
1194 
1195       rebase_colors(bqs->base_format[0], quadColor);
1196 
1197       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
1198          if (quad->inout.mask & (1 << j)) {
1199             int x = itx + (j & 1);
1200             int y = ity + (j >> 1);
1201             for (i = 0; i < 4; i++) { /* loop over color chans */
1202                tile->data.color[y][x][i] = quadColor[i][j];
1203             }
1204          }
1205       }
1206    }
1207 }
1208 
/**
 * Run function that discards all quads.  choose_blend_quad() installs it
 * when there is no color buffer to write to.
 */
static void
blend_noop(struct quad_stage *qs,
           struct quad_header *quads[],
           unsigned nr)
{
   /* intentionally empty; none of the arguments are used */
   (void) qs;
   (void) quads;
   (void) nr;
}
1215 
1216 
1217 static void
choose_blend_quad(struct quad_stage * qs,struct quad_header * quads[],unsigned nr)1218 choose_blend_quad(struct quad_stage *qs,
1219                   struct quad_header *quads[],
1220                   unsigned nr)
1221 {
1222    struct blend_quad_stage *bqs = blend_quad_stage(qs);
1223    struct softpipe_context *softpipe = qs->softpipe;
1224    const struct pipe_blend_state *blend = softpipe->blend;
1225    unsigned i;
1226 
1227    qs->run = blend_fallback;
1228 
1229    if (softpipe->framebuffer.nr_cbufs == 0) {
1230       qs->run = blend_noop;
1231    }
1232    else if (!softpipe->blend->logicop_enable &&
1233             softpipe->blend->rt[0].colormask == 0xf &&
1234             softpipe->framebuffer.nr_cbufs == 1)
1235    {
1236       if (softpipe->framebuffer.cbufs[0] == NULL) {
1237          qs->run = blend_noop;
1238       }
1239       else if (!blend->rt[0].blend_enable) {
1240          qs->run = single_output_color;
1241       }
1242       else if (blend->rt[0].rgb_src_factor == blend->rt[0].alpha_src_factor &&
1243                blend->rt[0].rgb_dst_factor == blend->rt[0].alpha_dst_factor &&
1244                blend->rt[0].rgb_func == blend->rt[0].alpha_func)
1245       {
1246          if (blend->rt[0].alpha_func == PIPE_BLEND_ADD) {
1247             if (blend->rt[0].rgb_src_factor == PIPE_BLENDFACTOR_ONE &&
1248                 blend->rt[0].rgb_dst_factor == PIPE_BLENDFACTOR_ONE) {
1249                qs->run = blend_single_add_one_one;
1250             }
1251             else if (blend->rt[0].rgb_src_factor == PIPE_BLENDFACTOR_SRC_ALPHA &&
1252                 blend->rt[0].rgb_dst_factor == PIPE_BLENDFACTOR_INV_SRC_ALPHA)
1253                qs->run = blend_single_add_src_alpha_inv_src_alpha;
1254 
1255          }
1256       }
1257    }
1258 
1259    /* For each color buffer, determine if the buffer has destination alpha and
1260     * whether color clamping is needed.
1261     */
1262    for (i = 0; i < softpipe->framebuffer.nr_cbufs; i++) {
1263       if (softpipe->framebuffer.cbufs[i]) {
1264          const enum pipe_format format = softpipe->framebuffer.cbufs[i]->format;
1265          const struct util_format_description *desc =
1266             util_format_description(format);
1267          /* assuming all or no color channels are normalized: */
1268          bqs->clamp[i] = desc->channel[0].normalized;
1269          bqs->format_type[i] = desc->channel[0].type;
1270 
1271          if (util_format_is_intensity(format))
1272             bqs->base_format[i] = INTENSITY;
1273          else if (util_format_is_luminance(format))
1274             bqs->base_format[i] = LUMINANCE;
1275          else if (util_format_is_luminance_alpha(format))
1276             bqs->base_format[i] = LUMINANCE_ALPHA;
1277          else if (!util_format_has_alpha(format))
1278             bqs->base_format[i] = RGB;
1279          else
1280             bqs->base_format[i] = RGBA;
1281       }
1282    }
1283 
1284    qs->run(qs, quads, nr);
1285 }
1286 
1287 
blend_begin(struct quad_stage * qs)1288 static void blend_begin(struct quad_stage *qs)
1289 {
1290    qs->run = choose_blend_quad;
1291 }
1292 
1293 
/**
 * Stage "destroy" hook: release the stage object.
 */
static void
blend_destroy(struct quad_stage *qs)
{
   FREE(qs);
}
1298 
1299 
sp_quad_blend_stage(struct softpipe_context * softpipe)1300 struct quad_stage *sp_quad_blend_stage( struct softpipe_context *softpipe )
1301 {
1302    struct blend_quad_stage *stage = CALLOC_STRUCT(blend_quad_stage);
1303 
1304    if (!stage)
1305       return NULL;
1306 
1307    stage->base.softpipe = softpipe;
1308    stage->base.begin = blend_begin;
1309    stage->base.run = choose_blend_quad;
1310    stage->base.destroy = blend_destroy;
1311 
1312    return &stage->base;
1313 }
1314