/**************************************************************************
 *
 * Copyright 2007 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/**
 * quad blending
 * \author Brian Paul
 */

#include "pipe/p_defines.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "util/format/u_format.h"
#include "util/u_dual_blend.h"
#include "sp_context.h"
#include "sp_state.h"
#include "sp_quad.h"
#include "sp_tile_cache.h"
#include "sp_quad_pipe.h"

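/**
 * Base-format classes of the bound color buffers.  One of these is chosen
 * per color buffer in choose_blend_quad() and consumed by rebase_colors()
 * so that L, L/A, I and alpha-less RGB surfaces stay consistent in the
 * RGBA tile cache.
 */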
enum format
{
   RGBA,
   RGB,
   LUMINANCE,
   LUMINANCE_ALPHA,
   INTENSITY
};


/** Subclass of quad_stage */
struct blend_quad_stage
{
   struct quad_stage base;
   boolean clamp[PIPE_MAX_COLOR_BUFS];  /**< clamp colors to [0,1]? */
   enum format base_format[PIPE_MAX_COLOR_BUFS];
   enum util_format_type format_type[PIPE_MAX_COLOR_BUFS];
};


/** cast wrapper */
static inline struct blend_quad_stage *
blend_quad_stage(struct quad_stage *stage)
{
   return (struct blend_quad_stage *) stage;
}


#define VEC4_COPY(DST, SRC) \
do {                        \
   DST[0] = SRC[0];         \
   DST[1] = SRC[1];         \
   DST[2] = SRC[2];         \
   DST[3] = SRC[3];         \
} while(0)

#define VEC4_SCALAR(DST, SRC) \
do {                          \
   DST[0] = SRC;              \
   DST[1] = SRC;              \
   DST[2] = SRC;              \
   DST[3] = SRC;              \
} while(0)

#define VEC4_ADD(R, A, B) \
do {                      \
   R[0] = A[0] + B[0];    \
   R[1] = A[1] + B[1];    \
   R[2] = A[2] + B[2];    \
   R[3] = A[3] + B[3];    \
} while (0)

#define VEC4_SUB(R, A, B) \
do {                      \
   R[0] = A[0] - B[0];    \
   R[1] = A[1] - B[1];    \
   R[2] = A[2] - B[2];    \
   R[3] = A[3] - B[3];    \
} while (0)

/** Add and limit result to ceiling of 1.0 */
#define VEC4_ADD_SAT(R, A, B) \
do {                          \
   R[0] = A[0] + B[0];  if (R[0] > 1.0f) R[0] = 1.0f; \
   R[1] = A[1] + B[1];  if (R[1] > 1.0f) R[1] = 1.0f; \
   R[2] = A[2] + B[2];  if (R[2] > 1.0f) R[2] = 1.0f; \
   R[3] = A[3] + B[3];  if (R[3] > 1.0f) R[3] = 1.0f; \
} while (0)

/** Subtract and limit result to floor of 0.0 */
#define VEC4_SUB_SAT(R, A, B) \
do {                          \
   R[0] = A[0] - B[0];  if (R[0] < 0.0f) R[0] = 0.0f; \
   R[1] = A[1] - B[1];  if (R[1] < 0.0f) R[1] = 0.0f; \
   R[2] = A[2] - B[2];  if (R[2] < 0.0f) R[2] = 0.0f; \
   R[3] = A[3] - B[3];  if (R[3] < 0.0f) R[3] = 0.0f; \
} while (0)

#define VEC4_MUL(R, A, B) \
do {                      \
   R[0] = A[0] * B[0];    \
   R[1] = A[1] * B[1];    \
   R[2] = A[2] * B[2];    \
   R[3] = A[3] * B[3];    \
} while (0)

#define VEC4_MIN(R, A, B) \
do {                      \
   R[0] = (A[0] < B[0]) ? A[0] : B[0]; \
   R[1] = (A[1] < B[1]) ? A[1] : B[1]; \
   R[2] = (A[2] < B[2]) ? A[2] : B[2]; \
   R[3] = (A[3] < B[3]) ? A[3] : B[3]; \
} while (0)

#define VEC4_MAX(R, A, B) \
do {                      \
   R[0] = (A[0] > B[0]) ? A[0] : B[0]; \
   R[1] = (A[1] > B[1]) ? A[1] : B[1]; \
   R[2] = (A[2] > B[2]) ? A[2] : B[2]; \
   R[3] = (A[3] > B[3]) ? A[3] : B[3]; \
} while (0)
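
/*
 * Note on layout (as used throughout this file): a quad color is
 * float[4][TGSI_QUAD_SIZE]; the first index selects the channel
 * (0=R, 1=G, 2=B, 3=A) and the second the pixel within the 2x2 quad
 * (P0..P3).  Each VEC4_* macro therefore works on one channel of all
 * four pixels at once, e.g. VEC4_MUL(source[0], quadColor[0], alpha)
 * scales the quad's red values by the per-pixel alpha.
 */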


static void
logicop_quad(struct quad_stage *qs,
             float (*quadColor)[4],
             float (*dest)[4])
{
   struct softpipe_context *softpipe = qs->softpipe;
   ubyte src[4][4], dst[4][4], res[4][4];
   uint *src4 = (uint *) src;
   uint *dst4 = (uint *) dst;
   uint *res4 = (uint *) res;
   uint j;


   /* convert to ubyte */
   for (j = 0; j < 4; j++) { /* loop over R,G,B,A channels */
      dst[j][0] = float_to_ubyte(dest[j][0]); /* P0 */
      dst[j][1] = float_to_ubyte(dest[j][1]); /* P1 */
      dst[j][2] = float_to_ubyte(dest[j][2]); /* P2 */
      dst[j][3] = float_to_ubyte(dest[j][3]); /* P3 */

      src[j][0] = float_to_ubyte(quadColor[j][0]); /* P0 */
      src[j][1] = float_to_ubyte(quadColor[j][1]); /* P1 */
      src[j][2] = float_to_ubyte(quadColor[j][2]); /* P2 */
      src[j][3] = float_to_ubyte(quadColor[j][3]); /* P3 */

      res[j][0] = 0;
   }

   switch (softpipe->blend->logicop_func) {
   case PIPE_LOGICOP_CLEAR:
      for (j = 0; j < 4; j++)
         res4[j] = 0;
      break;
   case PIPE_LOGICOP_NOR:
      for (j = 0; j < 4; j++)
         res4[j] = ~(src4[j] | dst4[j]);
      break;
   case PIPE_LOGICOP_AND_INVERTED:
      for (j = 0; j < 4; j++)
         res4[j] = ~src4[j] & dst4[j];
      break;
   case PIPE_LOGICOP_COPY_INVERTED:
      for (j = 0; j < 4; j++)
         res4[j] = ~src4[j];
      break;
   case PIPE_LOGICOP_AND_REVERSE:
      for (j = 0; j < 4; j++)
         res4[j] = src4[j] & ~dst4[j];
      break;
   case PIPE_LOGICOP_INVERT:
      for (j = 0; j < 4; j++)
         res4[j] = ~dst4[j];
      break;
   case PIPE_LOGICOP_XOR:
      for (j = 0; j < 4; j++)
         res4[j] = dst4[j] ^ src4[j];
      break;
   case PIPE_LOGICOP_NAND:
      for (j = 0; j < 4; j++)
         res4[j] = ~(src4[j] & dst4[j]);
      break;
   case PIPE_LOGICOP_AND:
      for (j = 0; j < 4; j++)
         res4[j] = src4[j] & dst4[j];
      break;
   case PIPE_LOGICOP_EQUIV:
      for (j = 0; j < 4; j++)
         res4[j] = ~(src4[j] ^ dst4[j]);
      break;
   case PIPE_LOGICOP_NOOP:
      for (j = 0; j < 4; j++)
         res4[j] = dst4[j];
      break;
   case PIPE_LOGICOP_OR_INVERTED:
      for (j = 0; j < 4; j++)
         res4[j] = ~src4[j] | dst4[j];
      break;
   case PIPE_LOGICOP_COPY:
      for (j = 0; j < 4; j++)
         res4[j] = src4[j];
      break;
   case PIPE_LOGICOP_OR_REVERSE:
      for (j = 0; j < 4; j++)
         res4[j] = src4[j] | ~dst4[j];
      break;
   case PIPE_LOGICOP_OR:
      for (j = 0; j < 4; j++)
         res4[j] = src4[j] | dst4[j];
      break;
   case PIPE_LOGICOP_SET:
      for (j = 0; j < 4; j++)
         res4[j] = ~0;
      break;
   default:
      assert(0 && "invalid logicop mode");
   }

   for (j = 0; j < 4; j++) {
      quadColor[j][0] = ubyte_to_float(res[j][0]);
      quadColor[j][1] = ubyte_to_float(res[j][1]);
      quadColor[j][2] = ubyte_to_float(res[j][2]);
      quadColor[j][3] = ubyte_to_float(res[j][3]);
   }
}
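
/*
 * For illustration: each 32-bit word in src4/dst4/res4 aliases the four
 * pixels' bytes of one channel, so a single bitwise op handles the whole
 * quad per channel.  E.g. with PIPE_LOGICOP_XOR, a source byte of 0x3F
 * against a destination byte of 0xF0 yields 0xCF before the result is
 * converted back to float.
 */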


/**
 * Do blending for a 2x2 quad for one color buffer.
 * \param quadColor the incoming quad colors
 * \param quadColor2 the incoming secondary quad colors (dual-source blending)
 * \param dest the destination/framebuffer quad colors
 * \param const_blend_color the constant blend color
 * \param blend_index which set of blending terms to use
 */
static void
blend_quad(struct quad_stage *qs,
           float (*quadColor)[4],
           float (*quadColor2)[4],
           float (*dest)[4],
           const float const_blend_color[4],
           unsigned blend_index)
{
   static const float zero[4] = { 0, 0, 0, 0 };
   static const float one[4] = { 1, 1, 1, 1 };
   struct softpipe_context *softpipe = qs->softpipe;
   float source[4][TGSI_QUAD_SIZE] = { { 0 } };
   float blend_dest[4][TGSI_QUAD_SIZE];

   /*
    * Compute src/first term RGB
    */
   switch (softpipe->blend->rt[blend_index].rgb_src_factor) {
   case PIPE_BLENDFACTOR_ONE:
      VEC4_COPY(source[0], quadColor[0]); /* R */
      VEC4_COPY(source[1], quadColor[1]); /* G */
      VEC4_COPY(source[2], quadColor[2]); /* B */
      break;
   case PIPE_BLENDFACTOR_SRC_COLOR:
      VEC4_MUL(source[0], quadColor[0], quadColor[0]); /* R */
      VEC4_MUL(source[1], quadColor[1], quadColor[1]); /* G */
      VEC4_MUL(source[2], quadColor[2], quadColor[2]); /* B */
      break;
   case PIPE_BLENDFACTOR_SRC_ALPHA:
      {
         const float *alpha = quadColor[3];
         VEC4_MUL(source[0], quadColor[0], alpha); /* R */
         VEC4_MUL(source[1], quadColor[1], alpha); /* G */
         VEC4_MUL(source[2], quadColor[2], alpha); /* B */
      }
      break;
   case PIPE_BLENDFACTOR_DST_COLOR:
      VEC4_MUL(source[0], quadColor[0], dest[0]); /* R */
      VEC4_MUL(source[1], quadColor[1], dest[1]); /* G */
      VEC4_MUL(source[2], quadColor[2], dest[2]); /* B */
      break;
   case PIPE_BLENDFACTOR_DST_ALPHA:
      {
         const float *alpha = dest[3];
         VEC4_MUL(source[0], quadColor[0], alpha); /* R */
         VEC4_MUL(source[1], quadColor[1], alpha); /* G */
         VEC4_MUL(source[2], quadColor[2], alpha); /* B */
      }
      break;
   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
      {
         const float *alpha = quadColor[3];
         float diff[4], temp[4];
         VEC4_SUB(diff, one, dest[3]);
         VEC4_MIN(temp, alpha, diff);
         VEC4_MUL(source[0], quadColor[0], temp); /* R */
         VEC4_MUL(source[1], quadColor[1], temp); /* G */
         VEC4_MUL(source[2], quadColor[2], temp); /* B */
      }
      break;
   case PIPE_BLENDFACTOR_CONST_COLOR:
      {
         float comp[4];
         VEC4_SCALAR(comp, const_blend_color[0]); /* R */
         VEC4_MUL(source[0], quadColor[0], comp); /* R */
         VEC4_SCALAR(comp, const_blend_color[1]); /* G */
         VEC4_MUL(source[1], quadColor[1], comp); /* G */
         VEC4_SCALAR(comp, const_blend_color[2]); /* B */
         VEC4_MUL(source[2], quadColor[2], comp); /* B */
      }
      break;
   case PIPE_BLENDFACTOR_CONST_ALPHA:
      {
         float alpha[4];
         VEC4_SCALAR(alpha, const_blend_color[3]);
         VEC4_MUL(source[0], quadColor[0], alpha); /* R */
         VEC4_MUL(source[1], quadColor[1], alpha); /* G */
         VEC4_MUL(source[2], quadColor[2], alpha); /* B */
      }
      break;
   case PIPE_BLENDFACTOR_SRC1_COLOR:
      VEC4_MUL(source[0], quadColor[0], quadColor2[0]); /* R */
      VEC4_MUL(source[1], quadColor[1], quadColor2[1]); /* G */
      VEC4_MUL(source[2], quadColor[2], quadColor2[2]); /* B */
      break;
   case PIPE_BLENDFACTOR_SRC1_ALPHA:
      {
         const float *alpha = quadColor2[3];
         VEC4_MUL(source[0], quadColor[0], alpha); /* R */
         VEC4_MUL(source[1], quadColor[1], alpha); /* G */
         VEC4_MUL(source[2], quadColor[2], alpha); /* B */
      }
      break;
   case PIPE_BLENDFACTOR_ZERO:
      VEC4_COPY(source[0], zero); /* R */
      VEC4_COPY(source[1], zero); /* G */
      VEC4_COPY(source[2], zero); /* B */
      break;
   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
      {
         float inv_comp[4];
         VEC4_SUB(inv_comp, one, quadColor[0]); /* R */
         VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */
         VEC4_SUB(inv_comp, one, quadColor[1]); /* G */
         VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */
         VEC4_SUB(inv_comp, one, quadColor[2]); /* B */
         VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */
      }
      break;
   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
      {
         float inv_alpha[4];
         VEC4_SUB(inv_alpha, one, quadColor[3]);
         VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
         VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
         VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
      }
      break;
   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
      {
         float inv_alpha[4];
         VEC4_SUB(inv_alpha, one, dest[3]);
         VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
         VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
         VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
      }
      break;
   case PIPE_BLENDFACTOR_INV_DST_COLOR:
      {
         float inv_comp[4];
         VEC4_SUB(inv_comp, one, dest[0]); /* R */
         VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */
         VEC4_SUB(inv_comp, one, dest[1]); /* G */
         VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */
         VEC4_SUB(inv_comp, one, dest[2]); /* B */
         VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */
      }
      break;
   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
      {
         float inv_comp[4];
         /* R */
         VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[0]);
         VEC4_MUL(source[0], quadColor[0], inv_comp);
         /* G */
         VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[1]);
         VEC4_MUL(source[1], quadColor[1], inv_comp);
         /* B */
         VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[2]);
         VEC4_MUL(source[2], quadColor[2], inv_comp);
      }
      break;
   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
      {
         float inv_alpha[4];
         VEC4_SCALAR(inv_alpha, 1.0f - const_blend_color[3]);
         VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
         VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
         VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
      }
      break;
   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
      {
         float inv_comp[4];
         VEC4_SUB(inv_comp, one, quadColor2[0]); /* R */
         VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */
         VEC4_SUB(inv_comp, one, quadColor2[1]); /* G */
         VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */
         VEC4_SUB(inv_comp, one, quadColor2[2]); /* B */
         VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */
      }
      break;
   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
      {
         float inv_alpha[4];
         VEC4_SUB(inv_alpha, one, quadColor2[3]);
         VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
         VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
         VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
      }
      break;
   default:
      assert(0 && "invalid rgb src factor");
   }

   /*
    * Compute src/first term A
    */
   switch (softpipe->blend->rt[blend_index].alpha_src_factor) {
   case PIPE_BLENDFACTOR_ONE:
      VEC4_COPY(source[3], quadColor[3]); /* A */
      break;
   case PIPE_BLENDFACTOR_SRC_COLOR:
      FALLTHROUGH;
   case PIPE_BLENDFACTOR_SRC_ALPHA:
      {
         const float *alpha = quadColor[3];
         VEC4_MUL(source[3], quadColor[3], alpha); /* A */
      }
      break;
   case PIPE_BLENDFACTOR_DST_COLOR:
      FALLTHROUGH;
   case PIPE_BLENDFACTOR_DST_ALPHA:
      VEC4_MUL(source[3], quadColor[3], dest[3]); /* A */
      break;
   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
      /* multiply alpha by 1.0 */
      VEC4_COPY(source[3], quadColor[3]); /* A */
      break;
   case PIPE_BLENDFACTOR_CONST_COLOR:
      FALLTHROUGH;
   case PIPE_BLENDFACTOR_CONST_ALPHA:
      {
         float comp[4];
         VEC4_SCALAR(comp, const_blend_color[3]); /* A */
         VEC4_MUL(source[3], quadColor[3], comp); /* A */
      }
      break;
   case PIPE_BLENDFACTOR_ZERO:
      VEC4_COPY(source[3], zero); /* A */
      break;
   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
      FALLTHROUGH;
   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
      {
         float inv_alpha[4];
         VEC4_SUB(inv_alpha, one, quadColor[3]);
         VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */
      }
      break;
   case PIPE_BLENDFACTOR_INV_DST_COLOR:
      FALLTHROUGH;
   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
      {
         float inv_alpha[4];
         VEC4_SUB(inv_alpha, one, dest[3]);
         VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */
      }
      break;
   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
      FALLTHROUGH;
   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
      {
         float inv_comp[4];
         /* A */
         VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[3]);
         VEC4_MUL(source[3], quadColor[3], inv_comp);
      }
      break;
   case PIPE_BLENDFACTOR_SRC1_COLOR:
      FALLTHROUGH;
   case PIPE_BLENDFACTOR_SRC1_ALPHA:
      {
         const float *alpha = quadColor2[3];
         VEC4_MUL(source[3], quadColor[3], alpha); /* A */
      }
      break;
   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
      FALLTHROUGH;
   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
      {
         float inv_alpha[4];
         VEC4_SUB(inv_alpha, one, quadColor2[3]);
         VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */
      }
      break;
   default:
      assert(0 && "invalid alpha src factor");
   }

   /* Save the original dest for use in masking */
   VEC4_COPY(blend_dest[0], dest[0]);
   VEC4_COPY(blend_dest[1], dest[1]);
   VEC4_COPY(blend_dest[2], dest[2]);
   VEC4_COPY(blend_dest[3], dest[3]);


   /*
    * Compute blend_dest/second term RGB
    */
   switch (softpipe->blend->rt[blend_index].rgb_dst_factor) {
   case PIPE_BLENDFACTOR_ONE:
      /* blend_dest = blend_dest * 1   NO-OP, leave blend_dest as-is */
      break;
   case PIPE_BLENDFACTOR_SRC_COLOR:
      VEC4_MUL(blend_dest[0], blend_dest[0], quadColor[0]); /* R */
      VEC4_MUL(blend_dest[1], blend_dest[1], quadColor[1]); /* G */
      VEC4_MUL(blend_dest[2], blend_dest[2], quadColor[2]); /* B */
      break;
   case PIPE_BLENDFACTOR_SRC_ALPHA:
      VEC4_MUL(blend_dest[0], blend_dest[0], quadColor[3]); /* R * A */
      VEC4_MUL(blend_dest[1], blend_dest[1], quadColor[3]); /* G * A */
      VEC4_MUL(blend_dest[2], blend_dest[2], quadColor[3]); /* B * A */
      break;
   case PIPE_BLENDFACTOR_DST_ALPHA:
      VEC4_MUL(blend_dest[0], blend_dest[0], blend_dest[3]); /* R * A */
      VEC4_MUL(blend_dest[1], blend_dest[1], blend_dest[3]); /* G * A */
      VEC4_MUL(blend_dest[2], blend_dest[2], blend_dest[3]); /* B * A */
      break;
   case PIPE_BLENDFACTOR_DST_COLOR:
      VEC4_MUL(blend_dest[0], blend_dest[0], blend_dest[0]); /* R */
      VEC4_MUL(blend_dest[1], blend_dest[1], blend_dest[1]); /* G */
      VEC4_MUL(blend_dest[2], blend_dest[2], blend_dest[2]); /* B */
      break;
   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
      {
         const float *alpha = quadColor[3];
         float diff[4], temp[4];
         VEC4_SUB(diff, one, blend_dest[3]);
         VEC4_MIN(temp, alpha, diff);
         VEC4_MUL(blend_dest[0], blend_dest[0], temp); /* R */
         VEC4_MUL(blend_dest[1], blend_dest[1], temp); /* G */
         VEC4_MUL(blend_dest[2], blend_dest[2], temp); /* B */
      }
      break;
   case PIPE_BLENDFACTOR_CONST_COLOR:
      {
         float comp[4];
         VEC4_SCALAR(comp, const_blend_color[0]); /* R */
         VEC4_MUL(blend_dest[0], blend_dest[0], comp); /* R */
         VEC4_SCALAR(comp, const_blend_color[1]); /* G */
         VEC4_MUL(blend_dest[1], blend_dest[1], comp); /* G */
         VEC4_SCALAR(comp, const_blend_color[2]); /* B */
         VEC4_MUL(blend_dest[2], blend_dest[2], comp); /* B */
      }
      break;
   case PIPE_BLENDFACTOR_CONST_ALPHA:
      {
         float comp[4];
         VEC4_SCALAR(comp, const_blend_color[3]); /* A */
         VEC4_MUL(blend_dest[0], blend_dest[0], comp); /* R */
         VEC4_MUL(blend_dest[1], blend_dest[1], comp); /* G */
         VEC4_MUL(blend_dest[2], blend_dest[2], comp); /* B */
      }
      break;
   case PIPE_BLENDFACTOR_ZERO:
      VEC4_COPY(blend_dest[0], zero); /* R */
      VEC4_COPY(blend_dest[1], zero); /* G */
      VEC4_COPY(blend_dest[2], zero); /* B */
      break;
   case PIPE_BLENDFACTOR_SRC1_COLOR:
      VEC4_MUL(blend_dest[0], blend_dest[0], quadColor2[0]); /* R */
      VEC4_MUL(blend_dest[1], blend_dest[1], quadColor2[1]); /* G */
      VEC4_MUL(blend_dest[2], blend_dest[2], quadColor2[2]); /* B */
      break;
   case PIPE_BLENDFACTOR_SRC1_ALPHA:
      VEC4_MUL(blend_dest[0], blend_dest[0], quadColor2[3]); /* R * A */
      VEC4_MUL(blend_dest[1], blend_dest[1], quadColor2[3]); /* G * A */
      VEC4_MUL(blend_dest[2], blend_dest[2], quadColor2[3]); /* B * A */
      break;
   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
      {
         float inv_comp[4];
         VEC4_SUB(inv_comp, one, quadColor[0]); /* R */
         VEC4_MUL(blend_dest[0], inv_comp, blend_dest[0]); /* R */
         VEC4_SUB(inv_comp, one, quadColor[1]); /* G */
         VEC4_MUL(blend_dest[1], inv_comp, blend_dest[1]); /* G */
         VEC4_SUB(inv_comp, one, quadColor[2]); /* B */
         VEC4_MUL(blend_dest[2], inv_comp, blend_dest[2]); /* B */
      }
      break;
   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
      {
         float one_minus_alpha[TGSI_QUAD_SIZE];
         VEC4_SUB(one_minus_alpha, one, quadColor[3]);
         VEC4_MUL(blend_dest[0], blend_dest[0], one_minus_alpha); /* R */
         VEC4_MUL(blend_dest[1], blend_dest[1], one_minus_alpha); /* G */
         VEC4_MUL(blend_dest[2], blend_dest[2], one_minus_alpha); /* B */
      }
      break;
   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
      {
         float inv_comp[4];
         VEC4_SUB(inv_comp, one, blend_dest[3]); /* A */
         VEC4_MUL(blend_dest[0], inv_comp, blend_dest[0]); /* R */
         VEC4_MUL(blend_dest[1], inv_comp, blend_dest[1]); /* G */
         VEC4_MUL(blend_dest[2], inv_comp, blend_dest[2]); /* B */
      }
      break;
   case PIPE_BLENDFACTOR_INV_DST_COLOR:
      {
         float inv_comp[4];
         VEC4_SUB(inv_comp, one, blend_dest[0]); /* R */
         VEC4_MUL(blend_dest[0], blend_dest[0], inv_comp); /* R */
         VEC4_SUB(inv_comp, one, blend_dest[1]); /* G */
         VEC4_MUL(blend_dest[1], blend_dest[1], inv_comp); /* G */
         VEC4_SUB(inv_comp, one, blend_dest[2]); /* B */
         VEC4_MUL(blend_dest[2], blend_dest[2], inv_comp); /* B */
      }
      break;
   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
      {
         float inv_comp[4];
         /* R */
         VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[0]);
         VEC4_MUL(blend_dest[0], blend_dest[0], inv_comp);
         /* G */
         VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[1]);
         VEC4_MUL(blend_dest[1], blend_dest[1], inv_comp);
         /* B */
         VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[2]);
         VEC4_MUL(blend_dest[2], blend_dest[2], inv_comp);
      }
      break;
   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
      {
         float inv_comp[4];
         VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[3]);
         VEC4_MUL(blend_dest[0], blend_dest[0], inv_comp);
         VEC4_MUL(blend_dest[1], blend_dest[1], inv_comp);
         VEC4_MUL(blend_dest[2], blend_dest[2], inv_comp);
      }
      break;
   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
      {
         float inv_comp[4];
         VEC4_SUB(inv_comp, one, quadColor2[0]); /* R */
         VEC4_MUL(blend_dest[0], inv_comp, blend_dest[0]); /* R */
         VEC4_SUB(inv_comp, one, quadColor2[1]); /* G */
         VEC4_MUL(blend_dest[1], inv_comp, blend_dest[1]); /* G */
         VEC4_SUB(inv_comp, one, quadColor2[2]); /* B */
         VEC4_MUL(blend_dest[2], inv_comp, blend_dest[2]); /* B */
      }
      break;
   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
      {
         float one_minus_alpha[TGSI_QUAD_SIZE];
         VEC4_SUB(one_minus_alpha, one, quadColor2[3]);
         VEC4_MUL(blend_dest[0], blend_dest[0], one_minus_alpha); /* R */
         VEC4_MUL(blend_dest[1], blend_dest[1], one_minus_alpha); /* G */
         VEC4_MUL(blend_dest[2], blend_dest[2], one_minus_alpha); /* B */
      }
      break;
   default:
      assert(0 && "invalid rgb dst factor");
   }

   /*
    * Compute blend_dest/second term A
    */
   switch (softpipe->blend->rt[blend_index].alpha_dst_factor) {
   case PIPE_BLENDFACTOR_ONE:
      /* blend_dest = blend_dest * 1   NO-OP, leave blend_dest as-is */
      break;
   case PIPE_BLENDFACTOR_SRC_COLOR:
      FALLTHROUGH;
   case PIPE_BLENDFACTOR_SRC_ALPHA:
      VEC4_MUL(blend_dest[3], blend_dest[3], quadColor[3]); /* A * A */
      break;
   case PIPE_BLENDFACTOR_DST_COLOR:
      FALLTHROUGH;
   case PIPE_BLENDFACTOR_DST_ALPHA:
      VEC4_MUL(blend_dest[3], blend_dest[3], blend_dest[3]); /* A */
      break;
   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
      /* blend_dest = blend_dest * 1   NO-OP, leave blend_dest as-is */
      break;
   case PIPE_BLENDFACTOR_CONST_COLOR:
      FALLTHROUGH;
   case PIPE_BLENDFACTOR_CONST_ALPHA:
      {
         float comp[4];
         VEC4_SCALAR(comp, const_blend_color[3]); /* A */
         VEC4_MUL(blend_dest[3], blend_dest[3], comp); /* A */
      }
      break;
   case PIPE_BLENDFACTOR_ZERO:
      VEC4_COPY(blend_dest[3], zero); /* A */
      break;
   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
      FALLTHROUGH;
   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
      {
         float one_minus_alpha[TGSI_QUAD_SIZE];
         VEC4_SUB(one_minus_alpha, one, quadColor[3]);
         VEC4_MUL(blend_dest[3], blend_dest[3], one_minus_alpha); /* A */
      }
      break;
   case PIPE_BLENDFACTOR_INV_DST_COLOR:
      FALLTHROUGH;
   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
      {
         float inv_comp[4];
         VEC4_SUB(inv_comp, one, blend_dest[3]); /* A */
         VEC4_MUL(blend_dest[3], inv_comp, blend_dest[3]); /* A */
      }
      break;
   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
      FALLTHROUGH;
   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
      {
         float inv_comp[4];
         VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[3]);
         VEC4_MUL(blend_dest[3], blend_dest[3], inv_comp);
      }
      break;
   case PIPE_BLENDFACTOR_SRC1_COLOR:
      FALLTHROUGH;
   case PIPE_BLENDFACTOR_SRC1_ALPHA:
      VEC4_MUL(blend_dest[3], blend_dest[3], quadColor2[3]); /* A * A */
      break;
   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
      FALLTHROUGH;
   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
      {
         float one_minus_alpha[TGSI_QUAD_SIZE];
         VEC4_SUB(one_minus_alpha, one, quadColor2[3]);
         VEC4_MUL(blend_dest[3], blend_dest[3], one_minus_alpha); /* A */
      }
      break;
   default:
      assert(0 && "invalid alpha dst factor");
   }

   /*
    * Combine RGB terms
    */
   switch (softpipe->blend->rt[blend_index].rgb_func) {
   case PIPE_BLEND_ADD:
      VEC4_ADD(quadColor[0], source[0], blend_dest[0]); /* R */
      VEC4_ADD(quadColor[1], source[1], blend_dest[1]); /* G */
      VEC4_ADD(quadColor[2], source[2], blend_dest[2]); /* B */
      break;
   case PIPE_BLEND_SUBTRACT:
      VEC4_SUB(quadColor[0], source[0], blend_dest[0]); /* R */
      VEC4_SUB(quadColor[1], source[1], blend_dest[1]); /* G */
      VEC4_SUB(quadColor[2], source[2], blend_dest[2]); /* B */
      break;
   case PIPE_BLEND_REVERSE_SUBTRACT:
      VEC4_SUB(quadColor[0], blend_dest[0], source[0]); /* R */
      VEC4_SUB(quadColor[1], blend_dest[1], source[1]); /* G */
      VEC4_SUB(quadColor[2], blend_dest[2], source[2]); /* B */
      break;
   case PIPE_BLEND_MIN:
      VEC4_MIN(quadColor[0], source[0], blend_dest[0]); /* R */
      VEC4_MIN(quadColor[1], source[1], blend_dest[1]); /* G */
      VEC4_MIN(quadColor[2], source[2], blend_dest[2]); /* B */
      break;
   case PIPE_BLEND_MAX:
      VEC4_MAX(quadColor[0], source[0], blend_dest[0]); /* R */
      VEC4_MAX(quadColor[1], source[1], blend_dest[1]); /* G */
      VEC4_MAX(quadColor[2], source[2], blend_dest[2]); /* B */
      break;
   default:
      assert(0 && "invalid rgb blend func");
   }

   /*
    * Combine A terms
    */
   switch (softpipe->blend->rt[blend_index].alpha_func) {
   case PIPE_BLEND_ADD:
      VEC4_ADD(quadColor[3], source[3], blend_dest[3]); /* A */
      break;
   case PIPE_BLEND_SUBTRACT:
      VEC4_SUB(quadColor[3], source[3], blend_dest[3]); /* A */
      break;
   case PIPE_BLEND_REVERSE_SUBTRACT:
      VEC4_SUB(quadColor[3], blend_dest[3], source[3]); /* A */
      break;
   case PIPE_BLEND_MIN:
      VEC4_MIN(quadColor[3], source[3], blend_dest[3]); /* A */
      break;
   case PIPE_BLEND_MAX:
      VEC4_MAX(quadColor[3], source[3], blend_dest[3]); /* A */
      break;
   default:
      assert(0 && "invalid alpha blend func");
   }
}
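
/*
 * Worked example: for classic alpha blending (rgb/alpha src factor =
 * SRC_ALPHA, dst factor = INV_SRC_ALPHA, func = ADD) the code above
 * computes result = src * As + dest * (1 - As).  E.g. src R = 0.8 with
 * As = 0.5 over dest R = 0.2 gives 0.8*0.5 + 0.2*0.5 = 0.5.
 */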

static void
colormask_quad(unsigned colormask,
               float (*quadColor)[4],
               float (*dest)[4])
{
   /* R */
   if (!(colormask & PIPE_MASK_R))
      COPY_4V(quadColor[0], dest[0]);

   /* G */
   if (!(colormask & PIPE_MASK_G))
      COPY_4V(quadColor[1], dest[1]);

   /* B */
   if (!(colormask & PIPE_MASK_B))
      COPY_4V(quadColor[2], dest[2]);

   /* A */
   if (!(colormask & PIPE_MASK_A))
      COPY_4V(quadColor[3], dest[3]);
}


/**
 * Clamp all colors in a quad to [0, 1]
 */
static void
clamp_colors(float (*quadColor)[4])
{
   unsigned i, j;

   for (i = 0; i < 4; i++) {
      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
         quadColor[i][j] = CLAMP(quadColor[i][j], 0.0F, 1.0F);
      }
   }
}


/**
 * If we're drawing to a luminance, luminance/alpha or intensity surface
 * we have to adjust (rebase) the fragment/quad colors before writing them
 * to the tile cache.  The tile cache always stores RGBA colors but if
 * we're caching a L/A surface (for example) we need to be sure that R=G=B
 * so that subsequent reads from the surface cache appear to return L/A
 * values.
 * The piglit fbo-blending-formats test will exercise this.
 */
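/*
 * For example, with a LUMINANCE destination an incoming quad color of
 * (0.2, 0.5, 0.7, 0.9) is rewritten to (0.2, 0.2, 0.2, 1.0), so a later
 * read of the L channel returns the original red value.
 */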
static void
rebase_colors(enum format base_format, float (*quadColor)[4])
{
   unsigned i;

   switch (base_format) {
   case RGB:
      for (i = 0; i < 4; i++) {
         /* A = 1 */
         quadColor[3][i] = 1.0F;
      }
      break;
   case LUMINANCE:
      for (i = 0; i < 4; i++) {
         /* B = G = R */
         quadColor[2][i] = quadColor[1][i] = quadColor[0][i];
         /* A = 1 */
         quadColor[3][i] = 1.0F;
      }
      break;
   case LUMINANCE_ALPHA:
      for (i = 0; i < 4; i++) {
         /* B = G = R */
         quadColor[2][i] = quadColor[1][i] = quadColor[0][i];
      }
      break;
   case INTENSITY:
      for (i = 0; i < 4; i++) {
         /* A = B = G = R */
         quadColor[3][i] = quadColor[2][i] = quadColor[1][i] = quadColor[0][i];
      }
      break;
   default:
      ; /* nothing */
   }
}

static void
blend_fallback(struct quad_stage *qs,
               struct quad_header *quads[],
               unsigned nr)
{
   const struct blend_quad_stage *bqs = blend_quad_stage(qs);
   struct softpipe_context *softpipe = qs->softpipe;
   const struct pipe_blend_state *blend = softpipe->blend;
   unsigned cbuf;
   boolean write_all =
      softpipe->fs_variant->info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS];

   for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) {
      if (softpipe->framebuffer.cbufs[cbuf]) {
         /* which blend/mask state index to use: */
         const uint blend_buf = blend->independent_blend_enable ? cbuf : 0;
         float dest[4][TGSI_QUAD_SIZE];
         struct softpipe_cached_tile *tile
            = sp_get_cached_tile(softpipe->cbuf_cache[cbuf],
                                 quads[0]->input.x0,
                                 quads[0]->input.y0, quads[0]->input.layer);
         const boolean clamp = bqs->clamp[cbuf];
         const float *blend_color;
         const boolean dual_source_blend = util_blend_state_is_dual(blend, cbuf);
         uint q, i, j;

         if (clamp)
            blend_color = softpipe->blend_color_clamped.color;
         else
            blend_color = softpipe->blend_color.color;

         for (q = 0; q < nr; q++) {
            struct quad_header *quad = quads[q];
            float (*quadColor)[4];
            float (*quadColor2)[4] = NULL;
            float temp_quad_color[TGSI_QUAD_SIZE][4];
            const int itx = (quad->input.x0 & (TILE_SIZE-1));
            const int ity = (quad->input.y0 & (TILE_SIZE-1));

            if (write_all) {
               for (j = 0; j < TGSI_QUAD_SIZE; j++) {
                  for (i = 0; i < 4; i++) {
                     temp_quad_color[i][j] = quad->output.color[0][i][j];
                  }
               }
               quadColor = temp_quad_color;
            } else {
               quadColor = quad->output.color[cbuf];
               if (dual_source_blend)
                  quadColor2 = quad->output.color[cbuf + 1];
            }

            /* If fixed-point dest color buffer, need to clamp the incoming
             * fragment colors now.
             */
            if (clamp || softpipe->rasterizer->clamp_fragment_color) {
               clamp_colors(quadColor);
            }

            /* get/swizzle dest colors
             */
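            /* Pixel j of the quad maps to offset (j & 1, j >> 1) from
             * (itx, ity) within the cached tile: P0=(0,0), P1=(1,0),
             * P2=(0,1), P3=(1,1).
             */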
            for (j = 0; j < TGSI_QUAD_SIZE; j++) {
               int x = itx + (j & 1);
               int y = ity + (j >> 1);
               for (i = 0; i < 4; i++) {
                  dest[i][j] = tile->data.color[y][x][i];
               }
            }


            if (blend->logicop_enable) {
               if (bqs->format_type[cbuf] != UTIL_FORMAT_TYPE_FLOAT) {
                  logicop_quad( qs, quadColor, dest );
               }
            }
            else if (blend->rt[blend_buf].blend_enable) {
               blend_quad(qs, quadColor, quadColor2, dest, blend_color, blend_buf);

               /* If fixed-point dest color buffer, need to clamp the outgoing
                * fragment colors now.
                */
               if (clamp) {
                  clamp_colors(quadColor);
               }
            }

            rebase_colors(bqs->base_format[cbuf], quadColor);

            if (blend->rt[blend_buf].colormask != 0xf)
               colormask_quad( blend->rt[blend_buf].colormask, quadColor, dest);

            /* Output color values
             */
            for (j = 0; j < TGSI_QUAD_SIZE; j++) {
               if (quad->inout.mask & (1 << j)) {
                  int x = itx + (j & 1);
                  int y = ity + (j >> 1);
                  for (i = 0; i < 4; i++) { /* loop over color chans */
                     tile->data.color[y][x][i] = quadColor[i][j];
                  }
               }
            }
         }
      }
   }
}


static void
blend_single_add_src_alpha_inv_src_alpha(struct quad_stage *qs,
                                         struct quad_header *quads[],
                                         unsigned nr)
{
   const struct blend_quad_stage *bqs = blend_quad_stage(qs);
   static const float one[4] = { 1, 1, 1, 1 };
   float one_minus_alpha[TGSI_QUAD_SIZE];
   float dest[4][TGSI_QUAD_SIZE];
   float source[4][TGSI_QUAD_SIZE];
   uint i, j, q;

   struct softpipe_cached_tile *tile
      = sp_get_cached_tile(qs->softpipe->cbuf_cache[0],
                           quads[0]->input.x0,
                           quads[0]->input.y0, quads[0]->input.layer);

   for (q = 0; q < nr; q++) {
      struct quad_header *quad = quads[q];
      float (*quadColor)[4] = quad->output.color[0];
      const float *alpha = quadColor[3];
      const int itx = (quad->input.x0 & (TILE_SIZE-1));
      const int ity = (quad->input.y0 & (TILE_SIZE-1));

      /* get/swizzle dest colors */
      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
         int x = itx + (j & 1);
         int y = ity + (j >> 1);
         for (i = 0; i < 4; i++) {
            dest[i][j] = tile->data.color[y][x][i];
         }
      }

      /* If fixed-point dest color buffer, need to clamp the incoming
       * fragment colors now.
       */
      if (bqs->clamp[0] || qs->softpipe->rasterizer->clamp_fragment_color) {
         clamp_colors(quadColor);
      }

      VEC4_MUL(source[0], quadColor[0], alpha); /* R */
      VEC4_MUL(source[1], quadColor[1], alpha); /* G */
      VEC4_MUL(source[2], quadColor[2], alpha); /* B */
      VEC4_MUL(source[3], quadColor[3], alpha); /* A */

      VEC4_SUB(one_minus_alpha, one, alpha);
      VEC4_MUL(dest[0], dest[0], one_minus_alpha); /* R */
      VEC4_MUL(dest[1], dest[1], one_minus_alpha); /* G */
      VEC4_MUL(dest[2], dest[2], one_minus_alpha); /* B */
      VEC4_MUL(dest[3], dest[3], one_minus_alpha); /* A */

      VEC4_ADD(quadColor[0], source[0], dest[0]); /* R */
      VEC4_ADD(quadColor[1], source[1], dest[1]); /* G */
      VEC4_ADD(quadColor[2], source[2], dest[2]); /* B */
      VEC4_ADD(quadColor[3], source[3], dest[3]); /* A */

      /* If fixed-point dest color buffer, need to clamp the outgoing
       * fragment colors now.
       */
      if (bqs->clamp[0]) {
         clamp_colors(quadColor);
      }

      rebase_colors(bqs->base_format[0], quadColor);

      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
         if (quad->inout.mask & (1 << j)) {
            int x = itx + (j & 1);
            int y = ity + (j >> 1);
            for (i = 0; i < 4; i++) { /* loop over color chans */
               tile->data.color[y][x][i] = quadColor[i][j];
            }
         }
      }
   }
}

static void
blend_single_add_one_one(struct quad_stage *qs,
                         struct quad_header *quads[],
                         unsigned nr)
{
   const struct blend_quad_stage *bqs = blend_quad_stage(qs);
   float dest[4][TGSI_QUAD_SIZE];
   uint i, j, q;

   struct softpipe_cached_tile *tile
      = sp_get_cached_tile(qs->softpipe->cbuf_cache[0],
                           quads[0]->input.x0,
                           quads[0]->input.y0, quads[0]->input.layer);

   for (q = 0; q < nr; q++) {
      struct quad_header *quad = quads[q];
      float (*quadColor)[4] = quad->output.color[0];
      const int itx = (quad->input.x0 & (TILE_SIZE-1));
      const int ity = (quad->input.y0 & (TILE_SIZE-1));

      /* get/swizzle dest colors */
      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
         int x = itx + (j & 1);
         int y = ity + (j >> 1);
         for (i = 0; i < 4; i++) {
            dest[i][j] = tile->data.color[y][x][i];
         }
      }

      /* If fixed-point dest color buffer, need to clamp the incoming
       * fragment colors now.
       */
      if (bqs->clamp[0] || qs->softpipe->rasterizer->clamp_fragment_color) {
         clamp_colors(quadColor);
      }

      VEC4_ADD(quadColor[0], quadColor[0], dest[0]); /* R */
      VEC4_ADD(quadColor[1], quadColor[1], dest[1]); /* G */
      VEC4_ADD(quadColor[2], quadColor[2], dest[2]); /* B */
      VEC4_ADD(quadColor[3], quadColor[3], dest[3]); /* A */

      /* If fixed-point dest color buffer, need to clamp the outgoing
       * fragment colors now.
       */
      if (bqs->clamp[0]) {
         clamp_colors(quadColor);
      }

      rebase_colors(bqs->base_format[0], quadColor);

      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
         if (quad->inout.mask & (1 << j)) {
            int x = itx + (j & 1);
            int y = ity + (j >> 1);
            for (i = 0; i < 4; i++) { /* loop over color chans */
               tile->data.color[y][x][i] = quadColor[i][j];
            }
         }
      }
   }
}


/**
 * Just copy the quad color to the framebuffer tile (respecting the writemask),
 * for one color buffer.
 * Clamping will be done, if needed (depending on the color buffer's
 * datatype) when we write/pack the colors later.
 */
static void
single_output_color(struct quad_stage *qs,
                    struct quad_header *quads[],
                    unsigned nr)
{
   const struct blend_quad_stage *bqs = blend_quad_stage(qs);
   uint i, j, q;

   struct softpipe_cached_tile *tile
      = sp_get_cached_tile(qs->softpipe->cbuf_cache[0],
                           quads[0]->input.x0,
                           quads[0]->input.y0, quads[0]->input.layer);

   for (q = 0; q < nr; q++) {
      struct quad_header *quad = quads[q];
      float (*quadColor)[4] = quad->output.color[0];
      const int itx = (quad->input.x0 & (TILE_SIZE-1));
      const int ity = (quad->input.y0 & (TILE_SIZE-1));

      if (qs->softpipe->rasterizer->clamp_fragment_color)
         clamp_colors(quadColor);

      rebase_colors(bqs->base_format[0], quadColor);

      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
         if (quad->inout.mask & (1 << j)) {
            int x = itx + (j & 1);
            int y = ity + (j >> 1);
            for (i = 0; i < 4; i++) { /* loop over color chans */
               tile->data.color[y][x][i] = quadColor[i][j];
            }
         }
      }
   }
}

static void
blend_noop(struct quad_stage *qs,
           struct quad_header *quads[],
           unsigned nr)
{
}


static void
choose_blend_quad(struct quad_stage *qs,
                  struct quad_header *quads[],
                  unsigned nr)
{
   struct blend_quad_stage *bqs = blend_quad_stage(qs);
   struct softpipe_context *softpipe = qs->softpipe;
   const struct pipe_blend_state *blend = softpipe->blend;
   unsigned i;

   qs->run = blend_fallback;

   if (softpipe->framebuffer.nr_cbufs == 0) {
      qs->run = blend_noop;
   }
   else if (!softpipe->blend->logicop_enable &&
            softpipe->blend->rt[0].colormask == 0xf &&
            softpipe->framebuffer.nr_cbufs == 1)
   {
      if (softpipe->framebuffer.cbufs[0] == NULL) {
         qs->run = blend_noop;
      }
      else if (!blend->rt[0].blend_enable) {
         qs->run = single_output_color;
      }
      else if (blend->rt[0].rgb_src_factor == blend->rt[0].alpha_src_factor &&
               blend->rt[0].rgb_dst_factor == blend->rt[0].alpha_dst_factor &&
               blend->rt[0].rgb_func == blend->rt[0].alpha_func)
      {
         if (blend->rt[0].alpha_func == PIPE_BLEND_ADD) {
            if (blend->rt[0].rgb_src_factor == PIPE_BLENDFACTOR_ONE &&
                blend->rt[0].rgb_dst_factor == PIPE_BLENDFACTOR_ONE) {
               qs->run = blend_single_add_one_one;
            }
            else if (blend->rt[0].rgb_src_factor == PIPE_BLENDFACTOR_SRC_ALPHA &&
                     blend->rt[0].rgb_dst_factor == PIPE_BLENDFACTOR_INV_SRC_ALPHA)
               qs->run = blend_single_add_src_alpha_inv_src_alpha;

         }
      }
   }

   /* For each color buffer, determine if the buffer has destination alpha and
    * whether color clamping is needed.
    */
   for (i = 0; i < softpipe->framebuffer.nr_cbufs; i++) {
      if (softpipe->framebuffer.cbufs[i]) {
         const enum pipe_format format = softpipe->framebuffer.cbufs[i]->format;
         const struct util_format_description *desc =
            util_format_description(format);
         /* assuming all or no color channels are normalized: */
         bqs->clamp[i] = desc->channel[0].normalized;
         bqs->format_type[i] = desc->channel[0].type;

         if (util_format_is_intensity(format))
            bqs->base_format[i] = INTENSITY;
         else if (util_format_is_luminance(format))
            bqs->base_format[i] = LUMINANCE;
         else if (util_format_is_luminance_alpha(format))
            bqs->base_format[i] = LUMINANCE_ALPHA;
         else if (!util_format_has_alpha(format))
            bqs->base_format[i] = RGB;
         else
            bqs->base_format[i] = RGBA;
      }
   }

   qs->run(qs, quads, nr);
}

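/** begin() hook: reset qs->run so the next run re-does the fast-path
 *  selection in choose_blend_quad().
 */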
static void blend_begin(struct quad_stage *qs)
{
   qs->run = choose_blend_quad;
}


static void blend_destroy(struct quad_stage *qs)
{
   FREE( qs );
}

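/**
 * Create the blending quad stage.
 *
 * A minimal usage sketch (assuming the caller drives the stage through the
 * quad_stage hooks installed below; the actual call sites live elsewhere in
 * softpipe):
 *
 *    struct quad_stage *qs = sp_quad_blend_stage(softpipe);
 *    qs->begin(qs);            // selects choose_blend_quad as qs->run
 *    qs->run(qs, quads, nr);   // picks a fast path, then blends the quads
 *    qs->destroy(qs);
 */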
struct quad_stage *sp_quad_blend_stage( struct softpipe_context *softpipe )
{
   struct blend_quad_stage *stage = CALLOC_STRUCT(blend_quad_stage);

   if (!stage)
      return NULL;

   stage->base.softpipe = softpipe;
   stage->base.begin = blend_begin;
   stage->base.run = choose_blend_quad;
   stage->base.destroy = blend_destroy;

   return &stage->base;
}