1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * @file blend.cpp
24 *
25 * @brief Implementation for blending operations.
26 *
27 ******************************************************************************/
28 #include "state.h"
29
30 template<bool Color, bool Alpha>
31 INLINE
GenerateBlendFactor(SWR_BLEND_FACTOR func,simdvector & constantColor,simdvector & src,simdvector & src1,simdvector & dst,simdvector & out)32 void GenerateBlendFactor(SWR_BLEND_FACTOR func, simdvector &constantColor, simdvector &src, simdvector &src1, simdvector &dst, simdvector &out)
33 {
34 simdvector result;
35
36 switch (func)
37 {
38 case BLENDFACTOR_ZERO:
39 result.x = _simd_setzero_ps();
40 result.y = _simd_setzero_ps();
41 result.z = _simd_setzero_ps();
42 result.w = _simd_setzero_ps();
43 break;
44
45 case BLENDFACTOR_ONE:
46 result.x = _simd_set1_ps(1.0);
47 result.y = _simd_set1_ps(1.0);
48 result.z = _simd_set1_ps(1.0);
49 result.w = _simd_set1_ps(1.0);
50 break;
51
52 case BLENDFACTOR_SRC_COLOR:
53 result = src;
54 break;
55
56 case BLENDFACTOR_DST_COLOR:
57 result = dst;
58 break;
59
60 case BLENDFACTOR_INV_SRC_COLOR:
61 result.x = _simd_sub_ps(_simd_set1_ps(1.0), src.x);
62 result.y = _simd_sub_ps(_simd_set1_ps(1.0), src.y);
63 result.z = _simd_sub_ps(_simd_set1_ps(1.0), src.z);
64 result.w = _simd_sub_ps(_simd_set1_ps(1.0), src.w);
65 break;
66
67 case BLENDFACTOR_INV_DST_COLOR:
68 result.x = _simd_sub_ps(_simd_set1_ps(1.0), dst.x);
69 result.y = _simd_sub_ps(_simd_set1_ps(1.0), dst.y);
70 result.z = _simd_sub_ps(_simd_set1_ps(1.0), dst.z);
71 result.w = _simd_sub_ps(_simd_set1_ps(1.0), dst.w);
72 break;
73
74 case BLENDFACTOR_SRC_ALPHA: result.x = src.w;
75 result.y = src.w;
76 result.z = src.w;
77 result.w = src.w;
78 break;
79
80 case BLENDFACTOR_INV_SRC_ALPHA:
81 {
82 simdscalar oneMinusSrcA = _simd_sub_ps(_simd_set1_ps(1.0), src.w);
83 result.x = oneMinusSrcA;
84 result.y = oneMinusSrcA;
85 result.z = oneMinusSrcA;
86 result.w = oneMinusSrcA;
87 break;
88 }
89
90 case BLENDFACTOR_DST_ALPHA: result.x = dst.w;
91 result.y = dst.w;
92 result.z = dst.w;
93 result.w = dst.w;
94 break;
95
96 case BLENDFACTOR_INV_DST_ALPHA:
97 {
98 simdscalar oneMinusDstA = _simd_sub_ps(_simd_set1_ps(1.0), dst.w);
99 result.x = oneMinusDstA;
100 result.y = oneMinusDstA;
101 result.z = oneMinusDstA;
102 result.w = oneMinusDstA;
103 break;
104 }
105
106 case BLENDFACTOR_SRC_ALPHA_SATURATE:
107 {
108 simdscalar sat = _simd_min_ps(src.w, _simd_sub_ps(_simd_set1_ps(1.0), dst.w));
109 result.x = sat;
110 result.y = sat;
111 result.z = sat;
112 result.w = _simd_set1_ps(1.0);
113 break;
114 }
115
116 case BLENDFACTOR_CONST_COLOR:
117 result.x = constantColor[0];
118 result.y = constantColor[1];
119 result.z = constantColor[2];
120 result.w = constantColor[3];
121 break;
122
123 case BLENDFACTOR_CONST_ALPHA:
124 result.x = result.y = result.z = result.w = constantColor[3];
125 break;
126
127 case BLENDFACTOR_INV_CONST_COLOR:
128 {
129 result.x = _simd_sub_ps(_simd_set1_ps(1.0f), constantColor[0]);
130 result.y = _simd_sub_ps(_simd_set1_ps(1.0f), constantColor[1]);
131 result.z = _simd_sub_ps(_simd_set1_ps(1.0f), constantColor[2]);
132 result.w = _simd_sub_ps(_simd_set1_ps(1.0f), constantColor[3]);
133 break;
134 }
135
136 case BLENDFACTOR_INV_CONST_ALPHA:
137 {
138 result.x = result.y = result.z = result.w = _simd_sub_ps(_simd_set1_ps(1.0f), constantColor[3]);
139 break;
140 }
141
142 case BLENDFACTOR_SRC1_COLOR:
143 result.x = src1.x;
144 result.y = src1.y;
145 result.z = src1.z;
146 result.w = src1.w;
147 break;
148
149 case BLENDFACTOR_SRC1_ALPHA:
150 result.x = result.y = result.z = result.w = src1.w;
151 break;
152
153 case BLENDFACTOR_INV_SRC1_COLOR:
154 result.x = _simd_sub_ps(_simd_set1_ps(1.0f), src1.x);
155 result.y = _simd_sub_ps(_simd_set1_ps(1.0f), src1.y);
156 result.z = _simd_sub_ps(_simd_set1_ps(1.0f), src1.z);
157 result.w = _simd_sub_ps(_simd_set1_ps(1.0f), src1.w);
158 break;
159
160 case BLENDFACTOR_INV_SRC1_ALPHA:
161 result.x = result.y = result.z = result.w = _simd_sub_ps(_simd_set1_ps(1.0f), src1.w);
162 break;
163
164 default: SWR_INVALID("Unimplemented blend factor: %d", func);
165 }
166
167 if (Color)
168 {
169 out.x = result.x;
170 out.y = result.y;
171 out.z = result.z;
172 }
173 if (Alpha)
174 {
175 out.w = result.w;
176 }
177
178 }
179
180 template<bool Color, bool Alpha>
BlendFunc(SWR_BLEND_OP blendOp,simdvector & src,simdvector & srcFactor,simdvector & dst,simdvector & dstFactor,simdvector & out)181 INLINE void BlendFunc(SWR_BLEND_OP blendOp, simdvector &src, simdvector &srcFactor, simdvector &dst, simdvector &dstFactor, simdvector &out)
182 {
183 simdvector result;
184
185 switch (blendOp)
186 {
187 case BLENDOP_ADD:
188 result.x = _simd_fmadd_ps(srcFactor.x, src.x, _simd_mul_ps(dstFactor.x, dst.x));
189 result.y = _simd_fmadd_ps(srcFactor.y, src.y, _simd_mul_ps(dstFactor.y, dst.y));
190 result.z = _simd_fmadd_ps(srcFactor.z, src.z, _simd_mul_ps(dstFactor.z, dst.z));
191 result.w = _simd_fmadd_ps(srcFactor.w, src.w, _simd_mul_ps(dstFactor.w, dst.w));
192 break;
193
194 case BLENDOP_SUBTRACT:
195 result.x = _simd_fmsub_ps(srcFactor.x, src.x, _simd_mul_ps(dstFactor.x, dst.x));
196 result.y = _simd_fmsub_ps(srcFactor.y, src.y, _simd_mul_ps(dstFactor.y, dst.y));
197 result.z = _simd_fmsub_ps(srcFactor.z, src.z, _simd_mul_ps(dstFactor.z, dst.z));
198 result.w = _simd_fmsub_ps(srcFactor.w, src.w, _simd_mul_ps(dstFactor.w, dst.w));
199 break;
200
201 case BLENDOP_REVSUBTRACT:
202 result.x = _simd_fmsub_ps(dstFactor.x, dst.x, _simd_mul_ps(srcFactor.x, src.x));
203 result.y = _simd_fmsub_ps(dstFactor.y, dst.y, _simd_mul_ps(srcFactor.y, src.y));
204 result.z = _simd_fmsub_ps(dstFactor.z, dst.z, _simd_mul_ps(srcFactor.z, src.z));
205 result.w = _simd_fmsub_ps(dstFactor.w, dst.w, _simd_mul_ps(srcFactor.w, src.w));
206 break;
207
208 case BLENDOP_MIN:
209 result.x = _simd_min_ps(_simd_mul_ps(srcFactor.x, src.x), _simd_mul_ps(dstFactor.x, dst.x));
210 result.y = _simd_min_ps(_simd_mul_ps(srcFactor.y, src.y), _simd_mul_ps(dstFactor.y, dst.y));
211 result.z = _simd_min_ps(_simd_mul_ps(srcFactor.z, src.z), _simd_mul_ps(dstFactor.z, dst.z));
212 result.w = _simd_min_ps(_simd_mul_ps(srcFactor.w, src.w), _simd_mul_ps(dstFactor.w, dst.w));
213 break;
214
215 case BLENDOP_MAX:
216 result.x = _simd_max_ps(_simd_mul_ps(srcFactor.x, src.x), _simd_mul_ps(dstFactor.x, dst.x));
217 result.y = _simd_max_ps(_simd_mul_ps(srcFactor.y, src.y), _simd_mul_ps(dstFactor.y, dst.y));
218 result.z = _simd_max_ps(_simd_mul_ps(srcFactor.z, src.z), _simd_mul_ps(dstFactor.z, dst.z));
219 result.w = _simd_max_ps(_simd_mul_ps(srcFactor.w, src.w), _simd_mul_ps(dstFactor.w, dst.w));
220 break;
221
222 default:
223 SWR_INVALID("Unimplemented blend function: %d", blendOp);
224 }
225
226 if (Color)
227 {
228 out.x = result.x;
229 out.y = result.y;
230 out.z = result.z;
231 }
232 if (Alpha)
233 {
234 out.w = result.w;
235 }
236 }
237
238 template<SWR_TYPE type>
Clamp(simdvector & src)239 INLINE void Clamp(simdvector &src)
240 {
241 switch (type)
242 {
243 case SWR_TYPE_FLOAT:
244 break;
245
246 case SWR_TYPE_UNORM:
247 src.x = _simd_max_ps(src.x, _simd_setzero_ps());
248 src.x = _simd_min_ps(src.x, _simd_set1_ps(1.0f));
249
250 src.y = _simd_max_ps(src.y, _simd_setzero_ps());
251 src.y = _simd_min_ps(src.y, _simd_set1_ps(1.0f));
252
253 src.z = _simd_max_ps(src.z, _simd_setzero_ps());
254 src.z = _simd_min_ps(src.z, _simd_set1_ps(1.0f));
255
256 src.w = _simd_max_ps(src.w, _simd_setzero_ps());
257 src.w = _simd_min_ps(src.w, _simd_set1_ps(1.0f));
258 break;
259
260 case SWR_TYPE_SNORM:
261 src.x = _simd_max_ps(src.x, _simd_set1_ps(-1.0f));
262 src.x = _simd_min_ps(src.x, _simd_set1_ps(1.0f));
263
264 src.y = _simd_max_ps(src.y, _simd_set1_ps(-1.0f));
265 src.y = _simd_min_ps(src.y, _simd_set1_ps(1.0f));
266
267 src.z = _simd_max_ps(src.z, _simd_set1_ps(-1.0f));
268 src.z = _simd_min_ps(src.z, _simd_set1_ps(1.0f));
269
270 src.w = _simd_max_ps(src.w, _simd_set1_ps(-1.0f));
271 src.w = _simd_min_ps(src.w, _simd_set1_ps(1.0f));
272 break;
273
274 default:
275 SWR_INVALID("Unimplemented clamp: %d", type);
276 break;
277 }
278 }
279
280 template<SWR_TYPE type>
Blend(const SWR_BLEND_STATE * pBlendState,const SWR_RENDER_TARGET_BLEND_STATE * pState,simdvector & src,simdvector & src1,uint8_t * pDst,simdvector & result)281 void Blend(const SWR_BLEND_STATE *pBlendState, const SWR_RENDER_TARGET_BLEND_STATE *pState, simdvector &src, simdvector& src1, uint8_t *pDst, simdvector &result)
282 {
283 // load render target
284 simdvector dst;
285 LoadSOA<KNOB_COLOR_HOT_TILE_FORMAT>(pDst, dst);
286
287 simdvector constColor;
288 constColor.x = _simd_broadcast_ss(&pBlendState->constantColor[0]);
289 constColor.y = _simd_broadcast_ss(&pBlendState->constantColor[1]);
290 constColor.z = _simd_broadcast_ss(&pBlendState->constantColor[2]);
291 constColor.w = _simd_broadcast_ss(&pBlendState->constantColor[3]);
292
293 // clamp src/dst/constant
294 Clamp<type>(src);
295 Clamp<type>(src1);
296 Clamp<type>(dst);
297 Clamp<type>(constColor);
298
299 simdvector srcFactor, dstFactor;
300 if (pBlendState->independentAlphaBlendEnable)
301 {
302 GenerateBlendFactor<true, false>((SWR_BLEND_FACTOR)pState->sourceBlendFactor, constColor, src, src1, dst, srcFactor);
303 GenerateBlendFactor<false, true>((SWR_BLEND_FACTOR)pState->sourceAlphaBlendFactor, constColor, src, src1, dst, srcFactor);
304
305 GenerateBlendFactor<true, false>((SWR_BLEND_FACTOR)pState->destBlendFactor, constColor, src, src1, dst, dstFactor);
306 GenerateBlendFactor<false, true>((SWR_BLEND_FACTOR)pState->destAlphaBlendFactor, constColor, src, src1, dst, dstFactor);
307
308 BlendFunc<true, false>((SWR_BLEND_OP)pState->colorBlendFunc, src, srcFactor, dst, dstFactor, result);
309 BlendFunc<false, true>((SWR_BLEND_OP)pState->alphaBlendFunc, src, srcFactor, dst, dstFactor, result);
310 }
311 else
312 {
313 GenerateBlendFactor<true, true>((SWR_BLEND_FACTOR)pState->sourceBlendFactor, constColor, src, src1, dst, srcFactor);
314 GenerateBlendFactor<true, true>((SWR_BLEND_FACTOR)pState->destBlendFactor, constColor, src, src1, dst, dstFactor);
315
316 BlendFunc<true, true>((SWR_BLEND_OP)pState->colorBlendFunc, src, srcFactor, dst, dstFactor, result);
317 }
318 }
319