1 /*
2 * Copyright 2016 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8 #include "SkPM4fPriv.h"
9 #include "SkUtils.h"
10 #include "SkXfermode.h"
11
// Compile-time selector for how a 32-bit destination pixel is decoded and
// re-encoded by the load_dst<>/store_dst<> helpers in this file.
enum DstType {
    kLinear_Dst = 0,   // decoded with Sk4f_fromL32, encoded with Sk4f_toL32
    kSRGB_Dst   = 1,   // decoded with Sk4f_fromS32, encoded with Sk4f_toS32
};
16
// Multiplies every lane of x4 by coverage/255, where coverage is an 8-bit
// value (0 -> all lanes become 0, 255 -> x4 is returned scaled by 1).
static Sk4f scale_by_coverage(const Sk4f& x4, uint8_t coverage) {
    const float unitCoverage = coverage * (1/255.0f);
    return x4 * Sk4f(unitCoverage);
}
20
// Linear interpolation from dst towards src, weighted by an 8-bit coverage:
// srcCoverage == 0 yields dst, srcCoverage == 255 yields dst + (src - dst).
static Sk4f lerp(const Sk4f& src, const Sk4f& dst, uint8_t srcCoverage) {
    const Sk4f weight(srcCoverage * (1/255.0f));
    return dst + (src - dst) * weight;
}
24
load_dst(SkPMColor dstC)25 template <DstType D> Sk4f load_dst(SkPMColor dstC) {
26 return (D == kSRGB_Dst) ? Sk4f_fromS32(dstC) : Sk4f_fromL32(dstC);
27 }
28
srgb_4b_to_linear_unit(SkPMColor dstC)29 static Sk4f srgb_4b_to_linear_unit(SkPMColor dstC) {
30 return Sk4f_fromS32(dstC);
31 }
32
store_dst(const Sk4f & x4)33 template <DstType D> uint32_t store_dst(const Sk4f& x4) {
34 return (D == kSRGB_Dst) ? Sk4f_toS32(x4) : Sk4f_toL32(x4);
35 }
36
// Runs l4 through linear_to_srgb(), scales the result by 255 and adds 0.5 so
// that a later truncating float->byte conversion rounds to nearest.
static Sk4f linear_unit_to_srgb_255f(const Sk4f& l4) {
    const Sk4f byteScale(255);
    const Sk4f roundBias(0.5f);
    return linear_to_srgb(l4) * byteScale + roundBias;
}
40
41 ///////////////////////////////////////////////////////////////////////////////////////////////////
42
general_1(const SkXfermode * xfer,uint32_t dst[],const SkPM4f * src,int count,const SkAlpha aa[])43 template <DstType D> void general_1(const SkXfermode* xfer, uint32_t dst[],
44 const SkPM4f* src, int count, const SkAlpha aa[]) {
45 SkXfermodeProc4f proc = xfer->getProc4f();
46 SkPM4f d;
47 if (aa) {
48 for (int i = 0; i < count; ++i) {
49 Sk4f d4 = load_dst<D>(dst[i]);
50 d4.store(d.fVec);
51 Sk4f r4 = Sk4f::Load(proc(*src, d).fVec);
52 dst[i] = store_dst<D>(lerp(r4, d4, aa[i]));
53 }
54 } else {
55 for (int i = 0; i < count; ++i) {
56 load_dst<D>(dst[i]).store(d.fVec);
57 Sk4f r4 = Sk4f::Load(proc(*src, d).fVec);
58 dst[i] = store_dst<D>(r4);
59 }
60 }
61 }
62
general_n(const SkXfermode * xfer,uint32_t dst[],const SkPM4f src[],int count,const SkAlpha aa[])63 template <DstType D> void general_n(const SkXfermode* xfer, uint32_t dst[],
64 const SkPM4f src[], int count, const SkAlpha aa[]) {
65 SkXfermodeProc4f proc = xfer->getProc4f();
66 SkPM4f d;
67 if (aa) {
68 for (int i = 0; i < count; ++i) {
69 Sk4f d4 = load_dst<D>(dst[i]);
70 d4.store(d.fVec);
71 Sk4f r4 = Sk4f::Load(proc(src[i], d).fVec);
72 dst[i] = store_dst<D>(lerp(r4, d4, aa[i]));
73 }
74 } else {
75 for (int i = 0; i < count; ++i) {
76 load_dst<D>(dst[i]).store(d.fVec);
77 Sk4f r4 = Sk4f::Load(proc(src[i], d).fVec);
78 dst[i] = store_dst<D>(r4);
79 }
80 }
81 }
82
83 const SkXfermode::D32Proc gProcs_General[] = {
84 general_n<kLinear_Dst>, general_n<kLinear_Dst>,
85 general_1<kLinear_Dst>, general_1<kLinear_Dst>,
86 general_n<kSRGB_Dst>, general_n<kSRGB_Dst>,
87 general_1<kSRGB_Dst>, general_1<kSRGB_Dst>,
88 };
89
90 ///////////////////////////////////////////////////////////////////////////////////////////////////
91
clear_linear(const SkXfermode *,uint32_t dst[],const SkPM4f[],int count,const SkAlpha aa[])92 static void clear_linear(const SkXfermode*, uint32_t dst[], const SkPM4f[],
93 int count, const SkAlpha aa[]) {
94 if (aa) {
95 for (int i = 0; i < count; ++i) {
96 unsigned a = aa[i];
97 if (a) {
98 SkPMColor dstC = dst[i];
99 SkPMColor C = 0;
100 if (0xFF != a) {
101 C = SkFourByteInterp(C, dstC, a);
102 }
103 dst[i] = C;
104 }
105 }
106 } else {
107 sk_memset32(dst, 0, count);
108 }
109 }
110
clear_srgb(const SkXfermode *,uint32_t dst[],const SkPM4f[],int count,const SkAlpha aa[])111 static void clear_srgb(const SkXfermode*, uint32_t dst[], const SkPM4f[],
112 int count, const SkAlpha aa[]) {
113 if (aa) {
114 for (int i = 0; i < count; ++i) {
115 if (aa[i]) {
116 Sk4f d = Sk4f_fromS32(dst[i]) * Sk4f((255 - aa[i]) * (1/255.0f));
117 dst[i] = Sk4f_toS32(d);
118 }
119 }
120 } else {
121 sk_memset32(dst, 0, count);
122 }
123 }
124
125 const SkXfermode::D32Proc gProcs_Clear[] = {
126 clear_linear, clear_linear,
127 clear_linear, clear_linear,
128 clear_srgb, clear_srgb,
129 clear_srgb, clear_srgb,
130 };
131
132 ///////////////////////////////////////////////////////////////////////////////////////////////////
133
src_n(const SkXfermode *,uint32_t dst[],const SkPM4f src[],int count,const SkAlpha aa[])134 template <DstType D> void src_n(const SkXfermode*, uint32_t dst[],
135 const SkPM4f src[], int count, const SkAlpha aa[]) {
136 for (int i = 0; i < count; ++i) {
137 unsigned a = 0xFF;
138 if (aa) {
139 a = aa[i];
140 if (0 == a) {
141 continue;
142 }
143 }
144 Sk4f r4 = Sk4f::Load(src[i].fVec); // src always overrides dst
145 if (a != 0xFF) {
146 Sk4f d4 = load_dst<D>(dst[i]);
147 r4 = lerp(r4, d4, a);
148 }
149 dst[i] = store_dst<D>(r4);
150 }
151 }
152
// Per-lane linear interpolation: each lane moves from dst towards src by the
// matching lane of src_scale (0 keeps dst, 1 yields dst + (src - dst)).
static Sk4f lerp(const Sk4f& src, const Sk4f& dst, const Sk4f& src_scale) {
    const Sk4f delta = src - dst;
    return dst + delta * src_scale;
}
156
src_1(const SkXfermode *,uint32_t dst[],const SkPM4f * src,int count,const SkAlpha aa[])157 template <DstType D> void src_1(const SkXfermode*, uint32_t dst[],
158 const SkPM4f* src, int count, const SkAlpha aa[]) {
159 const Sk4f s4 = Sk4f::Load(src->fVec);
160
161 if (aa) {
162 if (D == kLinear_Dst) {
163 // operate in bias-255 space for src and dst
164 const Sk4f& s4_255 = s4 * Sk4f(255);
165 while (count >= 4) {
166 Sk4f aa4 = SkNx_cast<float>(Sk4b::Load(aa)) * Sk4f(1/255.f);
167 Sk4f r0 = lerp(s4_255, to_4f(dst[0]), Sk4f(aa4[0])) + Sk4f(0.5f);
168 Sk4f r1 = lerp(s4_255, to_4f(dst[1]), Sk4f(aa4[1])) + Sk4f(0.5f);
169 Sk4f r2 = lerp(s4_255, to_4f(dst[2]), Sk4f(aa4[2])) + Sk4f(0.5f);
170 Sk4f r3 = lerp(s4_255, to_4f(dst[3]), Sk4f(aa4[3])) + Sk4f(0.5f);
171 Sk4f_ToBytes((uint8_t*)dst, r0, r1, r2, r3);
172
173 dst += 4;
174 aa += 4;
175 count -= 4;
176 }
177 } else { // kSRGB
178 while (count >= 4) {
179 Sk4f aa4 = SkNx_cast<float>(Sk4b::Load(aa)) * Sk4f(1/255.0f);
180
181 /* If we ever natively support convert 255_linear -> 255_srgb, then perhaps
182 * it would be faster (and possibly allow more code sharing with kLinear) to
183 * stay in that space.
184 */
185 Sk4f r0 = lerp(s4, load_dst<D>(dst[0]), Sk4f(aa4[0]));
186 Sk4f r1 = lerp(s4, load_dst<D>(dst[1]), Sk4f(aa4[1]));
187 Sk4f r2 = lerp(s4, load_dst<D>(dst[2]), Sk4f(aa4[2]));
188 Sk4f r3 = lerp(s4, load_dst<D>(dst[3]), Sk4f(aa4[3]));
189 Sk4f_ToBytes((uint8_t*)dst,
190 linear_unit_to_srgb_255f(r0),
191 linear_unit_to_srgb_255f(r1),
192 linear_unit_to_srgb_255f(r2),
193 linear_unit_to_srgb_255f(r3));
194
195 dst += 4;
196 aa += 4;
197 count -= 4;
198 }
199 }
200 for (int i = 0; i < count; ++i) {
201 unsigned a = aa[i];
202 Sk4f d4 = load_dst<D>(dst[i]);
203 dst[i] = store_dst<D>(lerp(s4, d4, a));
204 }
205 } else {
206 sk_memset32(dst, store_dst<D>(s4), count);
207 }
208 }
209
210 const SkXfermode::D32Proc gProcs_Src[] = {
211 src_n<kLinear_Dst>, src_n<kLinear_Dst>,
212 src_1<kLinear_Dst>, src_1<kLinear_Dst>,
213 src_n<kSRGB_Dst>, src_n<kSRGB_Dst>,
214 src_1<kSRGB_Dst>, src_1<kSRGB_Dst>,
215 };
216
217 ///////////////////////////////////////////////////////////////////////////////////////////////////
218
// kDst blitter: intentionally does nothing, so the destination run is left
// exactly as it was. All arguments are ignored.
static void dst(const SkXfermode*, uint32_t dst[], const SkPM4f[], int count, const SkAlpha aa[]) {
}
220
221 const SkXfermode::D32Proc gProcs_Dst[] = {
222 dst, dst, dst, dst, dst, dst, dst, dst,
223 };
224
225 ///////////////////////////////////////////////////////////////////////////////////////////////////
226
srcover_n(const SkXfermode *,uint32_t dst[],const SkPM4f src[],int count,const SkAlpha aa[])227 template <DstType D> void srcover_n(const SkXfermode*, uint32_t dst[],
228 const SkPM4f src[], int count, const SkAlpha aa[]) {
229 if (aa) {
230 for (int i = 0; i < count; ++i) {
231 unsigned a = aa[i];
232 if (0 == a) {
233 continue;
234 }
235 Sk4f s4 = Sk4f::Load(src[i].fVec);
236 Sk4f d4 = load_dst<D>(dst[i]);
237 if (a != 0xFF) {
238 s4 = scale_by_coverage(s4, a);
239 }
240 Sk4f r4 = s4 + d4 * Sk4f(1 - get_alpha(s4));
241 dst[i] = store_dst<D>(r4);
242 }
243 } else {
244 for (int i = 0; i < count; ++i) {
245 Sk4f s4 = Sk4f::Load(src[i].fVec);
246 Sk4f d4 = load_dst<D>(dst[i]);
247 Sk4f r4 = s4 + d4 * Sk4f(1 - get_alpha(s4));
248 dst[i] = store_dst<D>(r4);
249 }
250 }
251 }
252
srcover_linear_dst_1(const SkXfermode *,uint32_t dst[],const SkPM4f * src,int count,const SkAlpha aa[])253 static void srcover_linear_dst_1(const SkXfermode*, uint32_t dst[],
254 const SkPM4f* src, int count, const SkAlpha aa[]) {
255 const Sk4f s4 = Sk4f::Load(src->fVec);
256 const Sk4f dst_scale = Sk4f(1 - get_alpha(s4));
257
258 if (aa) {
259 for (int i = 0; i < count; ++i) {
260 unsigned a = aa[i];
261 if (0 == a) {
262 continue;
263 }
264 Sk4f d4 = Sk4f_fromL32(dst[i]);
265 Sk4f r4;
266 if (a != 0xFF) {
267 Sk4f s4_aa = scale_by_coverage(s4, a);
268 r4 = s4_aa + d4 * Sk4f(1 - get_alpha(s4_aa));
269 } else {
270 r4 = s4 + d4 * dst_scale;
271 }
272 dst[i] = Sk4f_toL32(r4);
273 }
274 } else {
275 const Sk4f s4_255 = s4 * Sk4f(255) + Sk4f(0.5f); // +0.5 to pre-bias for rounding
276 while (count >= 4) {
277 Sk4f d0 = to_4f(dst[0]);
278 Sk4f d1 = to_4f(dst[1]);
279 Sk4f d2 = to_4f(dst[2]);
280 Sk4f d3 = to_4f(dst[3]);
281 Sk4f_ToBytes((uint8_t*)dst,
282 s4_255 + d0 * dst_scale,
283 s4_255 + d1 * dst_scale,
284 s4_255 + d2 * dst_scale,
285 s4_255 + d3 * dst_scale);
286 dst += 4;
287 count -= 4;
288 }
289 for (int i = 0; i < count; ++i) {
290 Sk4f d4 = to_4f(dst[i]);
291 dst[i] = to_4b(s4_255 + d4 * dst_scale);
292 }
293 }
294 }
295
srcover_srgb_dst_1(const SkXfermode *,uint32_t dst[],const SkPM4f * src,int count,const SkAlpha aa[])296 static void srcover_srgb_dst_1(const SkXfermode*, uint32_t dst[],
297 const SkPM4f* src, int count, const SkAlpha aa[]) {
298 Sk4f s4 = Sk4f::Load(src->fVec);
299 Sk4f dst_scale = Sk4f(1 - get_alpha(s4));
300
301 if (aa) {
302 for (int i = 0; i < count; ++i) {
303 unsigned a = aa[i];
304 if (0 == a) {
305 continue;
306 }
307 Sk4f d4 = srgb_4b_to_linear_unit(dst[i]);
308 Sk4f r4;
309 if (a != 0xFF) {
310 const Sk4f s4_aa = scale_by_coverage(s4, a);
311 r4 = s4_aa + d4 * Sk4f(1 - get_alpha(s4_aa));
312 } else {
313 r4 = s4 + d4 * dst_scale;
314 }
315 dst[i] = to_4b(linear_unit_to_srgb_255f(r4));
316 }
317 } else {
318 while (count >= 4) {
319 Sk4f d0 = srgb_4b_to_linear_unit(dst[0]);
320 Sk4f d1 = srgb_4b_to_linear_unit(dst[1]);
321 Sk4f d2 = srgb_4b_to_linear_unit(dst[2]);
322 Sk4f d3 = srgb_4b_to_linear_unit(dst[3]);
323 Sk4f_ToBytes((uint8_t*)dst,
324 linear_unit_to_srgb_255f(s4 + d0 * dst_scale),
325 linear_unit_to_srgb_255f(s4 + d1 * dst_scale),
326 linear_unit_to_srgb_255f(s4 + d2 * dst_scale),
327 linear_unit_to_srgb_255f(s4 + d3 * dst_scale));
328 dst += 4;
329 count -= 4;
330 }
331 for (int i = 0; i < count; ++i) {
332 Sk4f d4 = srgb_4b_to_linear_unit(dst[i]);
333 dst[i] = to_4b(linear_unit_to_srgb_255f(s4 + d4 * dst_scale));
334 }
335 }
336 }
337
338 const SkXfermode::D32Proc gProcs_SrcOver[] = {
339 srcover_n<kLinear_Dst>, src_n<kLinear_Dst>,
340 srcover_linear_dst_1, src_1<kLinear_Dst>,
341
342 srcover_n<kSRGB_Dst>, src_n<kSRGB_Dst>,
343 srcover_srgb_dst_1, src_1<kSRGB_Dst>,
344 };
345
346 ///////////////////////////////////////////////////////////////////////////////////////////////////
347
find_proc(SkXfermode::Mode mode,uint32_t flags)348 static SkXfermode::D32Proc find_proc(SkXfermode::Mode mode, uint32_t flags) {
349 SkASSERT(0 == (flags & ~7));
350 flags &= 7;
351
352 switch (mode) {
353 case SkXfermode::kClear_Mode: return gProcs_Clear[flags];
354 case SkXfermode::kSrc_Mode: return gProcs_Src[flags];
355 case SkXfermode::kDst_Mode: return gProcs_Dst[flags];
356 case SkXfermode::kSrcOver_Mode: return gProcs_SrcOver[flags];
357 default:
358 break;
359 }
360 return gProcs_General[flags];
361 }
362
onGetD32Proc(uint32_t flags) const363 SkXfermode::D32Proc SkXfermode::onGetD32Proc(uint32_t flags) const {
364 SkASSERT(0 == (flags & ~7));
365 flags &= 7;
366
367 Mode mode;
368 return this->asMode(&mode) ? find_proc(mode, flags) : gProcs_General[flags];
369 }
370
GetD32Proc(SkXfermode * xfer,uint32_t flags)371 SkXfermode::D32Proc SkXfermode::GetD32Proc(SkXfermode* xfer, uint32_t flags) {
372 return xfer ? xfer->onGetD32Proc(flags) : find_proc(SkXfermode::kSrcOver_Mode, flags);
373 }
374
375 ///////////////////////////////////////////////////////////////////////////////////////////////////
376 #include "SkColorPriv.h"
377
lcd16_to_unit_4f(uint16_t rgb)378 static Sk4f lcd16_to_unit_4f(uint16_t rgb) {
379 #ifdef SK_PMCOLOR_IS_RGBA
380 Sk4i rgbi = Sk4i(SkGetPackedR16(rgb), SkGetPackedG16(rgb), SkGetPackedB16(rgb), 0);
381 #else
382 Sk4i rgbi = Sk4i(SkGetPackedB16(rgb), SkGetPackedG16(rgb), SkGetPackedR16(rgb), 0);
383 #endif
384 return SkNx_cast<float>(rgbi) * Sk4f(1.0f/31, 1.0f/63, 1.0f/31, 0);
385 }
386
387 template <DstType D>
src_1_lcd(uint32_t dst[],const SkPM4f * src,int count,const uint16_t lcd[])388 void src_1_lcd(uint32_t dst[], const SkPM4f* src, int count, const uint16_t lcd[]) {
389 const Sk4f s4 = Sk4f::Load(src->fVec);
390
391 if (D == kLinear_Dst) {
392 // operate in bias-255 space for src and dst
393 const Sk4f s4bias = s4 * Sk4f(255);
394 for (int i = 0; i < count; ++i) {
395 uint16_t rgb = lcd[i];
396 if (0 == rgb) {
397 continue;
398 }
399 Sk4f d4bias = to_4f(dst[i]);
400 dst[i] = to_4b(lerp(s4bias, d4bias, lcd16_to_unit_4f(rgb))) | (SK_A32_MASK << SK_A32_SHIFT);
401 }
402 } else { // kSRGB
403 for (int i = 0; i < count; ++i) {
404 uint16_t rgb = lcd[i];
405 if (0 == rgb) {
406 continue;
407 }
408 Sk4f d4 = load_dst<D>(dst[i]);
409 dst[i] = store_dst<D>(lerp(s4, d4, lcd16_to_unit_4f(rgb))) | (SK_A32_MASK << SK_A32_SHIFT);
410 }
411 }
412 }
413
414 template <DstType D>
src_n_lcd(uint32_t dst[],const SkPM4f src[],int count,const uint16_t lcd[])415 void src_n_lcd(uint32_t dst[], const SkPM4f src[], int count, const uint16_t lcd[]) {
416 for (int i = 0; i < count; ++i) {
417 uint16_t rgb = lcd[i];
418 if (0 == rgb) {
419 continue;
420 }
421 Sk4f s4 = Sk4f::Load(src[i].fVec);
422 Sk4f d4 = load_dst<D>(dst[i]);
423 dst[i] = store_dst<D>(lerp(s4, d4, lcd16_to_unit_4f(rgb))) | (SK_A32_MASK << SK_A32_SHIFT);
424 }
425 }
426
427 template <DstType D>
srcover_1_lcd(uint32_t dst[],const SkPM4f * src,int count,const uint16_t lcd[])428 void srcover_1_lcd(uint32_t dst[], const SkPM4f* src, int count, const uint16_t lcd[]) {
429 const Sk4f s4 = Sk4f::Load(src->fVec);
430 Sk4f dst_scale = Sk4f(1 - get_alpha(s4));
431
432 for (int i = 0; i < count; ++i) {
433 uint16_t rgb = lcd[i];
434 if (0 == rgb) {
435 continue;
436 }
437 Sk4f d4 = load_dst<D>(dst[i]);
438 Sk4f r4 = s4 + d4 * dst_scale;
439 r4 = lerp(r4, d4, lcd16_to_unit_4f(rgb));
440 dst[i] = store_dst<D>(r4) | (SK_A32_MASK << SK_A32_SHIFT);
441 }
442 }
443
444 template <DstType D>
srcover_n_lcd(uint32_t dst[],const SkPM4f src[],int count,const uint16_t lcd[])445 void srcover_n_lcd(uint32_t dst[], const SkPM4f src[], int count, const uint16_t lcd[]) {
446 for (int i = 0; i < count; ++i) {
447 uint16_t rgb = lcd[i];
448 if (0 == rgb) {
449 continue;
450 }
451 Sk4f s4 = Sk4f::Load(src[i].fVec);
452 Sk4f dst_scale = Sk4f(1 - get_alpha(s4));
453 Sk4f d4 = load_dst<D>(dst[i]);
454 Sk4f r4 = s4 + d4 * dst_scale;
455 r4 = lerp(r4, d4, lcd16_to_unit_4f(rgb));
456 dst[i] = store_dst<D>(r4) | (SK_A32_MASK << SK_A32_SHIFT);
457 }
458 }
459
GetLCD32Proc(uint32_t flags)460 SkXfermode::LCD32Proc SkXfermode::GetLCD32Proc(uint32_t flags) {
461 SkASSERT((flags & ~7) == 0);
462 flags &= 7;
463
464 const LCD32Proc procs[] = {
465 srcover_n_lcd<kSRGB_Dst>, src_n_lcd<kSRGB_Dst>,
466 srcover_1_lcd<kSRGB_Dst>, src_1_lcd<kSRGB_Dst>,
467
468 srcover_n_lcd<kLinear_Dst>, src_n_lcd<kLinear_Dst>,
469 srcover_1_lcd<kLinear_Dst>, src_1_lcd<kLinear_Dst>,
470 };
471 return procs[flags];
472 }
473