• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 /*
3  * Copyright 2006 The Android Open Source Project
4  *
5  * Use of this source code is governed by a BSD-style license that can be
6  * found in the LICENSE file.
7  */
8 
9 
10 #ifndef SkColorPriv_DEFINED
11 #define SkColorPriv_DEFINED
12 
// Turn this on for extra debug checking when blending onto 565.
// It is enabled automatically in debug builds.
#if defined(SK_DEBUG)
    #define CHECK_FOR_565_OVERFLOW
#endif
17 
18 #include "SkColor.h"
19 #include "SkMath.h"
20 
21 /** Turn 0..255 into 0..256 by adding 1 at the half-way point. Used to turn a
22     byte into a scale value, so that we can say scale * value >> 8 instead of
23     alpha * value / 255.
24 
25     In debugging, asserts that alpha is 0..255
26 */
SkAlpha255To256(U8CPU alpha)27 static inline unsigned SkAlpha255To256(U8CPU alpha) {
28     SkASSERT(SkToU8(alpha) == alpha);
29     // this one assues that blending on top of an opaque dst keeps it that way
30     // even though it is less accurate than a+(a>>7) for non-opaque dsts
31     return alpha + 1;
32 }
33 
/** Multiply value by a 0..256 scale and shift the product down by 8,
    i.e. return (value * alpha256) >> 8.
 */
#define SkAlphaMul(value, alpha256)     (SkMulS16((value), (alpha256)) >> 8)
38 
39 //  The caller may want negative values, so keep all params signed (int)
40 //  so we don't accidentally slip into unsigned math and lose the sign
41 //  extension when we shift (in SkAlphaMul)
SkAlphaBlend(int src,int dst,int scale256)42 static inline int SkAlphaBlend(int src, int dst, int scale256) {
43     SkASSERT((unsigned)scale256 <= 256);
44     return dst + SkAlphaMul(src - dst, scale256);
45 }
46 
47 /**
48  *  Returns (src * alpha + dst * (255 - alpha)) / 255
49  *
50  *  This is more accurate than SkAlphaBlend, but slightly slower
51  */
SkAlphaBlend255(S16CPU src,S16CPU dst,U8CPU alpha)52 static inline int SkAlphaBlend255(S16CPU src, S16CPU dst, U8CPU alpha) {
53     SkASSERT((int16_t)src == src);
54     SkASSERT((int16_t)dst == dst);
55     SkASSERT((uint8_t)alpha == alpha);
56 
57     int prod = SkMulS16(src - dst, alpha) + 128;
58     prod = (prod + (prod >> 8)) >> 8;
59     return dst + prod;
60 }
61 
// Number of bits per channel in a packed 16-bit (565) color.
#define SK_R16_BITS     5
#define SK_G16_BITS     6
#define SK_B16_BITS     5

// Bit position of each channel: red on top, then green, blue in the low bits.
#define SK_R16_SHIFT    (SK_B16_BITS + SK_G16_BITS)
#define SK_G16_SHIFT    (SK_B16_BITS)
#define SK_B16_SHIFT    0

// Right-aligned masks covering one channel's value range.
#define SK_R16_MASK     ((1 << SK_R16_BITS) - 1)
#define SK_G16_MASK     ((1 << SK_G16_BITS) - 1)
#define SK_B16_MASK     ((1 << SK_B16_BITS) - 1)

// Extract one channel from a packed 565 color.
#define SkGetPackedR16(color)   (((unsigned)(color) >> SK_R16_SHIFT) & SK_R16_MASK)
#define SkGetPackedG16(color)   (((unsigned)(color) >> SK_G16_SHIFT) & SK_G16_MASK)
#define SkGetPackedB16(color)   (((unsigned)(color) >> SK_B16_SHIFT) & SK_B16_MASK)

// Debug-only range checks for a single 565 channel value.
#define SkR16Assert(r)  SkASSERT((unsigned)(r) <= SK_R16_MASK)
#define SkG16Assert(g)  SkASSERT((unsigned)(g) <= SK_G16_MASK)
#define SkB16Assert(b)  SkASSERT((unsigned)(b) <= SK_B16_MASK)
81 
SkPackRGB16(unsigned r,unsigned g,unsigned b)82 static inline uint16_t SkPackRGB16(unsigned r, unsigned g, unsigned b) {
83     SkASSERT(r <= SK_R16_MASK);
84     SkASSERT(g <= SK_G16_MASK);
85     SkASSERT(b <= SK_B16_MASK);
86 
87     return SkToU16((r << SK_R16_SHIFT) | (g << SK_G16_SHIFT) | (b << SK_B16_SHIFT));
88 }
89 
// Channel masks positioned where the channel lives inside a packed 565 color.
#define SK_R16_MASK_IN_PLACE        (SK_R16_MASK << SK_R16_SHIFT)
#define SK_G16_MASK_IN_PLACE        (SK_G16_MASK << SK_G16_SHIFT)
#define SK_B16_MASK_IN_PLACE        (SK_B16_MASK << SK_B16_SHIFT)
93 
94 /** Expand the 16bit color into a 32bit value that can be scaled all at once
95     by a value up to 32. Used in conjunction with SkCompact_rgb_16.
96 */
SkExpand_rgb_16(U16CPU c)97 static inline uint32_t SkExpand_rgb_16(U16CPU c) {
98     SkASSERT(c == (uint16_t)c);
99 
100     return ((c & SK_G16_MASK_IN_PLACE) << 16) | (c & ~SK_G16_MASK_IN_PLACE);
101 }
102 
103 /** Compress an expanded value (from SkExpand_rgb_16) back down to a 16bit
104     color value. The computation yields only 16bits of valid data, but we claim
105     to return 32bits, so that the compiler won't generate extra instructions to
106     "clean" the top 16bits. However, the top 16 can contain garbage, so it is
107     up to the caller to safely ignore them.
108 */
SkCompact_rgb_16(uint32_t c)109 static inline U16CPU SkCompact_rgb_16(uint32_t c) {
110     return ((c >> 16) & SK_G16_MASK_IN_PLACE) | (c & ~SK_G16_MASK_IN_PLACE);
111 }
112 
113 /** Scale the 16bit color value by the 0..256 scale parameter.
114     The computation yields only 16bits of valid data, but we claim
115     to return 32bits, so that the compiler won't generate extra instructions to
116     "clean" the top 16bits.
117 */
SkAlphaMulRGB16(U16CPU c,unsigned scale)118 static inline U16CPU SkAlphaMulRGB16(U16CPU c, unsigned scale) {
119     return SkCompact_rgb_16(SkExpand_rgb_16(c) * (scale >> 3) >> 5);
120 }
121 
// This helper explicitly returns a clean 16bit value (but is slower).
// The whole expansion is parenthesized so the cast cannot bind to tokens
// that surround the macro at the call site (e.g. a leading sizeof).
#define SkAlphaMulRGB16_ToU16(c, s)  ((uint16_t)SkAlphaMulRGB16(c, s))
124 
125 /** Blend src and dst 16bit colors by the 0..256 scale parameter.
126     The computation yields only 16bits of valid data, but we claim
127     to return 32bits, so that the compiler won't generate extra instructions to
128     "clean" the top 16bits.
129 */
SkBlendRGB16(U16CPU src,U16CPU dst,int srcScale)130 static inline U16CPU SkBlendRGB16(U16CPU src, U16CPU dst, int srcScale) {
131     SkASSERT((unsigned)srcScale <= 256);
132 
133     srcScale >>= 3;
134 
135     uint32_t src32 = SkExpand_rgb_16(src);
136     uint32_t dst32 = SkExpand_rgb_16(dst);
137     return SkCompact_rgb_16(dst32 + ((src32 - dst32) * srcScale >> 5));
138 }
139 
SkBlendRGB16(const uint16_t src[],uint16_t dst[],int srcScale,int count)140 static inline void SkBlendRGB16(const uint16_t src[], uint16_t dst[],
141                                 int srcScale, int count) {
142     SkASSERT(count > 0);
143     SkASSERT((unsigned)srcScale <= 256);
144 
145     srcScale >>= 3;
146 
147     do {
148         uint32_t src32 = SkExpand_rgb_16(*src++);
149         uint32_t dst32 = SkExpand_rgb_16(*dst);
150         *dst++ = SkCompact_rgb_16(dst32 + ((src32 - dst32) * srcScale >> 5));
151     } while (--count > 0);
152 }
153 
// Add two packed 565 colors. The debug build asserts that no channel sum
// exceeds its mask; the release build is a plain addition.
#ifdef SK_DEBUG
    static inline U16CPU SkRGB16Add(U16CPU a, U16CPU b) {
        const unsigned sumR = SkGetPackedR16(a) + SkGetPackedR16(b);
        const unsigned sumG = SkGetPackedG16(a) + SkGetPackedG16(b);
        const unsigned sumB = SkGetPackedB16(a) + SkGetPackedB16(b);
        SkASSERT(sumR <= SK_R16_MASK);
        SkASSERT(sumG <= SK_G16_MASK);
        SkASSERT(sumB <= SK_B16_MASK);

        return a + b;
    }
#else
    #define SkRGB16Add(a, b)  ((a) + (b))
#endif
165 
166 ///////////////////////////////////////////////////////////////////////////////
167 
// Number of bits per channel in a packed 32-bit color.
#define SK_A32_BITS     8
#define SK_R32_BITS     8
#define SK_G32_BITS     8
#define SK_B32_BITS     8

// Right-aligned masks covering one channel's value range.
#define SK_A32_MASK     ((1 << SK_A32_BITS) - 1)
#define SK_R32_MASK     ((1 << SK_R32_BITS) - 1)
#define SK_G32_MASK     ((1 << SK_G32_BITS) - 1)
#define SK_B32_MASK     ((1 << SK_B32_BITS) - 1)

// Extract one channel from a packed 32-bit color by shifting it into the top
// byte and back down. The argument is converted to uint32_t BEFORE the left
// shift so that a narrow or signed argument is never shifted as a (possibly
// overflowing) int. The SK_*32_SHIFT values are defined outside this block.
#define SkGetPackedA32(packed)      (((uint32_t)(packed) << (24 - SK_A32_SHIFT)) >> 24)
#define SkGetPackedR32(packed)      (((uint32_t)(packed) << (24 - SK_R32_SHIFT)) >> 24)
#define SkGetPackedG32(packed)      (((uint32_t)(packed) << (24 - SK_G32_SHIFT)) >> 24)
#define SkGetPackedB32(packed)      (((uint32_t)(packed) << (24 - SK_B32_SHIFT)) >> 24)

// Debug-only range checks for a single 8-bit channel value.
#define SkA32Assert(a)  SkASSERT((unsigned)(a) <= SK_A32_MASK)
#define SkR32Assert(r)  SkASSERT((unsigned)(r) <= SK_R32_MASK)
#define SkG32Assert(g)  SkASSERT((unsigned)(g) <= SK_G32_MASK)
#define SkB32Assert(b)  SkASSERT((unsigned)(b) <= SK_B32_MASK)
187 
// Debug-only check that c is a legal premultiplied color: alpha is in range
// and no color channel exceeds alpha. Expands to nothing in release builds.
#ifdef SK_DEBUG
    static inline void SkPMColorAssert(SkPMColor c) {
        const unsigned alpha = SkGetPackedA32(c);

        SkA32Assert(alpha);
        SkASSERT(SkGetPackedR32(c) <= alpha);
        SkASSERT(SkGetPackedG32(c) <= alpha);
        SkASSERT(SkGetPackedB32(c) <= alpha);
    }
#else
    #define SkPMColorAssert(c)
#endif
203 
204 /**
205  *  Pack the components into a SkPMColor, checking (in the debug version) that
206  *  the components are 0..255, and are already premultiplied (i.e. alpha >= color)
207  */
SkPackARGB32(U8CPU a,U8CPU r,U8CPU g,U8CPU b)208 static inline SkPMColor SkPackARGB32(U8CPU a, U8CPU r, U8CPU g, U8CPU b) {
209     SkA32Assert(a);
210     SkASSERT(r <= a);
211     SkASSERT(g <= a);
212     SkASSERT(b <= a);
213 
214     return (a << SK_A32_SHIFT) | (r << SK_R32_SHIFT) |
215            (g << SK_G32_SHIFT) | (b << SK_B32_SHIFT);
216 }
217 
218 /**
219  * Abstract 4-byte interpolation, implemented on top of SkPMColor
220  * utility functions. Third parameter controls blending of the first two:
221  *   (src, dst, 0) returns dst
222  *   (src, dst, 0xFF) returns src
223  */
SkFourByteInterp(SkPMColor src,SkPMColor dst,U8CPU srcWeight)224 static inline SkPMColor SkFourByteInterp(SkPMColor src, SkPMColor dst,
225                                          U8CPU srcWeight) {
226     unsigned scale = SkAlpha255To256(srcWeight);
227 
228     unsigned a = SkAlphaBlend(SkGetPackedA32(src), SkGetPackedA32(dst), scale);
229     unsigned r = SkAlphaBlend(SkGetPackedR32(src), SkGetPackedR32(dst), scale);
230     unsigned g = SkAlphaBlend(SkGetPackedG32(src), SkGetPackedG32(dst), scale);
231     unsigned b = SkAlphaBlend(SkGetPackedB32(src), SkGetPackedB32(dst), scale);
232 
233     return SkPackARGB32(a, r, g, b);
234 }
235 
236 /**
237  * 32b optimized version; currently appears to be 10% faster even on 64b
238  * architectures than an equivalent 64b version and 30% faster than
239  * SkFourByteInterp(). Third parameter controls blending of the first two:
240  *   (src, dst, 0) returns dst
241  *   (src, dst, 0xFF) returns src
242  * ** Does not match the results of SkFourByteInterp() because we use
243  * a more accurate scale computation!
244  * TODO: migrate Skia function to using an accurate 255->266 alpha
245  * conversion.
246  */
SkFastFourByteInterp(SkPMColor src,SkPMColor dst,U8CPU srcWeight)247 static inline SkPMColor SkFastFourByteInterp(SkPMColor src,
248                                              SkPMColor dst,
249                                              U8CPU srcWeight) {
250     SkASSERT(srcWeight < 256);
251 
252     // Reorders ARGB to AG-RB in order to reduce the number of operations.
253     const uint32_t mask = 0xFF00FF;
254     uint32_t src_rb = src & mask;
255     uint32_t src_ag = (src >> 8) & mask;
256     uint32_t dst_rb = dst & mask;
257     uint32_t dst_ag = (dst >> 8) & mask;
258 
259     // scale = srcWeight + (srcWeight >> 7) is more accurate than
260     // scale = srcWeight + 1, but 7% slower
261     int scale = srcWeight + (srcWeight >> 7);
262 
263     uint32_t ret_rb = src_rb * scale + (256 - scale) * dst_rb;
264     uint32_t ret_ag = src_ag * scale + (256 - scale) * dst_ag;
265 
266     return (ret_ag & ~mask) | ((ret_rb & ~mask) >> 8);
267 }
268 
269 /**
270  *  Same as SkPackARGB32, but this version guarantees to not check that the
271  *  values are premultiplied in the debug version.
272  */
SkPackARGB32NoCheck(U8CPU a,U8CPU r,U8CPU g,U8CPU b)273 static inline SkPMColor SkPackARGB32NoCheck(U8CPU a, U8CPU r, U8CPU g, U8CPU b) {
274     return (a << SK_A32_SHIFT) | (r << SK_R32_SHIFT) |
275            (g << SK_G32_SHIFT) | (b << SK_B32_SHIFT);
276 }
277 
278 static inline
SkPremultiplyARGBInline(U8CPU a,U8CPU r,U8CPU g,U8CPU b)279 SkPMColor SkPremultiplyARGBInline(U8CPU a, U8CPU r, U8CPU g, U8CPU b) {
280     SkA32Assert(a);
281     SkA32Assert(r);
282     SkA32Assert(g);
283     SkA32Assert(b);
284 
285     if (a != 255) {
286         r = SkMulDiv255Round(r, a);
287         g = SkMulDiv255Round(g, a);
288         b = SkMulDiv255Round(b, a);
289     }
290     return SkPackARGB32(a, r, g, b);
291 }
292 
293 SK_API extern const uint32_t gMask_00FF00FF;
294 
SkAlphaMulQ(uint32_t c,unsigned scale)295 static inline uint32_t SkAlphaMulQ(uint32_t c, unsigned scale) {
296     uint32_t mask = gMask_00FF00FF;
297 //    uint32_t mask = 0xFF00FF;
298 
299     uint32_t rb = ((c & mask) * scale) >> 8;
300     uint32_t ag = ((c >> 8) & mask) * scale;
301     return (rb & mask) | (ag & ~mask);
302 }
303 
SkPMSrcOver(SkPMColor src,SkPMColor dst)304 static inline SkPMColor SkPMSrcOver(SkPMColor src, SkPMColor dst) {
305     return src + SkAlphaMulQ(dst, SkAlpha255To256(255 - SkGetPackedA32(src)));
306 }
307 
SkBlendARGB32(SkPMColor src,SkPMColor dst,U8CPU aa)308 static inline SkPMColor SkBlendARGB32(SkPMColor src, SkPMColor dst, U8CPU aa) {
309     SkASSERT((unsigned)aa <= 255);
310 
311     unsigned src_scale = SkAlpha255To256(aa);
312     unsigned dst_scale = SkAlpha255To256(255 - SkAlphaMul(SkGetPackedA32(src), src_scale));
313 
314     return SkAlphaMulQ(src, src_scale) + SkAlphaMulQ(dst, dst_scale);
315 }
316 
317 ////////////////////////////////////////////////////////////////////////////////////////////
318 // Convert a 32bit pixel to a 16bit pixel (no dither)
319 
// Reduce one 8-bit channel to its 565 width by dropping the low bits.
#define SkR32ToR16_MACRO(r)   ((unsigned)(r) >> (SK_R32_BITS - SK_R16_BITS))
#define SkG32ToG16_MACRO(g)   ((unsigned)(g) >> (SK_G32_BITS - SK_G16_BITS))
#define SkB32ToB16_MACRO(b)   ((unsigned)(b) >> (SK_B32_BITS - SK_B16_BITS))

// Debug builds route through functions that range-check the input first;
// release builds use the macros directly.
#ifdef SK_DEBUG
    static inline unsigned SkR32ToR16(unsigned r) {
        SkR32Assert(r);
        const unsigned r16 = SkR32ToR16_MACRO(r);
        return r16;
    }
    static inline unsigned SkG32ToG16(unsigned g) {
        SkG32Assert(g);
        const unsigned g16 = SkG32ToG16_MACRO(g);
        return g16;
    }
    static inline unsigned SkB32ToB16(unsigned b) {
        SkB32Assert(b);
        const unsigned b16 = SkB32ToB16_MACRO(b);
        return b16;
    }
#else
    #define SkR32ToR16(r)   SkR32ToR16_MACRO(r)
    #define SkG32ToG16(g)   SkG32ToG16_MACRO(g)
    #define SkB32ToB16(b)   SkB32ToB16_MACRO(b)
#endif
342 
// Pull one channel straight out of a packed 32-bit color at its 565 width:
// a single shift drops both the channel's position and its low bits.
#define SkPacked32ToR16(c)  (((unsigned)(c) >> (SK_R32_SHIFT + SK_R32_BITS - SK_R16_BITS)) & SK_R16_MASK)
#define SkPacked32ToG16(c)  (((unsigned)(c) >> (SK_G32_SHIFT + SK_G32_BITS - SK_G16_BITS)) & SK_G16_MASK)
#define SkPacked32ToB16(c)  (((unsigned)(c) >> (SK_B32_SHIFT + SK_B32_BITS - SK_B16_BITS)) & SK_B16_MASK)
346 
SkPixel32ToPixel16(SkPMColor c)347 static inline U16CPU SkPixel32ToPixel16(SkPMColor c) {
348     unsigned r = ((c >> (SK_R32_SHIFT + (8 - SK_R16_BITS))) & SK_R16_MASK) << SK_R16_SHIFT;
349     unsigned g = ((c >> (SK_G32_SHIFT + (8 - SK_G16_BITS))) & SK_G16_MASK) << SK_G16_SHIFT;
350     unsigned b = ((c >> (SK_B32_SHIFT + (8 - SK_B16_BITS))) & SK_B16_MASK) << SK_B16_SHIFT;
351     return r | g | b;
352 }
353 
SkPack888ToRGB16(U8CPU r,U8CPU g,U8CPU b)354 static inline U16CPU SkPack888ToRGB16(U8CPU r, U8CPU g, U8CPU b) {
355     return  (SkR32ToR16(r) << SK_R16_SHIFT) |
356             (SkG32ToG16(g) << SK_G16_SHIFT) |
357             (SkB32ToB16(b) << SK_B16_SHIFT);
358 }
359 
// Same conversion as SkPixel32ToPixel16, passed through SkToU16 so the
// result is a 16bit value.
#define SkPixel32ToPixel16_ToU16(src)   SkToU16(SkPixel32ToPixel16(src))
361 
362 /////////////////////////////////////////////////////////////////////////////////////////
363 // Fast dither from 32->16
364 
// Non-zero when x and y differ in their lowest bit, i.e. on alternating
// pixels of a checkerboard.
#define SkShouldDitherXY(x, y)  (((x) ^ (y)) & 1)
366 
SkDitherPack888ToRGB16(U8CPU r,U8CPU g,U8CPU b)367 static inline uint16_t SkDitherPack888ToRGB16(U8CPU r, U8CPU g, U8CPU b) {
368     r = ((r << 1) - ((r >> (8 - SK_R16_BITS) << (8 - SK_R16_BITS)) | (r >> SK_R16_BITS))) >> (8 - SK_R16_BITS);
369     g = ((g << 1) - ((g >> (8 - SK_G16_BITS) << (8 - SK_G16_BITS)) | (g >> SK_G16_BITS))) >> (8 - SK_G16_BITS);
370     b = ((b << 1) - ((b >> (8 - SK_B16_BITS) << (8 - SK_B16_BITS)) | (b >> SK_B16_BITS))) >> (8 - SK_B16_BITS);
371 
372     return SkPackRGB16(r, g, b);
373 }
374 
SkDitherPixel32ToPixel16(SkPMColor c)375 static inline uint16_t SkDitherPixel32ToPixel16(SkPMColor c) {
376     return SkDitherPack888ToRGB16(SkGetPackedR32(c), SkGetPackedG32(c), SkGetPackedB32(c));
377 }
378 
379 /*  Return c in expanded_rgb_16 format, but also scaled up by 32 (5 bits)
380     It is now suitable for combining with a scaled expanded_rgb_16 color
381     as in SkSrcOver32To16().
382     We must do this 565 high-bit replication, in order for the subsequent add
383     to saturate properly (and not overflow). If we take the 8 bits as is, it is
384     possible to overflow.
385 */
SkPMColorToExpanded16x5(SkPMColor c)386 static inline uint32_t SkPMColorToExpanded16x5(SkPMColor c) {
387     unsigned sr = SkPacked32ToR16(c);
388     unsigned sg = SkPacked32ToG16(c);
389     unsigned sb = SkPacked32ToB16(c);
390 
391     sr = (sr << 5) | sr;
392     sg = (sg << 5) | (sg >> 1);
393     sb = (sb << 5) | sb;
394     return (sr << 11) | (sg << 21) | (sb << 0);
395 }
396 
397 /*  SrcOver the 32bit src color with the 16bit dst, returning a 16bit value
398     (with dirt in the high 16bits, so caller beware).
399 */
SkSrcOver32To16(SkPMColor src,uint16_t dst)400 static inline U16CPU SkSrcOver32To16(SkPMColor src, uint16_t dst) {
401     unsigned sr = SkGetPackedR32(src);
402     unsigned sg = SkGetPackedG32(src);
403     unsigned sb = SkGetPackedB32(src);
404 
405     unsigned dr = SkGetPackedR16(dst);
406     unsigned dg = SkGetPackedG16(dst);
407     unsigned db = SkGetPackedB16(dst);
408 
409     unsigned isa = 255 - SkGetPackedA32(src);
410 
411     dr = (sr + SkMul16ShiftRound(dr, isa, SK_R16_BITS)) >> (8 - SK_R16_BITS);
412     dg = (sg + SkMul16ShiftRound(dg, isa, SK_G16_BITS)) >> (8 - SK_G16_BITS);
413     db = (sb + SkMul16ShiftRound(db, isa, SK_B16_BITS)) >> (8 - SK_B16_BITS);
414 
415     return SkPackRGB16(dr, dg, db);
416 }
417 
418 ////////////////////////////////////////////////////////////////////////////////////////////
419 // Convert a 16bit pixel to a 32bit pixel
420 
SkR16ToR32(unsigned r)421 static inline unsigned SkR16ToR32(unsigned r) {
422     return (r << (8 - SK_R16_BITS)) | (r >> (2 * SK_R16_BITS - 8));
423 }
424 
SkG16ToG32(unsigned g)425 static inline unsigned SkG16ToG32(unsigned g) {
426     return (g << (8 - SK_G16_BITS)) | (g >> (2 * SK_G16_BITS - 8));
427 }
428 
SkB16ToB32(unsigned b)429 static inline unsigned SkB16ToB32(unsigned b) {
430     return (b << (8 - SK_B16_BITS)) | (b >> (2 * SK_B16_BITS - 8));
431 }
432 
// Extract one channel from a packed 565 color and widen it to 8 bits.
#define SkPacked16ToR32(c)      SkR16ToR32(SkGetPackedR16(c))
#define SkPacked16ToG32(c)      SkG16ToG32(SkGetPackedG16(c))
#define SkPacked16ToB32(c)      SkB16ToB32(SkGetPackedB16(c))
436 
SkPixel16ToPixel32(U16CPU src)437 static inline SkPMColor SkPixel16ToPixel32(U16CPU src) {
438     SkASSERT(src == SkToU16(src));
439 
440     unsigned    r = SkPacked16ToR32(src);
441     unsigned    g = SkPacked16ToG32(src);
442     unsigned    b = SkPacked16ToB32(src);
443 
444     SkASSERT((r >> (8 - SK_R16_BITS)) == SkGetPackedR16(src));
445     SkASSERT((g >> (8 - SK_G16_BITS)) == SkGetPackedG16(src));
446     SkASSERT((b >> (8 - SK_B16_BITS)) == SkGetPackedB16(src));
447 
448     return SkPackARGB32(0xFF, r, g, b);
449 }
450 
451 // similar to SkPixel16ToPixel32, but returns SkColor instead of SkPMColor
SkPixel16ToColor(U16CPU src)452 static inline SkColor SkPixel16ToColor(U16CPU src) {
453     SkASSERT(src == SkToU16(src));
454 
455     unsigned    r = SkPacked16ToR32(src);
456     unsigned    g = SkPacked16ToG32(src);
457     unsigned    b = SkPacked16ToB32(src);
458 
459     SkASSERT((r >> (8 - SK_R16_BITS)) == SkGetPackedR16(src));
460     SkASSERT((g >> (8 - SK_G16_BITS)) == SkGetPackedG16(src));
461     SkASSERT((b >> (8 - SK_B16_BITS)) == SkGetPackedB16(src));
462 
463     return SkColorSetRGB(r, g, b);
464 }
465 
466 ///////////////////////////////////////////////////////////////////////////////
467 
// A premultiplied color packed into 16 bits, 4 bits per component.
typedef uint16_t SkPMColor16;

// Put in OpenGL order (r g b a): red in the top nibble, alpha in the bottom.
#define SK_A4444_SHIFT    0
#define SK_R4444_SHIFT    12
#define SK_G4444_SHIFT    8
#define SK_B4444_SHIFT    4

// Reduce an 8-bit component to 4 bits by dropping the low nibble.
#define SkA32To4444(a)  ((unsigned)(a) >> 4)
#define SkR32To4444(r)  ((unsigned)(r) >> 4)
#define SkG32To4444(g)  ((unsigned)(g) >> 4)
#define SkB32To4444(b)  ((unsigned)(b) >> 4)
480 
SkReplicateNibble(unsigned nib)481 static inline U8CPU SkReplicateNibble(unsigned nib) {
482     SkASSERT(nib <= 0xF);
483     return (nib << 4) | nib;
484 }
485 
// Widen a single 4-bit component to 8 bits.
#define SkA4444ToA32(a)     SkReplicateNibble(a)
#define SkR4444ToR32(r)     SkReplicateNibble(r)
#define SkG4444ToG32(g)     SkReplicateNibble(g)
#define SkB4444ToB32(b)     SkReplicateNibble(b)

// Extract one 4-bit component from a packed 4444 color.
#define SkGetPackedA4444(c)     (((unsigned)(c) >> SK_A4444_SHIFT) & 0xF)
#define SkGetPackedR4444(c)     (((unsigned)(c) >> SK_R4444_SHIFT) & 0xF)
#define SkGetPackedG4444(c)     (((unsigned)(c) >> SK_G4444_SHIFT) & 0xF)
#define SkGetPackedB4444(c)     (((unsigned)(c) >> SK_B4444_SHIFT) & 0xF)

// Extract one component from a packed 4444 color and widen it to 8 bits.
#define SkPacked4444ToA32(c)    SkReplicateNibble(SkGetPackedA4444(c))
#define SkPacked4444ToR32(c)    SkReplicateNibble(SkGetPackedR4444(c))
#define SkPacked4444ToG32(c)    SkReplicateNibble(SkGetPackedG4444(c))
#define SkPacked4444ToB32(c)    SkReplicateNibble(SkGetPackedB4444(c))
500 
// Debug-only check that c is a legal premultiplied 4444 color: no color
// component exceeds alpha. Expands to nothing in release builds.
#ifdef SK_DEBUG
static inline void SkPMColor16Assert(U16CPU c) {
    const unsigned alpha = SkGetPackedA4444(c);

    SkASSERT(alpha <= 0xF);
    SkASSERT(SkGetPackedR4444(c) <= alpha);
    SkASSERT(SkGetPackedG4444(c) <= alpha);
    SkASSERT(SkGetPackedB4444(c) <= alpha);
}
#else
#define SkPMColor16Assert(c)
#endif
516 
SkAlpha15To16(unsigned a)517 static inline unsigned SkAlpha15To16(unsigned a) {
518     SkASSERT(a <= 0xF);
519     return a + (a >> 3);
520 }
521 
// Multiply value by a 0..16 scale and shift the product down by 4.
// The debug build asserts the scale range; the release build is a macro.
#ifdef SK_DEBUG
    static inline int SkAlphaMul4(int value, int scale) {
        SkASSERT((unsigned)scale <= 0x10);
        const int product = value * scale;
        return product >> 4;
    }
#else
    #define SkAlphaMul4(value, scale)   ((value) * (scale) >> 4)
#endif
530 
SkR4444ToR565(unsigned r)531 static inline unsigned SkR4444ToR565(unsigned r) {
532     SkASSERT(r <= 0xF);
533     return (r << (SK_R16_BITS - 4)) | (r >> (8 - SK_R16_BITS));
534 }
535 
SkG4444ToG565(unsigned g)536 static inline unsigned SkG4444ToG565(unsigned g) {
537     SkASSERT(g <= 0xF);
538     return (g << (SK_G16_BITS - 4)) | (g >> (8 - SK_G16_BITS));
539 }
540 
SkB4444ToB565(unsigned b)541 static inline unsigned SkB4444ToB565(unsigned b) {
542     SkASSERT(b <= 0xF);
543     return (b << (SK_B16_BITS - 4)) | (b >> (8 - SK_B16_BITS));
544 }
545 
SkPackARGB4444(unsigned a,unsigned r,unsigned g,unsigned b)546 static inline SkPMColor16 SkPackARGB4444(unsigned a, unsigned r,
547                                          unsigned g, unsigned b) {
548     SkASSERT(a <= 0xF);
549     SkASSERT(r <= a);
550     SkASSERT(g <= a);
551     SkASSERT(b <= a);
552 
553     return (SkPMColor16)((a << SK_A4444_SHIFT) | (r << SK_R4444_SHIFT) |
554                          (g << SK_G4444_SHIFT) | (b << SK_B4444_SHIFT));
555 }
556 
557 extern const uint16_t gMask_0F0F;
558 
SkAlphaMulQ4(U16CPU c,unsigned scale)559 static inline U16CPU SkAlphaMulQ4(U16CPU c, unsigned scale) {
560     SkASSERT(scale <= 16);
561 
562     const unsigned mask = 0xF0F;    //gMask_0F0F;
563 
564 #if 0
565     unsigned rb = ((c & mask) * scale) >> 4;
566     unsigned ag = ((c >> 4) & mask) * scale;
567     return (rb & mask) | (ag & ~mask);
568 #else
569     c = (c & mask) | ((c & (mask << 4)) << 12);
570     c = c * scale >> 4;
571     return (c & mask) | ((c >> 12) & (mask << 4));
572 #endif
573 }
574 
575 /** Expand the SkPMColor16 color into a 32bit value that can be scaled all at
576     once by a value up to 16. Used in conjunction with SkCompact_4444.
577 */
SkExpand_4444(U16CPU c)578 static inline uint32_t SkExpand_4444(U16CPU c) {
579     SkASSERT(c == (uint16_t)c);
580 
581     const unsigned mask = 0xF0F;    //gMask_0F0F;
582     return (c & mask) | ((c & ~mask) << 12);
583 }
584 
585 /** Compress an expanded value (from SkExpand_4444) back down to a SkPMColor16.
586     NOTE: this explicitly does not clean the top 16 bits (which may be garbage).
587     It does this for speed, since if it is being written directly to 16bits of
588     memory, the top 16bits will be ignored. Casting the result to uint16_t here
589     would add 2 more instructions, slow us down. It is up to the caller to
590     perform the cast if needed.
591 */
SkCompact_4444(uint32_t c)592 static inline U16CPU SkCompact_4444(uint32_t c) {
593     const unsigned mask = 0xF0F;    //gMask_0F0F;
594     return (c & mask) | ((c >> 12) & ~mask);
595 }
596 
SkSrcOver4444To16(SkPMColor16 s,uint16_t d)597 static inline uint16_t SkSrcOver4444To16(SkPMColor16 s, uint16_t d) {
598     unsigned sa = SkGetPackedA4444(s);
599     unsigned sr = SkR4444ToR565(SkGetPackedR4444(s));
600     unsigned sg = SkG4444ToG565(SkGetPackedG4444(s));
601     unsigned sb = SkB4444ToB565(SkGetPackedB4444(s));
602 
603     // To avoid overflow, we have to clear the low bit of the synthetic sg
604     // if the src alpha is <= 7.
605     // to see why, try blending 0x4444 on top of 565-white and watch green
606     // overflow (sum == 64)
607     sg &= ~(~(sa >> 3) & 1);
608 
609     unsigned scale = SkAlpha15To16(15 - sa);
610     unsigned dr = SkAlphaMul4(SkGetPackedR16(d), scale);
611     unsigned dg = SkAlphaMul4(SkGetPackedG16(d), scale);
612     unsigned db = SkAlphaMul4(SkGetPackedB16(d), scale);
613 
614 #if 0
615     if (sg + dg > 63) {
616         SkDebugf("---- SkSrcOver4444To16 src=%x dst=%x scale=%d, sg=%d dg=%d\n", s, d, scale, sg, dg);
617     }
618 #endif
619     return SkPackRGB16(sr + dr, sg + dg, sb + db);
620 }
621 
SkBlend4444To16(SkPMColor16 src,uint16_t dst,int scale16)622 static inline uint16_t SkBlend4444To16(SkPMColor16 src, uint16_t dst, int scale16) {
623     SkASSERT((unsigned)scale16 <= 16);
624 
625     return SkSrcOver4444To16(SkAlphaMulQ4(src, scale16), dst);
626 }
627 
SkBlend4444(SkPMColor16 src,SkPMColor16 dst,int scale16)628 static inline uint16_t SkBlend4444(SkPMColor16 src, SkPMColor16 dst, int scale16) {
629     SkASSERT((unsigned)scale16 <= 16);
630 
631     uint32_t src32 = SkExpand_4444(src) * scale16;
632     // the scaled srcAlpha is the bottom byte
633 #ifdef SK_DEBUG
634     {
635         unsigned srcA = SkGetPackedA4444(src) * scale16;
636         SkASSERT(srcA == (src32 & 0xFF));
637     }
638 #endif
639     unsigned dstScale = SkAlpha255To256(255 - (src32 & 0xFF)) >> 4;
640     uint32_t dst32 = SkExpand_4444(dst) * dstScale;
641     return SkCompact_4444((src32 + dst32) >> 4);
642 }
643 
SkPixel4444ToPixel32(U16CPU c)644 static inline SkPMColor SkPixel4444ToPixel32(U16CPU c) {
645     uint32_t d = (SkGetPackedA4444(c) << SK_A32_SHIFT) |
646                  (SkGetPackedR4444(c) << SK_R32_SHIFT) |
647                  (SkGetPackedG4444(c) << SK_G32_SHIFT) |
648                  (SkGetPackedB4444(c) << SK_B32_SHIFT);
649     return d | (d << 4);
650 }
651 
SkPixel32ToPixel4444(SkPMColor c)652 static inline SkPMColor16 SkPixel32ToPixel4444(SkPMColor c) {
653     return  (((c >> (SK_A32_SHIFT + 4)) & 0xF) << SK_A4444_SHIFT) |
654     (((c >> (SK_R32_SHIFT + 4)) & 0xF) << SK_R4444_SHIFT) |
655     (((c >> (SK_G32_SHIFT + 4)) & 0xF) << SK_G4444_SHIFT) |
656     (((c >> (SK_B32_SHIFT + 4)) & 0xF) << SK_B4444_SHIFT);
657 }
658 
659 // cheap 2x2 dither
SkDitherARGB32To4444(U8CPU a,U8CPU r,U8CPU g,U8CPU b)660 static inline SkPMColor16 SkDitherARGB32To4444(U8CPU a, U8CPU r,
661                                                U8CPU g, U8CPU b) {
662     // to ensure that we stay a legal premultiplied color, we take the max()
663     // of the truncated and dithered alpha values. If we didn't, cases like
664     // SkDitherARGB32To4444(0x31, 0x2E, ...) would generate SkPackARGB4444(2, 3, ...)
665     // which is not legal premultiplied, since a < color
666     unsigned dithered_a = ((a << 1) - ((a >> 4 << 4) | (a >> 4))) >> 4;
667     a = SkMax32(a >> 4, dithered_a);
668     // these we just dither in place
669     r = ((r << 1) - ((r >> 4 << 4) | (r >> 4))) >> 4;
670     g = ((g << 1) - ((g >> 4 << 4) | (g >> 4))) >> 4;
671     b = ((b << 1) - ((b >> 4 << 4) | (b >> 4))) >> 4;
672 
673     return SkPackARGB4444(a, r, g, b);
674 }
675 
SkDitherPixel32To4444(SkPMColor c)676 static inline SkPMColor16 SkDitherPixel32To4444(SkPMColor c) {
677     return SkDitherARGB32To4444(SkGetPackedA32(c), SkGetPackedR32(c),
678                                 SkGetPackedG32(c), SkGetPackedB32(c));
679 }
680 
681 /*  Assumes 16bit is in standard RGBA order.
682     Transforms a normal ARGB_8888 into the same byte order as
683     expanded ARGB_4444, but keeps each component 8bits
684 */
SkExpand_8888(SkPMColor c)685 static inline uint32_t SkExpand_8888(SkPMColor c) {
686     return  (((c >> SK_R32_SHIFT) & 0xFF) << 24) |
687             (((c >> SK_G32_SHIFT) & 0xFF) <<  8) |
688             (((c >> SK_B32_SHIFT) & 0xFF) << 16) |
689             (((c >> SK_A32_SHIFT) & 0xFF) <<  0);
690 }
691 
692 /*  Undo the operation of SkExpand_8888, turning the argument back into
693     a SkPMColor.
694 */
SkCompact_8888(uint32_t c)695 static inline SkPMColor SkCompact_8888(uint32_t c) {
696     return  (((c >> 24) & 0xFF) << SK_R32_SHIFT) |
697             (((c >>  8) & 0xFF) << SK_G32_SHIFT) |
698             (((c >> 16) & 0xFF) << SK_B32_SHIFT) |
699             (((c >>  0) & 0xFF) << SK_A32_SHIFT);
700 }
701 
702 /*  Like SkExpand_8888, this transforms a pmcolor into the expanded 4444 format,
703     but this routine just keeps the high 4bits of each component in the low
704     4bits of the result (just like a newly expanded PMColor16).
705 */
SkExpand32_4444(SkPMColor c)706 static inline uint32_t SkExpand32_4444(SkPMColor c) {
707     return  (((c >> (SK_R32_SHIFT + 4)) & 0xF) << 24) |
708             (((c >> (SK_G32_SHIFT + 4)) & 0xF) <<  8) |
709             (((c >> (SK_B32_SHIFT + 4)) & 0xF) << 16) |
710             (((c >> (SK_A32_SHIFT + 4)) & 0xF) <<  0);
711 }
712 
// Takes two values and alternates them as part of a memset16.
// Used for cheap 2x2 dithering when the colors are opaque.
void sk_dither_memset16(uint16_t* dst, uint16_t value, uint16_t other, int n);
716 
717 ///////////////////////////////////////////////////////////////////////////////
718 
SkUpscale31To32(int value)719 static inline int SkUpscale31To32(int value) {
720     SkASSERT((unsigned)value <= 31);
721     return value + (value >> 4);
722 }
723 
SkBlend32(int src,int dst,int scale)724 static inline int SkBlend32(int src, int dst, int scale) {
725     SkASSERT((unsigned)src <= 0xFF);
726     SkASSERT((unsigned)dst <= 0xFF);
727     SkASSERT((unsigned)scale <= 32);
728     return dst + ((src - dst) * scale >> 5);
729 }
730 
SkBlendLCD16(int srcA,int srcR,int srcG,int srcB,SkPMColor dst,uint16_t mask)731 static inline SkPMColor SkBlendLCD16(int srcA, int srcR, int srcG, int srcB,
732                                      SkPMColor dst, uint16_t mask) {
733     if (mask == 0) {
734         return dst;
735     }
736 
737     /*  We want all of these in 5bits, hence the shifts in case one of them
738      *  (green) is 6bits.
739      */
740     int maskR = SkGetPackedR16(mask) >> (SK_R16_BITS - 5);
741     int maskG = SkGetPackedG16(mask) >> (SK_G16_BITS - 5);
742     int maskB = SkGetPackedB16(mask) >> (SK_B16_BITS - 5);
743 
744     // Now upscale them to 0..32, so we can use blend32
745     maskR = SkUpscale31To32(maskR);
746     maskG = SkUpscale31To32(maskG);
747     maskB = SkUpscale31To32(maskB);
748 
749     // srcA has been upscaled to 256 before passed into this function
750     maskR = maskR * srcA >> 8;
751     maskG = maskG * srcA >> 8;
752     maskB = maskB * srcA >> 8;
753 
754     int dstR = SkGetPackedR32(dst);
755     int dstG = SkGetPackedG32(dst);
756     int dstB = SkGetPackedB32(dst);
757 
758     // LCD blitting is only supported if the dst is known/required
759     // to be opaque
760     return SkPackARGB32(0xFF,
761                         SkBlend32(srcR, dstR, maskR),
762                         SkBlend32(srcG, dstG, maskG),
763                         SkBlend32(srcB, dstB, maskB));
764 }
765 
SkBlendLCD16Opaque(int srcR,int srcG,int srcB,SkPMColor dst,uint16_t mask,SkPMColor opaqueDst)766 static inline SkPMColor SkBlendLCD16Opaque(int srcR, int srcG, int srcB,
767                                            SkPMColor dst, uint16_t mask,
768                                            SkPMColor opaqueDst) {
769     if (mask == 0) {
770         return dst;
771     }
772 
773     if (0xFFFF == mask) {
774         return opaqueDst;
775     }
776 
777     /*  We want all of these in 5bits, hence the shifts in case one of them
778      *  (green) is 6bits.
779      */
780     int maskR = SkGetPackedR16(mask) >> (SK_R16_BITS - 5);
781     int maskG = SkGetPackedG16(mask) >> (SK_G16_BITS - 5);
782     int maskB = SkGetPackedB16(mask) >> (SK_B16_BITS - 5);
783 
784     // Now upscale them to 0..32, so we can use blend32
785     maskR = SkUpscale31To32(maskR);
786     maskG = SkUpscale31To32(maskG);
787     maskB = SkUpscale31To32(maskB);
788 
789     int dstR = SkGetPackedR32(dst);
790     int dstG = SkGetPackedG32(dst);
791     int dstB = SkGetPackedB32(dst);
792 
793     // LCD blitting is only supported if the dst is known/required
794     // to be opaque
795     return SkPackARGB32(0xFF,
796                         SkBlend32(srcR, dstR, maskR),
797                         SkBlend32(srcG, dstG, maskG),
798                         SkBlend32(srcB, dstB, maskB));
799 }
800 
SkBlitLCD16Row(SkPMColor dst[],const uint16_t src[],SkColor color,int width,SkPMColor)801 static inline void SkBlitLCD16Row(SkPMColor dst[], const uint16_t src[],
802                                   SkColor color, int width, SkPMColor) {
803     int srcA = SkColorGetA(color);
804     int srcR = SkColorGetR(color);
805     int srcG = SkColorGetG(color);
806     int srcB = SkColorGetB(color);
807 
808     srcA = SkAlpha255To256(srcA);
809 
810     for (int i = 0; i < width; i++) {
811         dst[i] = SkBlendLCD16(srcA, srcR, srcG, srcB, dst[i], src[i]);
812     }
813 }
814 
SkBlitLCD16OpaqueRow(SkPMColor dst[],const uint16_t src[],SkColor color,int width,SkPMColor opaqueDst)815 static inline void SkBlitLCD16OpaqueRow(SkPMColor dst[], const uint16_t src[],
816                                         SkColor color, int width,
817                                         SkPMColor opaqueDst) {
818     int srcR = SkColorGetR(color);
819     int srcG = SkColorGetG(color);
820     int srcB = SkColorGetB(color);
821 
822     for (int i = 0; i < width; i++) {
823         dst[i] = SkBlendLCD16Opaque(srcR, srcG, srcB, dst[i], src[i],
824                                     opaqueDst);
825     }
826 }
827 
828 #endif
829 
830