1
2 /*
3 * Copyright 2006 The Android Open Source Project
4 *
5 * Use of this source code is governed by a BSD-style license that can be
6 * found in the LICENSE file.
7 */
8
9
10 #ifndef SkColorPriv_DEFINED
11 #define SkColorPriv_DEFINED
12
// turn this on for extra debug checking when blending onto 565
14 #ifdef SK_DEBUG
15 #define CHECK_FOR_565_OVERFLOW
16 #endif
17
18 #include "SkColor.h"
19 #include "SkMath.h"
20
21 /** Turn 0..255 into 0..256 by adding 1 at the half-way point. Used to turn a
22 byte into a scale value, so that we can say scale * value >> 8 instead of
23 alpha * value / 255.
24
25 In debugging, asserts that alpha is 0..255
26 */
static inline unsigned SkAlpha255To256(U8CPU alpha) {
    SkASSERT(SkToU8(alpha) == alpha);
    // this one assumes that blending on top of an opaque dst keeps it that way
    // even though it is less accurate than a+(a>>7) for non-opaque dsts
    return alpha + 1;
}
33
/** Multiply value by 0..256, and shift the result down 8
    (i.e. return (value * alpha256) >> 8)
*/
37 #define SkAlphaMul(value, alpha256) (SkMulS16(value, alpha256) >> 8)
38
39 // The caller may want negative values, so keep all params signed (int)
40 // so we don't accidentally slip into unsigned math and lose the sign
41 // extension when we shift (in SkAlphaMul)
SkAlphaBlend(int src,int dst,int scale256)42 static inline int SkAlphaBlend(int src, int dst, int scale256) {
43 SkASSERT((unsigned)scale256 <= 256);
44 return dst + SkAlphaMul(src - dst, scale256);
45 }
46
47 /**
48 * Returns (src * alpha + dst * (255 - alpha)) / 255
49 *
50 * This is more accurate than SkAlphaBlend, but slightly slower
51 */
SkAlphaBlend255(S16CPU src,S16CPU dst,U8CPU alpha)52 static inline int SkAlphaBlend255(S16CPU src, S16CPU dst, U8CPU alpha) {
53 SkASSERT((int16_t)src == src);
54 SkASSERT((int16_t)dst == dst);
55 SkASSERT((uint8_t)alpha == alpha);
56
57 int prod = SkMulS16(src - dst, alpha) + 128;
58 prod = (prod + (prod >> 8)) >> 8;
59 return dst + prod;
60 }
61
62 #define SK_R16_BITS 5
63 #define SK_G16_BITS 6
64 #define SK_B16_BITS 5
65
66 #define SK_R16_SHIFT (SK_B16_BITS + SK_G16_BITS)
67 #define SK_G16_SHIFT (SK_B16_BITS)
68 #define SK_B16_SHIFT 0
69
70 #define SK_R16_MASK ((1 << SK_R16_BITS) - 1)
71 #define SK_G16_MASK ((1 << SK_G16_BITS) - 1)
72 #define SK_B16_MASK ((1 << SK_B16_BITS) - 1)
73
74 #define SkGetPackedR16(color) (((unsigned)(color) >> SK_R16_SHIFT) & SK_R16_MASK)
75 #define SkGetPackedG16(color) (((unsigned)(color) >> SK_G16_SHIFT) & SK_G16_MASK)
76 #define SkGetPackedB16(color) (((unsigned)(color) >> SK_B16_SHIFT) & SK_B16_MASK)
77
78 #define SkR16Assert(r) SkASSERT((unsigned)(r) <= SK_R16_MASK)
79 #define SkG16Assert(g) SkASSERT((unsigned)(g) <= SK_G16_MASK)
80 #define SkB16Assert(b) SkASSERT((unsigned)(b) <= SK_B16_MASK)
81
SkPackRGB16(unsigned r,unsigned g,unsigned b)82 static inline uint16_t SkPackRGB16(unsigned r, unsigned g, unsigned b) {
83 SkASSERT(r <= SK_R16_MASK);
84 SkASSERT(g <= SK_G16_MASK);
85 SkASSERT(b <= SK_B16_MASK);
86
87 return SkToU16((r << SK_R16_SHIFT) | (g << SK_G16_SHIFT) | (b << SK_B16_SHIFT));
88 }
89
90 #define SK_R16_MASK_IN_PLACE (SK_R16_MASK << SK_R16_SHIFT)
91 #define SK_G16_MASK_IN_PLACE (SK_G16_MASK << SK_G16_SHIFT)
92 #define SK_B16_MASK_IN_PLACE (SK_B16_MASK << SK_B16_SHIFT)
93
94 /** Expand the 16bit color into a 32bit value that can be scaled all at once
95 by a value up to 32. Used in conjunction with SkCompact_rgb_16.
96 */
SkExpand_rgb_16(U16CPU c)97 static inline uint32_t SkExpand_rgb_16(U16CPU c) {
98 SkASSERT(c == (uint16_t)c);
99
100 return ((c & SK_G16_MASK_IN_PLACE) << 16) | (c & ~SK_G16_MASK_IN_PLACE);
101 }
102
103 /** Compress an expanded value (from SkExpand_rgb_16) back down to a 16bit
104 color value. The computation yields only 16bits of valid data, but we claim
105 to return 32bits, so that the compiler won't generate extra instructions to
106 "clean" the top 16bits. However, the top 16 can contain garbage, so it is
107 up to the caller to safely ignore them.
108 */
SkCompact_rgb_16(uint32_t c)109 static inline U16CPU SkCompact_rgb_16(uint32_t c) {
110 return ((c >> 16) & SK_G16_MASK_IN_PLACE) | (c & ~SK_G16_MASK_IN_PLACE);
111 }
112
113 /** Scale the 16bit color value by the 0..256 scale parameter.
114 The computation yields only 16bits of valid data, but we claim
115 to return 32bits, so that the compiler won't generate extra instructions to
116 "clean" the top 16bits.
117 */
SkAlphaMulRGB16(U16CPU c,unsigned scale)118 static inline U16CPU SkAlphaMulRGB16(U16CPU c, unsigned scale) {
119 return SkCompact_rgb_16(SkExpand_rgb_16(c) * (scale >> 3) >> 5);
120 }
121
122 // this helper explicitly returns a clean 16bit value (but slower)
123 #define SkAlphaMulRGB16_ToU16(c, s) (uint16_t)SkAlphaMulRGB16(c, s)
124
125 /** Blend src and dst 16bit colors by the 0..256 scale parameter.
126 The computation yields only 16bits of valid data, but we claim
127 to return 32bits, so that the compiler won't generate extra instructions to
128 "clean" the top 16bits.
129 */
SkBlendRGB16(U16CPU src,U16CPU dst,int srcScale)130 static inline U16CPU SkBlendRGB16(U16CPU src, U16CPU dst, int srcScale) {
131 SkASSERT((unsigned)srcScale <= 256);
132
133 srcScale >>= 3;
134
135 uint32_t src32 = SkExpand_rgb_16(src);
136 uint32_t dst32 = SkExpand_rgb_16(dst);
137 return SkCompact_rgb_16(dst32 + ((src32 - dst32) * srcScale >> 5));
138 }
139
SkBlendRGB16(const uint16_t src[],uint16_t dst[],int srcScale,int count)140 static inline void SkBlendRGB16(const uint16_t src[], uint16_t dst[],
141 int srcScale, int count) {
142 SkASSERT(count > 0);
143 SkASSERT((unsigned)srcScale <= 256);
144
145 srcScale >>= 3;
146
147 do {
148 uint32_t src32 = SkExpand_rgb_16(*src++);
149 uint32_t dst32 = SkExpand_rgb_16(*dst);
150 *dst++ = SkCompact_rgb_16(dst32 + ((src32 - dst32) * srcScale >> 5));
151 } while (--count > 0);
152 }
153
154 #ifdef SK_DEBUG
// Debug build: add two packed 565 values, asserting that no component sum
// overflows its field. (The release build below is a plain '+'.)
static inline U16CPU SkRGB16Add(U16CPU a, U16CPU b) {
    SkASSERT(SkGetPackedR16(a) + SkGetPackedR16(b) <= SK_R16_MASK);
    SkASSERT(SkGetPackedG16(a) + SkGetPackedG16(b) <= SK_G16_MASK);
    SkASSERT(SkGetPackedB16(a) + SkGetPackedB16(b) <= SK_B16_MASK);

    return a + b;
}
162 #else
163 #define SkRGB16Add(a, b) ((a) + (b))
164 #endif
165
166 ///////////////////////////////////////////////////////////////////////////////
167
168 #define SK_A32_BITS 8
169 #define SK_R32_BITS 8
170 #define SK_G32_BITS 8
171 #define SK_B32_BITS 8
172
173 #define SK_A32_MASK ((1 << SK_A32_BITS) - 1)
174 #define SK_R32_MASK ((1 << SK_R32_BITS) - 1)
175 #define SK_G32_MASK ((1 << SK_G32_BITS) - 1)
176 #define SK_B32_MASK ((1 << SK_B32_BITS) - 1)
177
178 #define SkGetPackedA32(packed) ((uint32_t)((packed) << (24 - SK_A32_SHIFT)) >> 24)
179 #define SkGetPackedR32(packed) ((uint32_t)((packed) << (24 - SK_R32_SHIFT)) >> 24)
180 #define SkGetPackedG32(packed) ((uint32_t)((packed) << (24 - SK_G32_SHIFT)) >> 24)
181 #define SkGetPackedB32(packed) ((uint32_t)((packed) << (24 - SK_B32_SHIFT)) >> 24)
182
183 #define SkA32Assert(a) SkASSERT((unsigned)(a) <= SK_A32_MASK)
184 #define SkR32Assert(r) SkASSERT((unsigned)(r) <= SK_R32_MASK)
185 #define SkG32Assert(g) SkASSERT((unsigned)(g) <= SK_G32_MASK)
186 #define SkB32Assert(b) SkASSERT((unsigned)(b) <= SK_B32_MASK)
187
188 #ifdef SK_DEBUG
SkPMColorAssert(SkPMColor c)189 static inline void SkPMColorAssert(SkPMColor c) {
190 unsigned a = SkGetPackedA32(c);
191 unsigned r = SkGetPackedR32(c);
192 unsigned g = SkGetPackedG32(c);
193 unsigned b = SkGetPackedB32(c);
194
195 SkA32Assert(a);
196 SkASSERT(r <= a);
197 SkASSERT(g <= a);
198 SkASSERT(b <= a);
199 }
200 #else
201 #define SkPMColorAssert(c)
202 #endif
203
204 /**
205 * Pack the components into a SkPMColor, checking (in the debug version) that
206 * the components are 0..255, and are already premultiplied (i.e. alpha >= color)
207 */
SkPackARGB32(U8CPU a,U8CPU r,U8CPU g,U8CPU b)208 static inline SkPMColor SkPackARGB32(U8CPU a, U8CPU r, U8CPU g, U8CPU b) {
209 SkA32Assert(a);
210 SkASSERT(r <= a);
211 SkASSERT(g <= a);
212 SkASSERT(b <= a);
213
214 return (a << SK_A32_SHIFT) | (r << SK_R32_SHIFT) |
215 (g << SK_G32_SHIFT) | (b << SK_B32_SHIFT);
216 }
217
218 /**
219 * Abstract 4-byte interpolation, implemented on top of SkPMColor
220 * utility functions. Third parameter controls blending of the first two:
221 * (src, dst, 0) returns dst
222 * (src, dst, 0xFF) returns src
223 */
SkFourByteInterp(SkPMColor src,SkPMColor dst,U8CPU srcWeight)224 static inline SkPMColor SkFourByteInterp(SkPMColor src, SkPMColor dst,
225 U8CPU srcWeight) {
226 unsigned scale = SkAlpha255To256(srcWeight);
227
228 unsigned a = SkAlphaBlend(SkGetPackedA32(src), SkGetPackedA32(dst), scale);
229 unsigned r = SkAlphaBlend(SkGetPackedR32(src), SkGetPackedR32(dst), scale);
230 unsigned g = SkAlphaBlend(SkGetPackedG32(src), SkGetPackedG32(dst), scale);
231 unsigned b = SkAlphaBlend(SkGetPackedB32(src), SkGetPackedB32(dst), scale);
232
233 return SkPackARGB32(a, r, g, b);
234 }
235
236 /**
237 * 32b optimized version; currently appears to be 10% faster even on 64b
238 * architectures than an equivalent 64b version and 30% faster than
239 * SkFourByteInterp(). Third parameter controls blending of the first two:
240 * (src, dst, 0) returns dst
241 * (src, dst, 0xFF) returns src
242 * ** Does not match the results of SkFourByteInterp() because we use
243 * a more accurate scale computation!
 * TODO: migrate Skia function to using an accurate 255->256 alpha
245 * conversion.
246 */
static inline SkPMColor SkFastFourByteInterp(SkPMColor src,
                                             SkPMColor dst,
                                             U8CPU srcWeight) {
    SkASSERT(srcWeight < 256);

    // Reorders ARGB to AG-RB in order to reduce the number of operations.
    const uint32_t mask = 0xFF00FF;
    uint32_t src_rb = src & mask;
    uint32_t src_ag = (src >> 8) & mask;
    uint32_t dst_rb = dst & mask;
    uint32_t dst_ag = (dst >> 8) & mask;

    // scale = srcWeight + (srcWeight >> 7) is more accurate than
    // scale = srcWeight + 1, but 7% slower
    int scale = srcWeight + (srcWeight >> 7);

    // Each channel sits in its own 16-bit lane with 8 bits of headroom, so
    // the scaled sum cannot spill into the neighboring channel.
    uint32_t ret_rb = src_rb * scale + (256 - scale) * dst_rb;
    uint32_t ret_ag = src_ag * scale + (256 - scale) * dst_ag;

    // Keep the high byte of each lane (the implicit >> 8) and re-interleave.
    return (ret_ag & ~mask) | ((ret_rb & ~mask) >> 8);
}
268
269 /**
270 * Same as SkPackARGB32, but this version guarantees to not check that the
271 * values are premultiplied in the debug version.
272 */
SkPackARGB32NoCheck(U8CPU a,U8CPU r,U8CPU g,U8CPU b)273 static inline SkPMColor SkPackARGB32NoCheck(U8CPU a, U8CPU r, U8CPU g, U8CPU b) {
274 return (a << SK_A32_SHIFT) | (r << SK_R32_SHIFT) |
275 (g << SK_G32_SHIFT) | (b << SK_B32_SHIFT);
276 }
277
278 static inline
SkPremultiplyARGBInline(U8CPU a,U8CPU r,U8CPU g,U8CPU b)279 SkPMColor SkPremultiplyARGBInline(U8CPU a, U8CPU r, U8CPU g, U8CPU b) {
280 SkA32Assert(a);
281 SkA32Assert(r);
282 SkA32Assert(g);
283 SkA32Assert(b);
284
285 if (a != 255) {
286 r = SkMulDiv255Round(r, a);
287 g = SkMulDiv255Round(g, a);
288 b = SkMulDiv255Round(b, a);
289 }
290 return SkPackARGB32(a, r, g, b);
291 }
292
293 SK_API extern const uint32_t gMask_00FF00FF;
294
SkAlphaMulQ(uint32_t c,unsigned scale)295 static inline uint32_t SkAlphaMulQ(uint32_t c, unsigned scale) {
296 uint32_t mask = gMask_00FF00FF;
297 // uint32_t mask = 0xFF00FF;
298
299 uint32_t rb = ((c & mask) * scale) >> 8;
300 uint32_t ag = ((c >> 8) & mask) * scale;
301 return (rb & mask) | (ag & ~mask);
302 }
303
// Porter-Duff src-over for premultiplied colors: src + dst * (1 - srcAlpha).
static inline SkPMColor SkPMSrcOver(SkPMColor src, SkPMColor dst) {
    return src + SkAlphaMulQ(dst, SkAlpha255To256(255 - SkGetPackedA32(src)));
}
307
SkBlendARGB32(SkPMColor src,SkPMColor dst,U8CPU aa)308 static inline SkPMColor SkBlendARGB32(SkPMColor src, SkPMColor dst, U8CPU aa) {
309 SkASSERT((unsigned)aa <= 255);
310
311 unsigned src_scale = SkAlpha255To256(aa);
312 unsigned dst_scale = SkAlpha255To256(255 - SkAlphaMul(SkGetPackedA32(src), src_scale));
313
314 return SkAlphaMulQ(src, src_scale) + SkAlphaMulQ(dst, dst_scale);
315 }
316
317 ////////////////////////////////////////////////////////////////////////////////////////////
318 // Convert a 32bit pixel to a 16bit pixel (no dither)
319
320 #define SkR32ToR16_MACRO(r) ((unsigned)(r) >> (SK_R32_BITS - SK_R16_BITS))
321 #define SkG32ToG16_MACRO(g) ((unsigned)(g) >> (SK_G32_BITS - SK_G16_BITS))
322 #define SkB32ToB16_MACRO(b) ((unsigned)(b) >> (SK_B32_BITS - SK_B16_BITS))
323
324 #ifdef SK_DEBUG
// Debug version: assert r fits in 8 bits before truncating 8 -> 5 bits.
static inline unsigned SkR32ToR16(unsigned r) {
    SkR32Assert(r);
    return SkR32ToR16_MACRO(r);
}
// Debug version: assert g fits in 8 bits before truncating 8 -> 6 bits.
static inline unsigned SkG32ToG16(unsigned g) {
    SkG32Assert(g);
    return SkG32ToG16_MACRO(g);
}
// Debug version: assert b fits in 8 bits before truncating 8 -> 5 bits.
static inline unsigned SkB32ToB16(unsigned b) {
    SkB32Assert(b);
    return SkB32ToB16_MACRO(b);
}
337 #else
338 #define SkR32ToR16(r) SkR32ToR16_MACRO(r)
339 #define SkG32ToG16(g) SkG32ToG16_MACRO(g)
340 #define SkB32ToB16(b) SkB32ToB16_MACRO(b)
341 #endif
342
343 #define SkPacked32ToR16(c) (((unsigned)(c) >> (SK_R32_SHIFT + SK_R32_BITS - SK_R16_BITS)) & SK_R16_MASK)
344 #define SkPacked32ToG16(c) (((unsigned)(c) >> (SK_G32_SHIFT + SK_G32_BITS - SK_G16_BITS)) & SK_G16_MASK)
345 #define SkPacked32ToB16(c) (((unsigned)(c) >> (SK_B32_SHIFT + SK_B32_BITS - SK_B16_BITS)) & SK_B16_MASK)
346
SkPixel32ToPixel16(SkPMColor c)347 static inline U16CPU SkPixel32ToPixel16(SkPMColor c) {
348 unsigned r = ((c >> (SK_R32_SHIFT + (8 - SK_R16_BITS))) & SK_R16_MASK) << SK_R16_SHIFT;
349 unsigned g = ((c >> (SK_G32_SHIFT + (8 - SK_G16_BITS))) & SK_G16_MASK) << SK_G16_SHIFT;
350 unsigned b = ((c >> (SK_B32_SHIFT + (8 - SK_B16_BITS))) & SK_B16_MASK) << SK_B16_SHIFT;
351 return r | g | b;
352 }
353
// Narrow three 8-bit components to 5/6/5 and pack them as RGB565.
static inline U16CPU SkPack888ToRGB16(U8CPU r, U8CPU g, U8CPU b) {
    return  (SkR32ToR16(r) << SK_R16_SHIFT) |
            (SkG32ToG16(g) << SK_G16_SHIFT) |
            (SkB32ToB16(b) << SK_B16_SHIFT);
}
359
360 #define SkPixel32ToPixel16_ToU16(src) SkToU16(SkPixel32ToPixel16(src))
361
362 /////////////////////////////////////////////////////////////////////////////////////////
363 // Fast dither from 32->16
364
365 #define SkShouldDitherXY(x, y) (((x) ^ (y)) & 1)
366
// Dither three 8-bit components down to 5/6/5 and pack as RGB565. Each
// channel computes 2*v minus the round-tripped (truncate-then-replicate)
// value, which biases the truncation by the quantization error.
static inline uint16_t SkDitherPack888ToRGB16(U8CPU r, U8CPU g, U8CPU b) {
    r = ((r << 1) - ((r >> (8 - SK_R16_BITS) << (8 - SK_R16_BITS)) | (r >> SK_R16_BITS))) >> (8 - SK_R16_BITS);
    g = ((g << 1) - ((g >> (8 - SK_G16_BITS) << (8 - SK_G16_BITS)) | (g >> SK_G16_BITS))) >> (8 - SK_G16_BITS);
    b = ((b << 1) - ((b >> (8 - SK_B16_BITS) << (8 - SK_B16_BITS)) | (b >> SK_B16_BITS))) >> (8 - SK_B16_BITS);

    return SkPackRGB16(r, g, b);
}
374
SkDitherPixel32ToPixel16(SkPMColor c)375 static inline uint16_t SkDitherPixel32ToPixel16(SkPMColor c) {
376 return SkDitherPack888ToRGB16(SkGetPackedR32(c), SkGetPackedG32(c), SkGetPackedB32(c));
377 }
378
379 /* Return c in expanded_rgb_16 format, but also scaled up by 32 (5 bits)
380 It is now suitable for combining with a scaled expanded_rgb_16 color
381 as in SkSrcOver32To16().
382 We must do this 565 high-bit replication, in order for the subsequent add
383 to saturate properly (and not overflow). If we take the 8 bits as is, it is
384 possible to overflow.
385 */
static inline uint32_t SkPMColorToExpanded16x5(SkPMColor c) {
    // Take the 5/6/5 truncation of each 8-bit channel...
    unsigned sr = SkPacked32ToR16(c);
    unsigned sg = SkPacked32ToG16(c);
    unsigned sb = SkPacked32ToB16(c);

    // ...then replicate the high bits (565-style expansion, scaled by 32):
    // red/blue replicate all 5 bits, green replicates its top 5 of 6.
    sr = (sr << 5) | sr;
    sg = (sg << 5) | (sg >> 1);
    sb = (sb << 5) | sb;
    // Lay out in expanded_rgb_16 positions (green in the upper half-word).
    return (sr << 11) | (sg << 21) | (sb << 0);
}
396
397 /* SrcOver the 32bit src color with the 16bit dst, returning a 16bit value
398 (with dirt in the high 16bits, so caller beware).
399 */
static inline U16CPU SkSrcOver32To16(SkPMColor src, uint16_t dst) {
    unsigned sr = SkGetPackedR32(src);
    unsigned sg = SkGetPackedG32(src);
    unsigned sb = SkGetPackedB32(src);

    unsigned dr = SkGetPackedR16(dst);
    unsigned dg = SkGetPackedG16(dst);
    unsigned db = SkGetPackedB16(dst);

    // Inverse src alpha, 0..255.
    unsigned isa = 255 - SkGetPackedA32(src);

    // Scale each 565 dst channel by isa (rounded, kept at 8 bits via the
    // per-channel shift in SkMul16ShiftRound), add the 8-bit src channel,
    // then truncate the sum back down to 5/6/5.
    dr = (sr + SkMul16ShiftRound(dr, isa, SK_R16_BITS)) >> (8 - SK_R16_BITS);
    dg = (sg + SkMul16ShiftRound(dg, isa, SK_G16_BITS)) >> (8 - SK_G16_BITS);
    db = (sb + SkMul16ShiftRound(db, isa, SK_B16_BITS)) >> (8 - SK_B16_BITS);

    return SkPackRGB16(dr, dg, db);
}
417
418 ////////////////////////////////////////////////////////////////////////////////////////////
419 // Convert a 16bit pixel to a 32bit pixel
420
// Expand a 5-bit red value to 8 bits by replicating its high bits into the
// low bits, so 0 -> 0 and SK_R16_MASK -> 255 exactly.
static inline unsigned SkR16ToR32(unsigned r) {
    return (r << (8 - SK_R16_BITS)) | (r >> (2 * SK_R16_BITS - 8));
}
424
// Expand a 6-bit green value to 8 bits by replicating its high bits into the
// low bits, so 0 -> 0 and SK_G16_MASK -> 255 exactly.
static inline unsigned SkG16ToG32(unsigned g) {
    return (g << (8 - SK_G16_BITS)) | (g >> (2 * SK_G16_BITS - 8));
}
428
// Expand a 5-bit blue value to 8 bits by replicating its high bits into the
// low bits, so 0 -> 0 and SK_B16_MASK -> 255 exactly.
static inline unsigned SkB16ToB32(unsigned b) {
    return (b << (8 - SK_B16_BITS)) | (b >> (2 * SK_B16_BITS - 8));
}
432
433 #define SkPacked16ToR32(c) SkR16ToR32(SkGetPackedR16(c))
434 #define SkPacked16ToG32(c) SkG16ToG32(SkGetPackedG16(c))
435 #define SkPacked16ToB32(c) SkB16ToB32(SkGetPackedB16(c))
436
SkPixel16ToPixel32(U16CPU src)437 static inline SkPMColor SkPixel16ToPixel32(U16CPU src) {
438 SkASSERT(src == SkToU16(src));
439
440 unsigned r = SkPacked16ToR32(src);
441 unsigned g = SkPacked16ToG32(src);
442 unsigned b = SkPacked16ToB32(src);
443
444 SkASSERT((r >> (8 - SK_R16_BITS)) == SkGetPackedR16(src));
445 SkASSERT((g >> (8 - SK_G16_BITS)) == SkGetPackedG16(src));
446 SkASSERT((b >> (8 - SK_B16_BITS)) == SkGetPackedB16(src));
447
448 return SkPackARGB32(0xFF, r, g, b);
449 }
450
451 // similar to SkPixel16ToPixel32, but returns SkColor instead of SkPMColor
SkPixel16ToColor(U16CPU src)452 static inline SkColor SkPixel16ToColor(U16CPU src) {
453 SkASSERT(src == SkToU16(src));
454
455 unsigned r = SkPacked16ToR32(src);
456 unsigned g = SkPacked16ToG32(src);
457 unsigned b = SkPacked16ToB32(src);
458
459 SkASSERT((r >> (8 - SK_R16_BITS)) == SkGetPackedR16(src));
460 SkASSERT((g >> (8 - SK_G16_BITS)) == SkGetPackedG16(src));
461 SkASSERT((b >> (8 - SK_B16_BITS)) == SkGetPackedB16(src));
462
463 return SkColorSetRGB(r, g, b);
464 }
465
466 ///////////////////////////////////////////////////////////////////////////////
467
468 typedef uint16_t SkPMColor16;
469
470 // Put in OpenGL order (r g b a)
471 #define SK_A4444_SHIFT 0
472 #define SK_R4444_SHIFT 12
473 #define SK_G4444_SHIFT 8
474 #define SK_B4444_SHIFT 4
475
476 #define SkA32To4444(a) ((unsigned)(a) >> 4)
477 #define SkR32To4444(r) ((unsigned)(r) >> 4)
478 #define SkG32To4444(g) ((unsigned)(g) >> 4)
479 #define SkB32To4444(b) ((unsigned)(b) >> 4)
480
SkReplicateNibble(unsigned nib)481 static inline U8CPU SkReplicateNibble(unsigned nib) {
482 SkASSERT(nib <= 0xF);
483 return (nib << 4) | nib;
484 }
485
486 #define SkA4444ToA32(a) SkReplicateNibble(a)
487 #define SkR4444ToR32(r) SkReplicateNibble(r)
488 #define SkG4444ToG32(g) SkReplicateNibble(g)
489 #define SkB4444ToB32(b) SkReplicateNibble(b)
490
491 #define SkGetPackedA4444(c) (((unsigned)(c) >> SK_A4444_SHIFT) & 0xF)
492 #define SkGetPackedR4444(c) (((unsigned)(c) >> SK_R4444_SHIFT) & 0xF)
493 #define SkGetPackedG4444(c) (((unsigned)(c) >> SK_G4444_SHIFT) & 0xF)
494 #define SkGetPackedB4444(c) (((unsigned)(c) >> SK_B4444_SHIFT) & 0xF)
495
496 #define SkPacked4444ToA32(c) SkReplicateNibble(SkGetPackedA4444(c))
497 #define SkPacked4444ToR32(c) SkReplicateNibble(SkGetPackedR4444(c))
498 #define SkPacked4444ToG32(c) SkReplicateNibble(SkGetPackedG4444(c))
499 #define SkPacked4444ToB32(c) SkReplicateNibble(SkGetPackedB4444(c))
500
501 #ifdef SK_DEBUG
SkPMColor16Assert(U16CPU c)502 static inline void SkPMColor16Assert(U16CPU c) {
503 unsigned a = SkGetPackedA4444(c);
504 unsigned r = SkGetPackedR4444(c);
505 unsigned g = SkGetPackedG4444(c);
506 unsigned b = SkGetPackedB4444(c);
507
508 SkASSERT(a <= 0xF);
509 SkASSERT(r <= a);
510 SkASSERT(g <= a);
511 SkASSERT(b <= a);
512 }
513 #else
514 #define SkPMColor16Assert(c)
515 #endif
516
// Turn 0..15 into 0..16 (so 0xF maps to a full 16), the 4-bit analogue of
// SkAlpha255To256.
static inline unsigned SkAlpha15To16(unsigned a) {
    SkASSERT(a <= 0xF);
    return a + (a >> 3);
}
521
522 #ifdef SK_DEBUG
// Debug build: multiply value by a 0..16 scale and shift down 4, asserting
// the scale range. (The release build below is the bare expression.)
static inline int SkAlphaMul4(int value, int scale) {
    SkASSERT((unsigned)scale <= 0x10);
    return value * scale >> 4;
}
527 #else
528 #define SkAlphaMul4(value, scale) ((value) * (scale) >> 4)
529 #endif
530
// Expand a 4-bit red value to the 5-bit 565 field by replicating high bits.
static inline unsigned SkR4444ToR565(unsigned r) {
    SkASSERT(r <= 0xF);
    return (r << (SK_R16_BITS - 4)) | (r >> (8 - SK_R16_BITS));
}
535
// Expand a 4-bit green value to the 6-bit 565 field by replicating high bits.
static inline unsigned SkG4444ToG565(unsigned g) {
    SkASSERT(g <= 0xF);
    return (g << (SK_G16_BITS - 4)) | (g >> (8 - SK_G16_BITS));
}
540
// Expand a 4-bit blue value to the 5-bit 565 field by replicating high bits.
static inline unsigned SkB4444ToB565(unsigned b) {
    SkASSERT(b <= 0xF);
    return (b << (SK_B16_BITS - 4)) | (b >> (8 - SK_B16_BITS));
}
545
SkPackARGB4444(unsigned a,unsigned r,unsigned g,unsigned b)546 static inline SkPMColor16 SkPackARGB4444(unsigned a, unsigned r,
547 unsigned g, unsigned b) {
548 SkASSERT(a <= 0xF);
549 SkASSERT(r <= a);
550 SkASSERT(g <= a);
551 SkASSERT(b <= a);
552
553 return (SkPMColor16)((a << SK_A4444_SHIFT) | (r << SK_R4444_SHIFT) |
554 (g << SK_G4444_SHIFT) | (b << SK_B4444_SHIFT));
555 }
556
557 extern const uint16_t gMask_0F0F;
558
// Scale all four 4444 channels by a 0..16 scale in two lane-pairs.
static inline U16CPU SkAlphaMulQ4(U16CPU c, unsigned scale) {
    SkASSERT(scale <= 16);

    const unsigned mask = 0xF0F; //gMask_0F0F;

#if 0
    // Straightforward two-multiply variant, kept for reference.
    unsigned rb = ((c & mask) * scale) >> 4;
    unsigned ag = ((c >> 4) & mask) * scale;
    return (rb & mask) | (ag & ~mask);
#else
    // Single-multiply variant: spread the AG pair 12 bits above the RB pair
    // so one multiply scales all four nibbles, then fold back together.
    c = (c & mask) | ((c & (mask << 4)) << 12);
    c = c * scale >> 4;
    return (c & mask) | ((c >> 12) & (mask << 4));
#endif
}
574
575 /** Expand the SkPMColor16 color into a 32bit value that can be scaled all at
576 once by a value up to 16. Used in conjunction with SkCompact_4444.
577 */
static inline uint32_t SkExpand_4444(U16CPU c) {
    SkASSERT(c == (uint16_t)c);

    // Move the A and G nibbles 12 bits up so every nibble has room to be
    // multiplied by up to 16 without colliding with its neighbor.
    const unsigned mask = 0xF0F;    //gMask_0F0F;
    return (c & mask) | ((c & ~mask) << 12);
}
584
585 /** Compress an expanded value (from SkExpand_4444) back down to a SkPMColor16.
586 NOTE: this explicitly does not clean the top 16 bits (which may be garbage).
587 It does this for speed, since if it is being written directly to 16bits of
588 memory, the top 16bits will be ignored. Casting the result to uint16_t here
589 would add 2 more instructions, slow us down. It is up to the caller to
590 perform the cast if needed.
591 */
static inline U16CPU SkCompact_4444(uint32_t c) {
    // Inverse of SkExpand_4444; the top 16 bits may hold garbage (see the
    // comment above), so callers must truncate if they need a clean value.
    const unsigned mask = 0xF0F;    //gMask_0F0F;
    return (c & mask) | ((c >> 12) & ~mask);
}
596
// Src-over a premultiplied 4444 pixel onto a 565 dst, returning 565.
static inline uint16_t SkSrcOver4444To16(SkPMColor16 s, uint16_t d) {
    unsigned sa = SkGetPackedA4444(s);
    // Widen src color channels from 4 bits to their 5/6/5 positions.
    unsigned sr = SkR4444ToR565(SkGetPackedR4444(s));
    unsigned sg = SkG4444ToG565(SkGetPackedG4444(s));
    unsigned sb = SkB4444ToB565(SkGetPackedB4444(s));

    // To avoid overflow, we have to clear the low bit of the synthetic sg
    // if the src alpha is <= 7.
    // to see why, try blending 0x4444 on top of 565-white and watch green
    // overflow (sum == 64)
    sg &= ~(~(sa >> 3) & 1);

    // Scale dst by the inverse src alpha (as a 0..16 scale).
    unsigned scale = SkAlpha15To16(15 - sa);
    unsigned dr = SkAlphaMul4(SkGetPackedR16(d), scale);
    unsigned dg = SkAlphaMul4(SkGetPackedG16(d), scale);
    unsigned db = SkAlphaMul4(SkGetPackedB16(d), scale);

#if 0
    if (sg + dg > 63) {
        SkDebugf("---- SkSrcOver4444To16 src=%x dst=%x scale=%d, sg=%d dg=%d\n", s, d, scale, sg, dg);
    }
#endif
    return SkPackRGB16(sr + dr, sg + dg, sb + db);
}
621
SkBlend4444To16(SkPMColor16 src,uint16_t dst,int scale16)622 static inline uint16_t SkBlend4444To16(SkPMColor16 src, uint16_t dst, int scale16) {
623 SkASSERT((unsigned)scale16 <= 16);
624
625 return SkSrcOver4444To16(SkAlphaMulQ4(src, scale16), dst);
626 }
627
// Blend two premultiplied 4444 pixels by a 0..16 scale applied to src.
static inline uint16_t SkBlend4444(SkPMColor16 src, SkPMColor16 dst, int scale16) {
    SkASSERT((unsigned)scale16 <= 16);

    uint32_t src32 = SkExpand_4444(src) * scale16;
    // the scaled srcAlpha is the bottom byte
#ifdef SK_DEBUG
    {
        unsigned srcA = SkGetPackedA4444(src) * scale16;
        SkASSERT(srcA == (src32 & 0xFF));
    }
#endif
    // dst scale: (256 - scaledSrcAlpha*... ) reduced to the 0..16 range.
    unsigned dstScale = SkAlpha255To256(255 - (src32 & 0xFF)) >> 4;
    uint32_t dst32 = SkExpand_4444(dst) * dstScale;
    return SkCompact_4444((src32 + dst32) >> 4);
}
643
SkPixel4444ToPixel32(U16CPU c)644 static inline SkPMColor SkPixel4444ToPixel32(U16CPU c) {
645 uint32_t d = (SkGetPackedA4444(c) << SK_A32_SHIFT) |
646 (SkGetPackedR4444(c) << SK_R32_SHIFT) |
647 (SkGetPackedG4444(c) << SK_G32_SHIFT) |
648 (SkGetPackedB4444(c) << SK_B32_SHIFT);
649 return d | (d << 4);
650 }
651
// Truncate an 8888 pixel to 4444 by keeping the high nibble of each channel.
static inline SkPMColor16 SkPixel32ToPixel4444(SkPMColor c) {
    return  (((c >> (SK_A32_SHIFT + 4)) & 0xF) << SK_A4444_SHIFT) |
            (((c >> (SK_R32_SHIFT + 4)) & 0xF) << SK_R4444_SHIFT) |
            (((c >> (SK_G32_SHIFT + 4)) & 0xF) << SK_G4444_SHIFT) |
            (((c >> (SK_B32_SHIFT + 4)) & 0xF) << SK_B4444_SHIFT);
}
658
659 // cheap 2x2 dither
// Dither four 8-bit components down to 4444 (cheap 2x2-style dithering).
static inline SkPMColor16 SkDitherARGB32To4444(U8CPU a, U8CPU r,
                                               U8CPU g, U8CPU b) {
    // to ensure that we stay a legal premultiplied color, we take the max()
    // of the truncated and dithered alpha values. If we didn't, cases like
    // SkDitherARGB32To4444(0x31, 0x2E, ...) would generate SkPackARGB4444(2, 3, ...)
    // which is not legal premultiplied, since a < color
    unsigned dithered_a = ((a << 1) - ((a >> 4 << 4) | (a >> 4))) >> 4;
    a = SkMax32(a >> 4, dithered_a);
    // these we just dither in place
    r = ((r << 1) - ((r >> 4 << 4) | (r >> 4))) >> 4;
    g = ((g << 1) - ((g >> 4 << 4) | (g >> 4))) >> 4;
    b = ((b << 1) - ((b >> 4 << 4) | (b >> 4))) >> 4;

    return SkPackARGB4444(a, r, g, b);
}
675
SkDitherPixel32To4444(SkPMColor c)676 static inline SkPMColor16 SkDitherPixel32To4444(SkPMColor c) {
677 return SkDitherARGB32To4444(SkGetPackedA32(c), SkGetPackedR32(c),
678 SkGetPackedG32(c), SkGetPackedB32(c));
679 }
680
681 /* Assumes 16bit is in standard RGBA order.
682 Transforms a normal ARGB_8888 into the same byte order as
683 expanded ARGB_4444, but keeps each component 8bits
684 */
// Reorder an 8888 pixel into R<<24 | B<<16 | G<<8 | A, i.e. the same byte
// order as expanded 4444, but with full 8-bit components.
static inline uint32_t SkExpand_8888(SkPMColor c) {
    return  (((c >> SK_R32_SHIFT) & 0xFF) << 24) |
            (((c >> SK_G32_SHIFT) & 0xFF) <<  8) |
            (((c >> SK_B32_SHIFT) & 0xFF) << 16) |
            (((c >> SK_A32_SHIFT) & 0xFF) <<  0);
}
691
692 /* Undo the operation of SkExpand_8888, turning the argument back into
693 a SkPMColor.
694 */
// Inverse of SkExpand_8888: move each byte back to its SK_*32_SHIFT position.
static inline SkPMColor SkCompact_8888(uint32_t c) {
    return  (((c >> 24) & 0xFF) << SK_R32_SHIFT) |
            (((c >>  8) & 0xFF) << SK_G32_SHIFT) |
            (((c >> 16) & 0xFF) << SK_B32_SHIFT) |
            (((c >>  0) & 0xFF) << SK_A32_SHIFT);
}
701
702 /* Like SkExpand_8888, this transforms a pmcolor into the expanded 4444 format,
703 but this routine just keeps the high 4bits of each component in the low
704 4bits of the result (just like a newly expanded PMColor16).
705 */
// Like SkExpand_8888, but keeps only the high nibble of each component in
// the low 4 bits of each expanded byte (a freshly-expanded PMColor16 shape).
static inline uint32_t SkExpand32_4444(SkPMColor c) {
    return  (((c >> (SK_R32_SHIFT + 4)) & 0xF) << 24) |
            (((c >> (SK_G32_SHIFT + 4)) & 0xF) <<  8) |
            (((c >> (SK_B32_SHIFT + 4)) & 0xF) << 16) |
            (((c >> (SK_A32_SHIFT + 4)) & 0xF) <<  0);
}
712
// takes two values and alternates them as part of a memset16
714 // used for cheap 2x2 dithering when the colors are opaque
715 void sk_dither_memset16(uint16_t dst[], uint16_t value, uint16_t other, int n);
716
717 ///////////////////////////////////////////////////////////////////////////////
718
// Turn 0..31 into 0..32 (31 maps to a full 32), so the result can be used
// with shift-based blends like SkBlend32.
static inline int SkUpscale31To32(int value) {
    SkASSERT((unsigned)value <= 31);
    return value + (value >> 4);
}
723
// Blend two 8-bit values by a 0..32 scale: dst + (src - dst) * scale / 32.
static inline int SkBlend32(int src, int dst, int scale) {
    SkASSERT((unsigned)src <= 0xFF);
    SkASSERT((unsigned)dst <= 0xFF);
    SkASSERT((unsigned)scale <= 32);
    return dst + ((src - dst) * scale >> 5);
}
730
// Blend src (already split into components) onto an opaque dst using a 565
// per-channel LCD coverage mask. srcA must already be upscaled to 0..256.
static inline SkPMColor SkBlendLCD16(int srcA, int srcR, int srcG, int srcB,
                                     SkPMColor dst, uint16_t mask) {
    if (mask == 0) {
        return dst;
    }

    /* We want all of these in 5bits, hence the shifts in case one of them
     * (green) is 6bits.
     */
    int maskR = SkGetPackedR16(mask) >> (SK_R16_BITS - 5);
    int maskG = SkGetPackedG16(mask) >> (SK_G16_BITS - 5);
    int maskB = SkGetPackedB16(mask) >> (SK_B16_BITS - 5);

    // Now upscale them to 0..32, so we can use blend32
    maskR = SkUpscale31To32(maskR);
    maskG = SkUpscale31To32(maskG);
    maskB = SkUpscale31To32(maskB);

    // srcA has been upscaled to 256 before passed into this function
    maskR = maskR * srcA >> 8;
    maskG = maskG * srcA >> 8;
    maskB = maskB * srcA >> 8;

    int dstR = SkGetPackedR32(dst);
    int dstG = SkGetPackedG32(dst);
    int dstB = SkGetPackedB32(dst);

    // LCD blitting is only supported if the dst is known/required
    // to be opaque
    return SkPackARGB32(0xFF,
                        SkBlend32(srcR, dstR, maskR),
                        SkBlend32(srcG, dstG, maskG),
                        SkBlend32(srcB, dstB, maskB));
}
765
// Opaque-src variant of SkBlendLCD16: no srcA scaling, and a fully-covered
// mask short-circuits to the precomputed opaqueDst.
static inline SkPMColor SkBlendLCD16Opaque(int srcR, int srcG, int srcB,
                                           SkPMColor dst, uint16_t mask,
                                           SkPMColor opaqueDst) {
    if (mask == 0) {
        return dst;
    }

    if (0xFFFF == mask) {
        return opaqueDst;
    }

    /* We want all of these in 5bits, hence the shifts in case one of them
     * (green) is 6bits.
     */
    int maskR = SkGetPackedR16(mask) >> (SK_R16_BITS - 5);
    int maskG = SkGetPackedG16(mask) >> (SK_G16_BITS - 5);
    int maskB = SkGetPackedB16(mask) >> (SK_B16_BITS - 5);

    // Now upscale them to 0..32, so we can use blend32
    maskR = SkUpscale31To32(maskR);
    maskG = SkUpscale31To32(maskG);
    maskB = SkUpscale31To32(maskB);

    int dstR = SkGetPackedR32(dst);
    int dstG = SkGetPackedG32(dst);
    int dstB = SkGetPackedB32(dst);

    // LCD blitting is only supported if the dst is known/required
    // to be opaque
    return SkPackARGB32(0xFF,
                        SkBlend32(srcR, dstR, maskR),
                        SkBlend32(srcG, dstG, maskG),
                        SkBlend32(srcB, dstB, maskB));
}
800
// Blit a row of LCD 565 coverage values with a (possibly translucent) solid
// color. The trailing unnamed SkPMColor is ignored; it only keeps this
// signature parallel with SkBlitLCD16OpaqueRow.
static inline void SkBlitLCD16Row(SkPMColor dst[], const uint16_t src[],
                                  SkColor color, int width, SkPMColor) {
    int srcA = SkColorGetA(color);
    int srcR = SkColorGetR(color);
    int srcG = SkColorGetG(color);
    int srcB = SkColorGetB(color);

    // SkBlendLCD16 expects alpha already upscaled to 0..256.
    srcA = SkAlpha255To256(srcA);

    for (int i = 0; i < width; i++) {
        dst[i] = SkBlendLCD16(srcA, srcR, srcG, srcB, dst[i], src[i]);
    }
}
814
// Blit a row of LCD 565 coverage values with an opaque solid color;
// opaqueDst is the precomputed full-coverage result.
static inline void SkBlitLCD16OpaqueRow(SkPMColor dst[], const uint16_t src[],
                                        SkColor color, int width,
                                        SkPMColor opaqueDst) {
    int srcR = SkColorGetR(color);
    int srcG = SkColorGetG(color);
    int srcB = SkColorGetB(color);

    for (int i = 0; i < width; i++) {
        dst[i] = SkBlendLCD16Opaque(srcR, srcG, srcB, dst[i], src[i],
                                    opaqueDst);
    }
}
827
828 #endif
829
830