1 /*
2 * Copyright 2015 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8 #ifndef SkColor_opts_neon_DEFINED
9 #define SkColor_opts_neon_DEFINED
10
11 #include "SkTypes.h"
12 #include "SkColorPriv.h"
13
14 #include <arm_neon.h>
15
/* Per-channel lane indices: each is the channel's SK_*32_SHIFT divided by 8.
 * The functions in this header use them to index the val[] array of a
 * uint8x8x4_t, so that val[NEON_R] holds the red bytes of 8 pixels, and so on.
 * NOTE(review): SK_*32_SHIFT are defined elsewhere and are presumably bit
 * shifts that are multiples of 8 in the range [0, 24] -- confirm.
 */
#define NEON_A (SK_A32_SHIFT / 8)
#define NEON_R (SK_R32_SHIFT / 8)
#define NEON_G (SK_G32_SHIFT / 8)
#define NEON_B (SK_B32_SHIFT / 8)
20
/* Widens 8 alpha bytes to 16-bit lanes and adds 1 to each of them, so an
 * input range of [0, 255] becomes [1, 256].
 */
static inline uint16x8_t SkAlpha255To256_neon8(uint8x8_t alpha) {
    const uint8x8_t one = vdup_n_u8(1);

    /* Widening add: the sum is formed in 16 bits, so 255 + 1 does not wrap. */
    return vaddl_u8(alpha, one);
}
24
/* Scales 8 bytes of one channel: each lane becomes (color * scale) >> 8.
 *
 * color: 8 unsigned bytes.
 * scale: 8 16-bit multipliers, one per lane.
 *
 * The product is formed in 16-bit lanes (modulo 2^16) and then shifted right
 * by 8 and narrowed back to bytes.
 * NOTE(review): callers presumably pass scale in [0, 256], for which the
 * product always fits in 16 bits -- confirm.
 */
static inline uint8x8_t SkAlphaMul_neon8(uint8x8_t color, uint16x8_t scale) {
    const uint16x8_t widened = vmovl_u8(color);
    const uint16x8_t product = vmulq_u16(widened, scale);

    return vshrn_n_u16(product, 8);
}
28
/* Applies SkAlphaMul_neon8 to all four channels of 8 de-interleaved pixels,
 * using the same per-lane scale for every channel.
 *
 * color: four byte vectors, indexed by NEON_A / NEON_R / NEON_G / NEON_B.
 * scale: 8 16-bit multipliers, lane i being applied to pixel i.
 *
 * Returns the scaled channels in the same layout as the input.
 */
static inline uint8x8x4_t SkAlphaMulQ_neon8(uint8x8x4_t color, uint16x8_t scale) {
    uint8x8x4_t scaled;

    /* The four channels are independent of each other. */
    scaled.val[NEON_B] = SkAlphaMul_neon8(color.val[NEON_B], scale);
    scaled.val[NEON_G] = SkAlphaMul_neon8(color.val[NEON_G], scale);
    scaled.val[NEON_R] = SkAlphaMul_neon8(color.val[NEON_R], scale);
    scaled.val[NEON_A] = SkAlphaMul_neon8(color.val[NEON_A], scale);

    return scaled;
}
39
/* Expands 8 RGB565 pixels (R in the high bits, then G, then B in the low
 * bits) into 8 SkPMColor pixels, for any SkPMColor channel ordering. The
 * result is meant to match SkPixel16ToPixel32 exactly.
 *
 * vsrc: 8 packed 16-bit pixels.
 *
 * Returns four byte vectors indexed by NEON_A / NEON_R / NEON_G / NEON_B;
 * alpha is set to 0xFF in every lane.
 */
static inline uint8x8x4_t SkPixel16ToPixel32_neon8(uint16x8_t vsrc) {
    /* Bring each field down to the low bits of its 16-bit lane. */
    const uint16x8_t red16 = vshrq_n_u16(vsrc, SK_R16_SHIFT);
    /* Shifting left by the red width drops red; shifting back right by the
     * red and blue widths then drops blue as well. */
    const uint16x8_t green16 = vshrq_n_u16(vshlq_n_u16(vsrc, SK_R16_BITS),
                                           SK_R16_BITS + SK_B16_BITS);
    const uint16x8_t blue16 = vandq_u16(vsrc, vdupq_n_u16(SK_B16_MASK));

    const uint8x8_t red = vmovn_u16(red16);
    const uint8x8_t green = vmovn_u16(green16);
    const uint8x8_t blue = vmovn_u16(blue16);

    /* Widen every field to 8 bits: move it to the top of the byte and fill
     * the vacated low bits with a copy of the field's own high bits. */
    uint8x8x4_t pixels;

    pixels.val[NEON_A] = vdup_n_u8(0xFF);
    pixels.val[NEON_R] = vorr_u8(vshl_n_u8(red, 8 - SK_R16_BITS),
                                 vshr_n_u8(red, 2 * SK_R16_BITS - 8));
    pixels.val[NEON_G] = vorr_u8(vshl_n_u8(green, 8 - SK_G16_BITS),
                                 vshr_n_u8(green, 2 * SK_G16_BITS - 8));
    pixels.val[NEON_B] = vorr_u8(vshl_n_u8(blue, 8 - SK_B16_BITS),
                                 vshr_n_u8(blue, 2 * SK_B16_BITS - 8));

    return pixels;
}
60
/* Packs 8 SkPMColor pixels (any channel ordering) into 8 RGB565 pixels with
 * R in the high bits, then G, then B in the low bits. The result is meant to
 * match SkPixel32ToPixel16 exactly. Alpha is ignored.
 *
 * vsrc: four byte vectors indexed by NEON_A / NEON_R / NEON_G / NEON_B.
 */
static inline uint16x8_t SkPixel32ToPixel16_neon8(uint8x8x4_t vsrc) {
    /* Put every channel in the top byte of a 16-bit lane. */
    const uint16x8_t red = vshll_n_u8(vsrc.val[NEON_R], 8);
    const uint16x8_t green = vshll_n_u8(vsrc.val[NEON_G], 8);
    const uint16x8_t blue = vshll_n_u8(vsrc.val[NEON_B], 8);

    /* Shift-right-and-insert keeps the top bits already in place and drops
     * the next channel in directly below them. */
    const uint16x8_t red_green = vsriq_n_u16(red, green, SK_R16_BITS);

    return vsriq_n_u16(red_green, blue, SK_R16_BITS + SK_G16_BITS);
}
75
/* Blends 8 bytes of one channel; the result is meant to match SkBlend32
 * exactly. Per lane it computes
 *
 *     dst + (((src - dst) * scale) >> 5)
 *
 * in signed 16-bit arithmetic (arithmetic right shift) and returns the low
 * byte of the sum.
 *
 * NOTE(review): the shift by 5 suggests scale is a weight in [0, 32] --
 * confirm against SkBlend32.
 */
static inline uint8x8_t SkBlend32_neon8(uint8x8_t src, uint8x8_t dst, uint16x8_t scale) {
    /* Widen to signed 16 bits so that src - dst can go negative. */
    const int16x8_t src16 = vreinterpretq_s16_u16(vmovl_u8(src));
    const int16x8_t dst16 = vreinterpretq_s16_u16(vmovl_u8(dst));
    const int16x8_t weight = vreinterpretq_s16_u16(scale);

    const int16x8_t weighted = vmulq_s16(vsubq_s16(src16, dst16), weight);
    const int16x8_t blended = vaddq_s16(dst16, vshrq_n_s16(weighted, 5));

    return vmovn_u16(vreinterpretq_u16_s16(blended));
}
91
/* Interpolates between two packed pixels, one byte at a time:
 *
 *     result = dst + (((src - dst) * srcScale) >> 8)      (per byte)
 *
 * src, dst: packed 32-bit pixels.
 * srcScale: weight of src; asserted to be <= 256.
 *
 * Returns the blended pixel, packed the same way as the inputs.
 *
 * The intermediate product can exceed the signed 16-bit range (for example
 * 255 * 256). The arithmetic is modulo 2^16 and only bits 8..15 of the
 * product contribute to the byte that is finally kept, so the narrowed
 * result is not affected.
 */
static inline SkPMColor SkFourByteInterp256_neon(SkPMColor src, SkPMColor dst,
                                                 unsigned srcScale) {
    SkASSERT(srcScale <= 256);

    /* Spread the four bytes of each pixel over 16-bit lanes. The pixel is
     * duplicated into both halves of the vector; only the low half is read
     * back at the end. */
    const int16x8_t src16 =
            vreinterpretq_s16_u16(vmovl_u8(vreinterpret_u8_u32(vdup_n_u32(src))));
    const int16x8_t dst16 =
            vreinterpretq_s16_u16(vmovl_u8(vreinterpret_u8_u32(vdup_n_u32(dst))));
    const int16x8_t weight = vdupq_n_s16(srcScale);

    const int16x8_t delta = vshrq_n_s16(vmulq_s16(vsubq_s16(src16, dst16), weight), 8);
    const uint8x8_t packed = vmovn_u16(vreinterpretq_u16_s16(vaddq_s16(dst16, delta)));

    return vget_lane_u32(vreinterpret_u32_u8(packed), 0);
}
113
/* Interpolates between two packed pixels with an 8-bit weight.
 *
 * src, dst:  packed 32-bit pixels.
 * srcWeight: weight of src; asserted to be <= 255.
 *
 * The weight is converted with SkAlpha255To256 and the blend itself is done
 * by SkFourByteInterp256_neon.
 */
static inline SkPMColor SkFourByteInterp_neon(SkPMColor src, SkPMColor dst,
                                              U8CPU srcWeight) {
    SkASSERT(srcWeight <= 255);

    return SkFourByteInterp256_neon(src, dst, SkAlpha255To256(srcWeight));
}
120
121 #endif /* #ifndef SkColor_opts_neon_DEFINED */
122