1 /*
2 * Copyright 2014 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8 #include <arm_neon.h>
9
// Names of the functions generated by this header. Each one is routed through
// MAKENAME, which is not defined in this file and must be provided before the
// header is included.
#define SCALE_NOFILTER_NAME     MAKENAME(_nofilter_scale)
#define SCALE_FILTER_NAME       MAKENAME(_filter_scale)

#define PACK_FILTER_X_NAME      MAKENAME(_pack_filter_x)
#define PACK_FILTER_Y_NAME      MAKENAME(_pack_filter_y)
#define PACK_FILTER_X4_NAME     MAKENAME(_pack_filter_x4)
#define PACK_FILTER_Y4_NAME     MAKENAME(_pack_filter_y4)

// Optional hooks. When PREAMBLE has not been defined beforehand, the preamble
// statement and the extra per-axis parameter/argument lists all expand to
// nothing.
#if !defined(PREAMBLE)
    #define PREAMBLE(state)
    #define PREAMBLE_PARAM_X
    #define PREAMBLE_PARAM_Y
    #define PREAMBLE_ARG_X
    #define PREAMBLE_ARG_Y
#endif
25
SCALE_NOFILTER_NAME(const SkBitmapProcState & s,uint32_t xy[],int count,int x,int y)26 static void SCALE_NOFILTER_NAME(const SkBitmapProcState& s,
27 uint32_t xy[], int count, int x, int y) {
28 SkASSERT((s.fInvType & ~(SkMatrix::kTranslate_Mask |
29 SkMatrix::kScale_Mask)) == 0);
30
31 PREAMBLE(s);
32
33 // we store y, x, x, x, x, x
34 const unsigned maxX = s.fPixmap.width() - 1;
35 SkFractionalInt fx;
36 {
37 const SkBitmapProcStateAutoMapper mapper(s, x, y);
38 const unsigned maxY = s.fPixmap.height() - 1;
39 *xy++ = TILEY_PROCF(mapper.fixedY(), maxY);
40 fx = mapper.fractionalIntX();
41 }
42
43 if (0 == maxX) {
44 // all of the following X values must be 0
45 memset(xy, 0, count * sizeof(uint16_t));
46 return;
47 }
48
49 const SkFractionalInt dx = s.fInvSxFractionalInt;
50
51 #ifdef CHECK_FOR_DECAL
52 // test if we don't need to apply the tile proc
53 const SkFixed fixedFx = SkFractionalIntToFixed(fx);
54 const SkFixed fixedDx = SkFractionalIntToFixed(dx);
55 if (can_truncate_to_fixed_for_decal(fixedFx, fixedDx, count, maxX)) {
56 decal_nofilter_scale_neon(xy, fixedFx, fixedDx, count);
57 return;
58 }
59 #endif
60
61 if (count >= 8) {
62 SkFractionalInt dx2 = dx+dx;
63 SkFractionalInt dx4 = dx2+dx2;
64 SkFractionalInt dx8 = dx4+dx4;
65
66 // now build fx/fx+dx/fx+2dx/fx+3dx
67 SkFractionalInt fx1, fx2, fx3;
68 int32x4_t lbase, hbase;
69 int16_t *dst16 = (int16_t *)xy;
70
71 fx1 = fx+dx;
72 fx2 = fx1+dx;
73 fx3 = fx2+dx;
74
75 lbase = vdupq_n_s32(SkFractionalIntToFixed(fx));
76 lbase = vsetq_lane_s32(SkFractionalIntToFixed(fx1), lbase, 1);
77 lbase = vsetq_lane_s32(SkFractionalIntToFixed(fx2), lbase, 2);
78 lbase = vsetq_lane_s32(SkFractionalIntToFixed(fx3), lbase, 3);
79 hbase = vaddq_s32(lbase, vdupq_n_s32(SkFractionalIntToFixed(dx4)));
80
81 // store & bump
82 while (count >= 8) {
83
84 int16x8_t fx8;
85
86 fx8 = TILEX_PROCF_NEON8(lbase, hbase, maxX);
87
88 vst1q_s16(dst16, fx8);
89
90 // but preserving base & on to the next
91 lbase = vaddq_s32 (lbase, vdupq_n_s32(SkFractionalIntToFixed(dx8)));
92 hbase = vaddq_s32 (hbase, vdupq_n_s32(SkFractionalIntToFixed(dx8)));
93 dst16 += 8;
94 count -= 8;
95 fx += dx8;
96 };
97 xy = (uint32_t *) dst16;
98 }
99
100 uint16_t* xx = (uint16_t*)xy;
101 for (int i = count; i > 0; --i) {
102 *xx++ = TILEX_PROCF(SkFractionalIntToFixed(fx), maxX);
103 fx += dx;
104 }
105 }
106
PACK_FILTER_Y_NAME(SkFixed f,unsigned max,SkFixed one PREAMBLE_PARAM_Y)107 static inline uint32_t PACK_FILTER_Y_NAME(SkFixed f, unsigned max,
108 SkFixed one PREAMBLE_PARAM_Y) {
109 unsigned i = TILEY_PROCF(f, max);
110 i = (i << 4) | EXTRACT_LOW_BITS(f, max);
111 return (i << 14) | (TILEY_PROCF((f + one), max));
112 }
113
PACK_FILTER_X_NAME(SkFixed f,unsigned max,SkFixed one PREAMBLE_PARAM_X)114 static inline uint32_t PACK_FILTER_X_NAME(SkFixed f, unsigned max,
115 SkFixed one PREAMBLE_PARAM_X) {
116 unsigned i = TILEX_PROCF(f, max);
117 i = (i << 4) | EXTRACT_LOW_BITS(f, max);
118 return (i << 14) | (TILEX_PROCF((f + one), max));
119 }
120
PACK_FILTER_X4_NAME(int32x4_t f,unsigned max,SkFixed one PREAMBLE_PARAM_X)121 static inline int32x4_t PACK_FILTER_X4_NAME(int32x4_t f, unsigned max,
122 SkFixed one PREAMBLE_PARAM_X) {
123 int32x4_t ret, res, wide_one;
124
125 // Prepare constants
126 wide_one = vdupq_n_s32(one);
127
128 // Step 1
129 res = TILEX_PROCF_NEON4(f, max);
130
131 // Step 2
132 ret = EXTRACT_LOW_BITS_NEON4(f, max);
133 ret = vsliq_n_s32(ret, res, 4);
134
135 // Step 3
136 res = TILEX_PROCF_NEON4(f + wide_one, max);
137 ret = vorrq_s32(vshlq_n_s32(ret, 14), res);
138
139 return ret;
140 }
141
PACK_FILTER_Y4_NAME(int32x4_t f,unsigned max,SkFixed one PREAMBLE_PARAM_X)142 static inline int32x4_t PACK_FILTER_Y4_NAME(int32x4_t f, unsigned max,
143 SkFixed one PREAMBLE_PARAM_X) {
144 int32x4_t ret, res, wide_one;
145
146 // Prepare constants
147 wide_one = vdupq_n_s32(one);
148
149 // Step 1
150 res = TILEY_PROCF_NEON4(f, max);
151
152 // Step 2
153 ret = EXTRACT_LOW_BITS_NEON4(f, max);
154 ret = vsliq_n_s32(ret, res, 4);
155
156 // Step 3
157 res = TILEY_PROCF_NEON4(f + wide_one, max);
158 ret = vorrq_s32(vshlq_n_s32(ret, 14), res);
159
160 return ret;
161 }
162
SCALE_FILTER_NAME(const SkBitmapProcState & s,uint32_t xy[],int count,int x,int y)163 static void SCALE_FILTER_NAME(const SkBitmapProcState& s,
164 uint32_t xy[], int count, int x, int y) {
165 SkASSERT((s.fInvType & ~(SkMatrix::kTranslate_Mask |
166 SkMatrix::kScale_Mask)) == 0);
167 SkASSERT(s.fInvKy == 0);
168
169 PREAMBLE(s);
170
171 const unsigned maxX = s.fPixmap.width() - 1;
172 const SkFixed one = s.fFilterOneX;
173 const SkFractionalInt dx = s.fInvSxFractionalInt;
174 SkFractionalInt fx;
175
176 {
177 const SkBitmapProcStateAutoMapper mapper(s, x, y);
178 const SkFixed fy = mapper.fixedY();
179 const unsigned maxY = s.fPixmap.height() - 1;
180 // compute our two Y values up front
181 *xy++ = PACK_FILTER_Y_NAME(fy, maxY, s.fFilterOneY PREAMBLE_ARG_Y);
182 // now initialize fx
183 fx = mapper.fractionalIntX();
184 }
185
186 #ifdef CHECK_FOR_DECAL
187 // test if we don't need to apply the tile proc
188 const SkFixed fixedFx = SkFractionalIntToFixed(fx);
189 const SkFixed fixedDx = SkFractionalIntToFixed(dx);
190 if (can_truncate_to_fixed_for_decal(fixedFx, fixedDx, count, maxX)) {
191 decal_filter_scale_neon(xy, fixedFx, fixedDx, count);
192 return;
193 }
194 #endif
195 {
196
197 if (count >= 4) {
198 int32x4_t wide_fx;
199
200 wide_fx = vdupq_n_s32(SkFractionalIntToFixed(fx));
201 wide_fx = vsetq_lane_s32(SkFractionalIntToFixed(fx+dx), wide_fx, 1);
202 wide_fx = vsetq_lane_s32(SkFractionalIntToFixed(fx+dx+dx), wide_fx, 2);
203 wide_fx = vsetq_lane_s32(SkFractionalIntToFixed(fx+dx+dx+dx), wide_fx, 3);
204
205 while (count >= 4) {
206 int32x4_t res;
207
208 res = PACK_FILTER_X4_NAME(wide_fx, maxX, one PREAMBLE_ARG_X);
209
210 vst1q_u32(xy, vreinterpretq_u32_s32(res));
211
212 wide_fx += vdupq_n_s32(SkFractionalIntToFixed(dx+dx+dx+dx));
213 fx += dx+dx+dx+dx;
214 xy += 4;
215 count -= 4;
216 }
217 }
218
219 while (--count >= 0) {
220 *xy++ = PACK_FILTER_X_NAME(SkFractionalIntToFixed(fx), maxX, one PREAMBLE_ARG_X);
221 fx += dx;
222 }
223
224 }
225 }
226
227 const SkBitmapProcState::MatrixProc MAKENAME(_Procs)[] = {
228 SCALE_NOFILTER_NAME,
229 SCALE_FILTER_NAME,
230 };
231
// Tear down the configuration macros consumed by this header and the helper
// macros it defined -- presumably so it can be included again with a
// different configuration.

// NEON tiling macros.
#undef TILEX_PROCF_NEON8
#undef TILEY_PROCF_NEON8
#undef TILEX_PROCF_NEON4
#undef TILEY_PROCF_NEON4
#undef EXTRACT_LOW_BITS_NEON4

// Scalar configuration macros.
#undef MAKENAME
#undef TILEX_PROCF
#undef TILEY_PROCF
#ifdef CHECK_FOR_DECAL
    #undef CHECK_FOR_DECAL
#endif

// Function-name macros defined at the top of this header. The PACK_FILTER_*
// names are released here too, so none of them outlive the MAKENAME they
// expand to.
#undef SCALE_NOFILTER_NAME
#undef SCALE_FILTER_NAME
#undef PACK_FILTER_X_NAME
#undef PACK_FILTER_Y_NAME
#undef PACK_FILTER_X4_NAME
#undef PACK_FILTER_Y4_NAME

#undef PREAMBLE
#undef PREAMBLE_PARAM_X
#undef PREAMBLE_PARAM_Y
#undef PREAMBLE_ARG_X
#undef PREAMBLE_ARG_Y

#undef EXTRACT_LOW_BITS
255