/*
 * Copyright 2014 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

8 #include <arm_neon.h>
9 
10 #define SCALE_NOFILTER_NAME     MAKENAME(_nofilter_scale)
11 #define SCALE_FILTER_NAME       MAKENAME(_filter_scale)
12 #define AFFINE_NOFILTER_NAME    MAKENAME(_nofilter_affine)
13 #define AFFINE_FILTER_NAME      MAKENAME(_filter_affine)
14 
15 #define PACK_FILTER_X_NAME  MAKENAME(_pack_filter_x)
16 #define PACK_FILTER_Y_NAME  MAKENAME(_pack_filter_y)
17 #define PACK_FILTER_X4_NAME MAKENAME(_pack_filter_x4)
18 #define PACK_FILTER_Y4_NAME MAKENAME(_pack_filter_y4)
19 
20 #ifndef PREAMBLE
21     #define PREAMBLE(state)
22     #define PREAMBLE_PARAM_X
23     #define PREAMBLE_PARAM_Y
24     #define PREAMBLE_ARG_X
25     #define PREAMBLE_ARG_Y
26 #endif
27 
SCALE_NOFILTER_NAME(const SkBitmapProcState & s,uint32_t xy[],int count,int x,int y)28 static void SCALE_NOFILTER_NAME(const SkBitmapProcState& s,
29                                 uint32_t xy[], int count, int x, int y) {
30     SkASSERT((s.fInvType & ~(SkMatrix::kTranslate_Mask |
31                              SkMatrix::kScale_Mask)) == 0);
32 
33     PREAMBLE(s);
34 
35     // we store y, x, x, x, x, x
36     const unsigned maxX = s.fPixmap.width() - 1;
37     SkFractionalInt fx;
38     {
39         const SkBitmapProcStateAutoMapper mapper(s, x, y);
40         const unsigned maxY = s.fPixmap.height() - 1;
41         *xy++ = TILEY_PROCF(mapper.fixedY(), maxY);
42         fx = mapper.fractionalIntX();
43     }
44 
45     if (0 == maxX) {
46         // all of the following X values must be 0
47         memset(xy, 0, count * sizeof(uint16_t));
48         return;
49     }
50 
51     const SkFractionalInt dx = s.fInvSxFractionalInt;
52 
53 #ifdef CHECK_FOR_DECAL
54     // test if we don't need to apply the tile proc
55     const SkFixed fixedFx = SkFractionalIntToFixed(fx);
56     const SkFixed fixedDx = SkFractionalIntToFixed(dx);
57     if (can_truncate_to_fixed_for_decal(fixedFx, fixedDx, count, maxX)) {
58         decal_nofilter_scale_neon(xy, fixedFx, fixedDx, count);
59         return;
60     }
61 #endif
62 
63     if (count >= 8) {
64         SkFractionalInt dx2 = dx+dx;
65         SkFractionalInt dx4 = dx2+dx2;
66         SkFractionalInt dx8 = dx4+dx4;
67 
68         // now build fx/fx+dx/fx+2dx/fx+3dx
69         SkFractionalInt fx1, fx2, fx3;
70         int32x4_t lbase, hbase;
71         int16_t *dst16 = (int16_t *)xy;
72 
73         fx1 = fx+dx;
74         fx2 = fx1+dx;
75         fx3 = fx2+dx;
76 
77         lbase = vdupq_n_s32(SkFractionalIntToFixed(fx));
78         lbase = vsetq_lane_s32(SkFractionalIntToFixed(fx1), lbase, 1);
79         lbase = vsetq_lane_s32(SkFractionalIntToFixed(fx2), lbase, 2);
80         lbase = vsetq_lane_s32(SkFractionalIntToFixed(fx3), lbase, 3);
81         hbase = vaddq_s32(lbase, vdupq_n_s32(SkFractionalIntToFixed(dx4)));
82 
83         // store & bump
84         while (count >= 8) {
85 
86             int16x8_t fx8;
87 
88             fx8 = TILEX_PROCF_NEON8(lbase, hbase, maxX);
89 
90             vst1q_s16(dst16, fx8);
91 
92             // but preserving base & on to the next
93             lbase = vaddq_s32 (lbase, vdupq_n_s32(SkFractionalIntToFixed(dx8)));
94             hbase = vaddq_s32 (hbase, vdupq_n_s32(SkFractionalIntToFixed(dx8)));
95             dst16 += 8;
96             count -= 8;
97             fx += dx8;
98         };
99         xy = (uint32_t *) dst16;
100     }
101 
102     uint16_t* xx = (uint16_t*)xy;
103     for (int i = count; i > 0; --i) {
104         *xx++ = TILEX_PROCF(SkFractionalIntToFixed(fx), maxX);
105         fx += dx;
106     }
107 }
108 
AFFINE_NOFILTER_NAME(const SkBitmapProcState & s,uint32_t xy[],int count,int x,int y)109 static void AFFINE_NOFILTER_NAME(const SkBitmapProcState& s,
110                                  uint32_t xy[], int count, int x, int y) {
111     SkASSERT(s.fInvType & SkMatrix::kAffine_Mask);
112     SkASSERT((s.fInvType & ~(SkMatrix::kTranslate_Mask |
113                              SkMatrix::kScale_Mask |
114                              SkMatrix::kAffine_Mask)) == 0);
115 
116     PREAMBLE(s);
117     const SkBitmapProcStateAutoMapper mapper(s, x, y);
118 
119     SkFractionalInt fx = mapper.fractionalIntX();
120     SkFractionalInt fy = mapper.fractionalIntY();
121     SkFractionalInt dx = s.fInvSxFractionalInt;
122     SkFractionalInt dy = s.fInvKyFractionalInt;
123     int maxX = s.fPixmap.width() - 1;
124     int maxY = s.fPixmap.height() - 1;
125 
126     if (count >= 8) {
127         SkFractionalInt dx4 = dx * 4;
128         SkFractionalInt dy4 = dy * 4;
129         SkFractionalInt dx8 = dx * 8;
130         SkFractionalInt dy8 = dy * 8;
131 
132         int32x4_t xbase, ybase;
133         int32x4_t x2base, y2base;
134         int16_t *dst16 = (int16_t *) xy;
135 
136         // now build fx, fx+dx, fx+2dx, fx+3dx
137         xbase = vdupq_n_s32(SkFractionalIntToFixed(fx));
138         xbase = vsetq_lane_s32(SkFractionalIntToFixed(fx+dx), xbase, 1);
139         xbase = vsetq_lane_s32(SkFractionalIntToFixed(fx+dx+dx), xbase, 2);
140         xbase = vsetq_lane_s32(SkFractionalIntToFixed(fx+dx+dx+dx), xbase, 3);
141 
142         // same for fy
143         ybase = vdupq_n_s32(SkFractionalIntToFixed(fy));
144         ybase = vsetq_lane_s32(SkFractionalIntToFixed(fy+dy), ybase, 1);
145         ybase = vsetq_lane_s32(SkFractionalIntToFixed(fy+dy+dy), ybase, 2);
146         ybase = vsetq_lane_s32(SkFractionalIntToFixed(fy+dy+dy+dy), ybase, 3);
147 
148         x2base = vaddq_s32(xbase, vdupq_n_s32(SkFractionalIntToFixed(dx4)));
149         y2base = vaddq_s32(ybase, vdupq_n_s32(SkFractionalIntToFixed(dy4)));
150 
151         // store & bump
152         do {
153             int16x8x2_t hi16;
154 
155             hi16.val[0] = TILEX_PROCF_NEON8(xbase, x2base, maxX);
156             hi16.val[1] = TILEY_PROCF_NEON8(ybase, y2base, maxY);
157 
158             vst2q_s16(dst16, hi16);
159 
160             // moving base and on to the next
161             xbase = vaddq_s32(xbase, vdupq_n_s32(SkFractionalIntToFixed(dx8)));
162             ybase = vaddq_s32(ybase, vdupq_n_s32(SkFractionalIntToFixed(dy8)));
163             x2base = vaddq_s32(x2base, vdupq_n_s32(SkFractionalIntToFixed(dx8)));
164             y2base = vaddq_s32(y2base, vdupq_n_s32(SkFractionalIntToFixed(dy8)));
165 
166             dst16 += 16; // 8x32 aka 16x16
167             count -= 8;
168             fx += dx8;
169             fy += dy8;
170         } while (count >= 8);
171         xy = (uint32_t *) dst16;
172     }
173 
174     for (int i = count; i > 0; --i) {
175         *xy++ = (TILEY_PROCF(SkFractionalIntToFixed(fy), maxY) << 16) |
176                  TILEX_PROCF(SkFractionalIntToFixed(fx), maxX);
177         fx += dx; fy += dy;
178     }
179 }
180 
PACK_FILTER_Y_NAME(SkFixed f,unsigned max,SkFixed one PREAMBLE_PARAM_Y)181 static inline uint32_t PACK_FILTER_Y_NAME(SkFixed f, unsigned max,
182                                           SkFixed one PREAMBLE_PARAM_Y) {
183     unsigned i = TILEY_PROCF(f, max);
184     i = (i << 4) | EXTRACT_LOW_BITS(f, max);
185     return (i << 14) | (TILEY_PROCF((f + one), max));
186 }
187 
PACK_FILTER_X_NAME(SkFixed f,unsigned max,SkFixed one PREAMBLE_PARAM_X)188 static inline uint32_t PACK_FILTER_X_NAME(SkFixed f, unsigned max,
189                                           SkFixed one PREAMBLE_PARAM_X) {
190     unsigned i = TILEX_PROCF(f, max);
191     i = (i << 4) | EXTRACT_LOW_BITS(f, max);
192     return (i << 14) | (TILEX_PROCF((f + one), max));
193 }
194 
PACK_FILTER_X4_NAME(int32x4_t f,unsigned max,SkFixed one PREAMBLE_PARAM_X)195 static inline int32x4_t PACK_FILTER_X4_NAME(int32x4_t f, unsigned max,
196                                           SkFixed one PREAMBLE_PARAM_X) {
197     int32x4_t ret, res, wide_one;
198 
199     // Prepare constants
200     wide_one = vdupq_n_s32(one);
201 
202     // Step 1
203     res = TILEX_PROCF_NEON4(f, max);
204 
205     // Step 2
206     ret = EXTRACT_LOW_BITS_NEON4(f, max);
207     ret = vsliq_n_s32(ret, res, 4);
208 
209     // Step 3
210     res = TILEX_PROCF_NEON4(f + wide_one, max);
211     ret = vorrq_s32(vshlq_n_s32(ret, 14), res);
212 
213     return ret;
214 }
215 
PACK_FILTER_Y4_NAME(int32x4_t f,unsigned max,SkFixed one PREAMBLE_PARAM_X)216 static inline int32x4_t PACK_FILTER_Y4_NAME(int32x4_t f, unsigned max,
217                                           SkFixed one PREAMBLE_PARAM_X) {
218     int32x4_t ret, res, wide_one;
219 
220     // Prepare constants
221     wide_one = vdupq_n_s32(one);
222 
223     // Step 1
224     res = TILEY_PROCF_NEON4(f, max);
225 
226     // Step 2
227     ret = EXTRACT_LOW_BITS_NEON4(f, max);
228     ret = vsliq_n_s32(ret, res, 4);
229 
230     // Step 3
231     res = TILEY_PROCF_NEON4(f + wide_one, max);
232     ret = vorrq_s32(vshlq_n_s32(ret, 14), res);
233 
234     return ret;
235 }
236 
SCALE_FILTER_NAME(const SkBitmapProcState & s,uint32_t xy[],int count,int x,int y)237 static void SCALE_FILTER_NAME(const SkBitmapProcState& s,
238                               uint32_t xy[], int count, int x, int y) {
239     SkASSERT((s.fInvType & ~(SkMatrix::kTranslate_Mask |
240                              SkMatrix::kScale_Mask)) == 0);
241     SkASSERT(s.fInvKy == 0);
242 
243     PREAMBLE(s);
244 
245     const unsigned maxX = s.fPixmap.width() - 1;
246     const SkFixed one = s.fFilterOneX;
247     const SkFractionalInt dx = s.fInvSxFractionalInt;
248     SkFractionalInt fx;
249 
250     {
251         const SkBitmapProcStateAutoMapper mapper(s, x, y);
252         const SkFixed fy = mapper.fixedY();
253         const unsigned maxY = s.fPixmap.height() - 1;
254         // compute our two Y values up front
255         *xy++ = PACK_FILTER_Y_NAME(fy, maxY, s.fFilterOneY PREAMBLE_ARG_Y);
256         // now initialize fx
257         fx = mapper.fractionalIntX();
258     }
259 
260 #ifdef CHECK_FOR_DECAL
261     // test if we don't need to apply the tile proc
262     const SkFixed fixedFx = SkFractionalIntToFixed(fx);
263     const SkFixed fixedDx = SkFractionalIntToFixed(dx);
264     if (can_truncate_to_fixed_for_decal(fixedFx, fixedDx, count, maxX)) {
265         decal_filter_scale_neon(xy, fixedFx, fixedDx, count);
266         return;
267     }
268 #endif
269     {
270 
271     if (count >= 4) {
272         int32x4_t wide_fx;
273 
274         wide_fx = vdupq_n_s32(SkFractionalIntToFixed(fx));
275         wide_fx = vsetq_lane_s32(SkFractionalIntToFixed(fx+dx), wide_fx, 1);
276         wide_fx = vsetq_lane_s32(SkFractionalIntToFixed(fx+dx+dx), wide_fx, 2);
277         wide_fx = vsetq_lane_s32(SkFractionalIntToFixed(fx+dx+dx+dx), wide_fx, 3);
278 
279         while (count >= 4) {
280             int32x4_t res;
281 
282             res = PACK_FILTER_X4_NAME(wide_fx, maxX, one PREAMBLE_ARG_X);
283 
284             vst1q_u32(xy, vreinterpretq_u32_s32(res));
285 
286             wide_fx += vdupq_n_s32(SkFractionalIntToFixed(dx+dx+dx+dx));
287             fx += dx+dx+dx+dx;
288             xy += 4;
289             count -= 4;
290         }
291     }
292 
293     while (--count >= 0) {
294         *xy++ = PACK_FILTER_X_NAME(SkFractionalIntToFixed(fx), maxX, one PREAMBLE_ARG_X);
295         fx += dx;
296     }
297 
298     }
299 }
300 
AFFINE_FILTER_NAME(const SkBitmapProcState & s,uint32_t xy[],int count,int x,int y)301 static void AFFINE_FILTER_NAME(const SkBitmapProcState& s,
302                                uint32_t xy[], int count, int x, int y) {
303     SkASSERT(s.fInvType & SkMatrix::kAffine_Mask);
304     SkASSERT((s.fInvType & ~(SkMatrix::kTranslate_Mask |
305                              SkMatrix::kScale_Mask |
306                              SkMatrix::kAffine_Mask)) == 0);
307 
308     PREAMBLE(s);
309     const SkBitmapProcStateAutoMapper mapper(s, x, y);
310 
311     SkFixed oneX = s.fFilterOneX;
312     SkFixed oneY = s.fFilterOneY;
313     SkFixed fx = mapper.fixedX();
314     SkFixed fy = mapper.fixedY();
315     SkFixed dx = s.fInvSx;
316     SkFixed dy = s.fInvKy;
317     unsigned maxX = s.fPixmap.width() - 1;
318     unsigned maxY = s.fPixmap.height() - 1;
319 
320     if (count >= 4) {
321         int32x4_t wide_fy, wide_fx;
322 
323         wide_fx = vdupq_n_s32(fx);
324         wide_fx = vsetq_lane_s32(fx+dx, wide_fx, 1);
325         wide_fx = vsetq_lane_s32(fx+dx+dx, wide_fx, 2);
326         wide_fx = vsetq_lane_s32(fx+dx+dx+dx, wide_fx, 3);
327 
328         wide_fy = vdupq_n_s32(fy);
329         wide_fy = vsetq_lane_s32(fy+dy, wide_fy, 1);
330         wide_fy = vsetq_lane_s32(fy+dy+dy, wide_fy, 2);
331         wide_fy = vsetq_lane_s32(fy+dy+dy+dy, wide_fy, 3);
332 
333         while (count >= 4) {
334             int32x4x2_t vxy;
335 
336             // do the X side, then the Y side, then interleave them
337             vxy.val[0] = PACK_FILTER_Y4_NAME(wide_fy, maxY, oneY PREAMBLE_ARG_Y);
338             vxy.val[1] = PACK_FILTER_X4_NAME(wide_fx, maxX, oneX PREAMBLE_ARG_X);
339 
340             // interleave as YXYXYXYX as part of the storing
341             vst2q_s32((int32_t*)xy, vxy);
342 
343             // prepare next iteration
344             wide_fx += vdupq_n_s32(dx+dx+dx+dx);
345             fx += dx + dx + dx + dx;
346             wide_fy += vdupq_n_s32(dy+dy+dy+dy);
347             fy += dy+dy+dy+dy;
348             xy += 8; // 4 x's, 4 y's
349             count -= 4;
350         }
351     }
352 
353     while (--count >= 0) {
354         // NB: writing Y/X
355         *xy++ = PACK_FILTER_Y_NAME(fy, maxY, oneY PREAMBLE_ARG_Y);
356         fy += dy;
357         *xy++ = PACK_FILTER_X_NAME(fx, maxX, oneX PREAMBLE_ARG_X);
358         fx += dx;
359     }
360 }
361 
362 const SkBitmapProcState::MatrixProc MAKENAME(_Procs)[] = {
363     SCALE_NOFILTER_NAME,
364     SCALE_FILTER_NAME,
365     AFFINE_NOFILTER_NAME,
366     AFFINE_FILTER_NAME,
367 };
368 
// Clean up every macro this template consumed or defined, so that a later
// inclusion starts from a clean slate.
#undef TILEX_PROCF_NEON8
#undef TILEY_PROCF_NEON8
#undef TILEX_PROCF_NEON4
#undef TILEY_PROCF_NEON4
#undef EXTRACT_LOW_BITS_NEON4

#undef MAKENAME
#undef TILEX_PROCF
#undef TILEY_PROCF
#ifdef CHECK_FOR_DECAL
    #undef CHECK_FOR_DECAL
#endif

#undef SCALE_NOFILTER_NAME
#undef SCALE_FILTER_NAME
#undef AFFINE_NOFILTER_NAME
#undef AFFINE_FILTER_NAME

// The helper-name macros are defined at the top of this file alongside the
// proc-name macros, so they are released here as well.
#undef PACK_FILTER_X_NAME
#undef PACK_FILTER_Y_NAME
#undef PACK_FILTER_X4_NAME
#undef PACK_FILTER_Y4_NAME

#undef PREAMBLE
#undef PREAMBLE_PARAM_X
#undef PREAMBLE_PARAM_Y
#undef PREAMBLE_ARG_X
#undef PREAMBLE_ARG_Y

#undef EXTRACT_LOW_BITS
394