• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2014 Google Inc.
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 
8 #include <arm_neon.h>
9 
// Names of the matrix procs defined in this file. Each one is produced by
// applying MAKENAME (which this file uses but does not define) to a suffix.
#define SCALE_NOFILTER_NAME   MAKENAME(_nofilter_scale)
#define AFFINE_NOFILTER_NAME  MAKENAME(_nofilter_affine)
#define PERSP_NOFILTER_NAME   MAKENAME(_nofilter_persp)
#define SCALE_FILTER_NAME     MAKENAME(_filter_scale)
#define AFFINE_FILTER_NAME    MAKENAME(_filter_affine)
#define PERSP_FILTER_NAME     MAKENAME(_filter_persp)

// Names of the helpers that pack filter coordinates: scalar (X/Y) and
// four-lane NEON (X4/Y4) variants.
#define PACK_FILTER_X_NAME    MAKENAME(_pack_filter_x)
#define PACK_FILTER_X4_NAME   MAKENAME(_pack_filter_x4)
#define PACK_FILTER_Y_NAME    MAKENAME(_pack_filter_y)
#define PACK_FILTER_Y4_NAME   MAKENAME(_pack_filter_y4)

// If no PREAMBLE has been defined before this point, all five preamble hooks
// expand to nothing.
#if !defined(PREAMBLE)
    #define PREAMBLE(state)
    #define PREAMBLE_PARAM_X
    #define PREAMBLE_PARAM_Y
    #define PREAMBLE_ARG_X
    #define PREAMBLE_ARG_Y
#endif
29 
/**
 *  Unfiltered matrix proc for matrices limited to translate and scale.
 *
 *  Writes one tiled Y value into the first 32-bit slot of xy, then `count`
 *  tiled X values as consecutive 16-bit entries. X values are produced eight
 *  at a time with NEON while at least eight remain; the rest are done one by
 *  one.
 */
static void SCALE_NOFILTER_NAME(const SkBitmapProcState& s,
                                uint32_t xy[], int count, int x, int y) {
    SkASSERT((s.fInvType & ~(SkMatrix::kTranslate_Mask |
                             SkMatrix::kScale_Mask)) == 0);

    PREAMBLE(s);

    const unsigned maxX = s.fPixmap.width() - 1;
    SkFractionalInt fx;
    {
        // The mapper is only needed for the single Y value and the starting X.
        const SkBitmapProcStateAutoMapper mapper(s, x, y);
        const unsigned maxY = s.fPixmap.height() - 1;
        xy[0] = TILEY_PROCF(mapper.fixedY(), maxY);
        xy += 1;
        fx = mapper.fractionalIntX();
    }

    if (maxX == 0) {
        // With a width of one, the only possible X index is 0.
        memset(xy, 0, count * sizeof(uint16_t));
        return;
    }

    const SkFractionalInt dx = s.fInvSxFractionalInt;

#ifdef CHECK_FOR_DECAL
    // Take the decal path when the tile proc is not needed for this span.
    if (can_truncate_to_fixed_for_decal(fx, dx, count, maxX)) {
        decal_nofilter_scale_neon(xy, SkFractionalIntToFixed(fx),
                                  SkFractionalIntToFixed(dx), count);
        return;
    }
#endif

    if (count >= 8) {
        const SkFractionalInt dx8 = dx * 8;

        // loLanes = { fx, fx+dx, fx+2dx, fx+3dx }; hiLanes is the same plus 4dx.
        int32x4_t loLanes = vdupq_n_s32(SkFractionalIntToFixed(fx));
        loLanes = vsetq_lane_s32(SkFractionalIntToFixed(fx + dx), loLanes, 1);
        loLanes = vsetq_lane_s32(SkFractionalIntToFixed(fx + dx + dx), loLanes, 2);
        loLanes = vsetq_lane_s32(SkFractionalIntToFixed(fx + dx + dx + dx), loLanes, 3);
        int32x4_t hiLanes = vaddq_s32(loLanes,
                                      vdupq_n_s32(SkFractionalIntToFixed(dx * 4)));

        // Both lane groups advance by 8*dx per iteration.
        const int32x4_t step = vdupq_n_s32(SkFractionalIntToFixed(dx8));
        int16_t* out16 = (int16_t*)xy;

        do {
            const int16x8_t tiled = TILEX_PROCF_NEON8(loLanes, hiLanes, maxX);
            vst1q_s16(out16, tiled);

            loLanes = vaddq_s32(loLanes, step);
            hiLanes = vaddq_s32(hiLanes, step);
            out16 += 8;
            fx += dx8;   // keep the scalar position in step for the tail loop
            count -= 8;
        } while (count >= 8);

        xy = (uint32_t*)out16;
    }

    // Scalar tail: whatever is left after the eight-wide loop.
    uint16_t* out = (uint16_t*)xy;
    for (; count > 0; --count) {
        *out++ = TILEX_PROCF(SkFractionalIntToFixed(fx), maxX);
        fx += dx;
    }
}
109 
/**
 *  Unfiltered matrix proc for affine matrices.
 *
 *  Emits `count` 32-bit entries into xy. In the scalar tail each entry is the
 *  tiled Y in the upper 16 bits and the tiled X in the lower 16 bits; the NEON
 *  loop stores interleaved 16-bit X, Y pairs, eight pixels per iteration.
 */
static void AFFINE_NOFILTER_NAME(const SkBitmapProcState& s,
                                 uint32_t xy[], int count, int x, int y) {
    SkASSERT(s.fInvType & SkMatrix::kAffine_Mask);
    SkASSERT((s.fInvType & ~(SkMatrix::kTranslate_Mask |
                             SkMatrix::kScale_Mask |
                             SkMatrix::kAffine_Mask)) == 0);

    PREAMBLE(s);
    const SkBitmapProcStateAutoMapper mapper(s, x, y);

    const SkFractionalInt dx = s.fInvSxFractionalInt;
    const SkFractionalInt dy = s.fInvKyFractionalInt;
    const int maxX = s.fPixmap.width() - 1;
    const int maxY = s.fPixmap.height() - 1;
    SkFractionalInt fx = mapper.fractionalIntX();
    SkFractionalInt fy = mapper.fractionalIntY();

    if (count >= 8) {
        const SkFractionalInt dx8 = dx * 8;
        const SkFractionalInt dy8 = dy * 8;

        // First four X positions: fx, fx+dx, fx+2dx, fx+3dx.
        int32x4_t xLo = vdupq_n_s32(SkFractionalIntToFixed(fx));
        xLo = vsetq_lane_s32(SkFractionalIntToFixed(fx + dx), xLo, 1);
        xLo = vsetq_lane_s32(SkFractionalIntToFixed(fx + dx + dx), xLo, 2);
        xLo = vsetq_lane_s32(SkFractionalIntToFixed(fx + dx + dx + dx), xLo, 3);

        // First four Y positions, built the same way.
        int32x4_t yLo = vdupq_n_s32(SkFractionalIntToFixed(fy));
        yLo = vsetq_lane_s32(SkFractionalIntToFixed(fy + dy), yLo, 1);
        yLo = vsetq_lane_s32(SkFractionalIntToFixed(fy + dy + dy), yLo, 2);
        yLo = vsetq_lane_s32(SkFractionalIntToFixed(fy + dy + dy + dy), yLo, 3);

        // Positions four to seven are the first four shifted by 4*d.
        int32x4_t xHi = vaddq_s32(xLo, vdupq_n_s32(SkFractionalIntToFixed(dx * 4)));
        int32x4_t yHi = vaddq_s32(yLo, vdupq_n_s32(SkFractionalIntToFixed(dy * 4)));

        // Every lane group advances by 8*d per iteration.
        const int32x4_t xStep = vdupq_n_s32(SkFractionalIntToFixed(dx8));
        const int32x4_t yStep = vdupq_n_s32(SkFractionalIntToFixed(dy8));

        int16_t* out16 = (int16_t*)xy;
        for (; count >= 8; count -= 8) {
            int16x8x2_t tiled;
            tiled.val[0] = TILEX_PROCF_NEON8(xLo, xHi, maxX);
            tiled.val[1] = TILEY_PROCF_NEON8(yLo, yHi, maxY);

            // vst2q interleaves val[0] and val[1]: x0 y0 x1 y1 ...
            vst2q_s16(out16, tiled);

            xLo = vaddq_s32(xLo, xStep);
            yLo = vaddq_s32(yLo, yStep);
            xHi = vaddq_s32(xHi, xStep);
            yHi = vaddq_s32(yHi, yStep);

            out16 += 16;  // eight 32-bit entries written as sixteen 16-bit values
            fx += dx8;    // keep the scalar positions in step for the tail loop
            fy += dy8;
        }
        xy = (uint32_t*)out16;
    }

    // Scalar tail for the remaining entries.
    for (; count > 0; --count) {
        *xy++ = (TILEY_PROCF(SkFractionalIntToFixed(fy), maxY) << 16) |
                 TILEX_PROCF(SkFractionalIntToFixed(fx), maxX);
        fx += dx;
        fy += dy;
    }
}
181 
/**
 *  Unfiltered matrix proc for perspective matrices.
 *
 *  Pulls batches of (x, y) fixed-point pairs from an SkPerspIter and writes
 *  one 32-bit entry per pair into xy: tiled Y in the upper 16 bits, tiled X in
 *  the lower 16 bits. Batches are processed eight pairs at a time with NEON
 *  while at least eight remain.
 */
static void PERSP_NOFILTER_NAME(const SkBitmapProcState& s,
                                uint32_t* SK_RESTRICT xy,
                                int count, int x, int y) {
    SkASSERT(s.fInvType & SkMatrix::kPerspective_Mask);

    PREAMBLE(s);
    // Held as int here; the packing below stores the tiled results in 16 bits.
    const int maxX = s.fPixmap.width() - 1;
    const int maxY = s.fPixmap.height() - 1;

    SkPerspIter iter(s.fInvMatrix,
                     SkIntToScalar(x) + SK_ScalarHalf,
                     SkIntToScalar(y) + SK_ScalarHalf, count);

    // `count` is reused as the size of the batch returned by the iterator.
    for (count = iter.next(); count != 0; count = iter.next()) {
        const SkFixed* SK_RESTRICT srcXY = iter.getXY();

        if (count >= 8) {
            const int32_t* in = (const int32_t*)srcXY;
            int16_t* out = (int16_t*)xy;
            do {
                // De-interleave sixteen values into val[0] = x, val[1] = y.
                const int32x4x2_t first = vld2q_s32(in);
                const int32x4x2_t second = vld2q_s32(in + 8);

                int16x8x2_t tiled;
                tiled.val[0] = TILEX_PROCF_NEON8(first.val[0], second.val[0], maxX);
                tiled.val[1] = TILEY_PROCF_NEON8(first.val[1], second.val[1], maxY);

                vst2q_s16(out, tiled);

                in += 16;    // eight (x, y) pairs consumed
                out += 16;   // eight 32-bit entries produced
                count -= 8;
            } while (count >= 8);

            // Hand the advanced positions back to the scalar tail.
            srcXY = (const SkFixed*)in;
            xy = (uint32_t*)out;
        }

        for (; count > 0; --count) {
            *xy++ = (TILEY_PROCF(srcXY[1], maxY) << 16) |
                     TILEX_PROCF(srcXY[0], maxX);
            srcXY += 2;
        }
    }
}
230 
PACK_FILTER_Y_NAME(SkFixed f,unsigned max,SkFixed one PREAMBLE_PARAM_Y)231 static inline uint32_t PACK_FILTER_Y_NAME(SkFixed f, unsigned max,
232                                           SkFixed one PREAMBLE_PARAM_Y) {
233     unsigned i = TILEY_PROCF(f, max);
234     i = (i << 4) | TILEY_LOW_BITS(f, max);
235     return (i << 14) | (TILEY_PROCF((f + one), max));
236 }
237 
PACK_FILTER_X_NAME(SkFixed f,unsigned max,SkFixed one PREAMBLE_PARAM_X)238 static inline uint32_t PACK_FILTER_X_NAME(SkFixed f, unsigned max,
239                                           SkFixed one PREAMBLE_PARAM_X) {
240     unsigned i = TILEX_PROCF(f, max);
241     i = (i << 4) | TILEX_LOW_BITS(f, max);
242     return (i << 14) | (TILEX_PROCF((f + one), max));
243 }
244 
/**
 *  Four-lane version of the X packing: each lane becomes
 *  ((tile(f) << 4) | lowBits(f)) << 14, OR'ed with tile(f + one).
 */
static inline int32x4_t PACK_FILTER_X4_NAME(int32x4_t f, unsigned max,
                                            SkFixed one PREAMBLE_PARAM_X) {
    // Tiled index of the first sample.
    const int32x4_t index0 = TILEX_PROCF_NEON4(f, max);

    // Start from the low bits, then insert the index shifted left by four
    // while keeping the bottom four bits.
    int32x4_t packed = TILEX_LOW_BITS_NEON4(f, max);
    packed = vsliq_n_s32(packed, index0, 4);

    // Tiled index of the second sample, one filter step further along.
    const int32x4_t nextF = vaddq_s32(f, vdupq_n_s32(one));
    const int32x4_t index1 = TILEX_PROCF_NEON4(nextF, max);

    return vorrq_s32(vshlq_n_s32(packed, 14), index1);
}
265 
/**
 *  Four-lane version of the Y packing: each lane becomes
 *  ((tile(f) << 4) | lowBits(f)) << 14, OR'ed with tile(f + one).
 *
 *  @param f    four fixed-point Y coordinates
 *  @param max  largest valid Y index, passed through to the TILEY_* macros
 *  @param one  fixed-point distance to the second filter sample
 *  @return     the four packed 32-bit results
 *
 *  The trailing preamble parameter is PREAMBLE_PARAM_Y: callers of this
 *  helper pass PREAMBLE_ARG_Y and the body only uses the TILEY_* macros, so
 *  the Y parameter is the one that must be declared.
 */
static inline int32x4_t PACK_FILTER_Y4_NAME(int32x4_t f, unsigned max,
                                            SkFixed one PREAMBLE_PARAM_Y) {
    int32x4_t ret, res, wide_one;

    // Broadcast the filter step to all four lanes.
    wide_one = vdupq_n_s32(one);

    // Tiled index of the first sample.
    res = TILEY_PROCF_NEON4(f, max);

    // Low bits in the bottom four bits, first index inserted above them.
    ret = TILEY_LOW_BITS_NEON4(f, max);
    ret = vsliq_n_s32(ret, res, 4);

    // Tiled index of the second sample goes into the low 14 bits.
    res = TILEY_PROCF_NEON4(vaddq_s32(f, wide_one), max);
    ret = vorrq_s32(vshlq_n_s32(ret, 14), res);

    return ret;
}
286 
/**
 *  Filtered matrix proc for matrices limited to translate and scale.
 *
 *  Writes one packed Y entry into the first slot of xy, followed by `count`
 *  packed X entries. X entries are produced four at a time with NEON while at
 *  least four remain; the rest are done one by one.
 */
static void SCALE_FILTER_NAME(const SkBitmapProcState& s,
                              uint32_t xy[], int count, int x, int y) {
    SkASSERT((s.fInvType & ~(SkMatrix::kTranslate_Mask |
                             SkMatrix::kScale_Mask)) == 0);
    SkASSERT(s.fInvKy == 0);

    PREAMBLE(s);

    const unsigned maxX = s.fPixmap.width() - 1;
    const SkFixed one = s.fFilterOneX;
    const SkFractionalInt dx = s.fInvSxFractionalInt;
    SkFractionalInt fx;

    {
        const SkBitmapProcStateAutoMapper mapper(s, x, y);
        const unsigned maxY = s.fPixmap.height() - 1;
        // Y is the same for the whole span, so both Y samples are packed once.
        xy[0] = PACK_FILTER_Y_NAME(mapper.fixedY(), maxY, s.fFilterOneY PREAMBLE_ARG_Y);
        xy += 1;
        // Starting X position for the span.
        fx = mapper.fractionalIntX();
    }

#ifdef CHECK_FOR_DECAL
    // Take the decal path when the tile proc is not needed for this span.
    if (can_truncate_to_fixed_for_decal(fx, dx, count, maxX)) {
        decal_filter_scale_neon(xy, SkFractionalIntToFixed(fx),
                                SkFractionalIntToFixed(dx), count);
        return;
    }
#endif

    if (count >= 4) {
        const SkFractionalInt dx4 = dx + dx + dx + dx;
        const int32x4_t step = vdupq_n_s32(SkFractionalIntToFixed(dx4));

        // Lanes hold fx, fx+dx, fx+2dx, fx+3dx.
        int32x4_t wideFx = vdupq_n_s32(SkFractionalIntToFixed(fx));
        wideFx = vsetq_lane_s32(SkFractionalIntToFixed(fx + dx), wideFx, 1);
        wideFx = vsetq_lane_s32(SkFractionalIntToFixed(fx + dx + dx), wideFx, 2);
        wideFx = vsetq_lane_s32(SkFractionalIntToFixed(fx + dx + dx + dx), wideFx, 3);

        do {
            const int32x4_t packed = PACK_FILTER_X4_NAME(wideFx, maxX, one PREAMBLE_ARG_X);
            vst1q_u32(xy, vreinterpretq_u32_s32(packed));

            wideFx = vaddq_s32(wideFx, step);
            fx += dx4;   // keep the scalar position in step for the tail loop
            xy += 4;
            count -= 4;
        } while (count >= 4);
    }

    // Scalar tail for the remaining entries.
    for (; count > 0; --count) {
        *xy++ = PACK_FILTER_X_NAME(SkFractionalIntToFixed(fx), maxX, one PREAMBLE_ARG_X);
        fx += dx;
    }
}
349 
/**
 *  Filtered matrix proc for affine matrices.
 *
 *  Emits two 32-bit entries per pixel into xy: the packed Y entry first, then
 *  the packed X entry. Four pixels are handled per NEON iteration while at
 *  least four remain; the rest are done one by one.
 */
static void AFFINE_FILTER_NAME(const SkBitmapProcState& s,
                               uint32_t xy[], int count, int x, int y) {
    SkASSERT(s.fInvType & SkMatrix::kAffine_Mask);
    SkASSERT((s.fInvType & ~(SkMatrix::kTranslate_Mask |
                             SkMatrix::kScale_Mask |
                             SkMatrix::kAffine_Mask)) == 0);

    PREAMBLE(s);
    const SkBitmapProcStateAutoMapper mapper(s, x, y);

    const SkFixed oneX = s.fFilterOneX;
    const SkFixed oneY = s.fFilterOneY;
    const SkFixed dx = s.fInvSx;
    const SkFixed dy = s.fInvKy;
    const unsigned maxX = s.fPixmap.width() - 1;
    const unsigned maxY = s.fPixmap.height() - 1;
    SkFixed fx = mapper.fixedX();
    SkFixed fy = mapper.fixedY();

    if (count >= 4) {
        const SkFixed dx4 = dx + dx + dx + dx;
        const SkFixed dy4 = dy + dy + dy + dy;
        const int32x4_t xStep = vdupq_n_s32(dx4);
        const int32x4_t yStep = vdupq_n_s32(dy4);

        // Lanes hold the positions of four consecutive pixels.
        int32x4_t wideFx = vdupq_n_s32(fx);
        wideFx = vsetq_lane_s32(fx + dx, wideFx, 1);
        wideFx = vsetq_lane_s32(fx + dx + dx, wideFx, 2);
        wideFx = vsetq_lane_s32(fx + dx + dx + dx, wideFx, 3);

        int32x4_t wideFy = vdupq_n_s32(fy);
        wideFy = vsetq_lane_s32(fy + dy, wideFy, 1);
        wideFy = vsetq_lane_s32(fy + dy + dy, wideFy, 2);
        wideFy = vsetq_lane_s32(fy + dy + dy + dy, wideFy, 3);

        do {
            // Pack Y into val[0] and X into val[1].
            int32x4x2_t packed;
            packed.val[0] = PACK_FILTER_Y4_NAME(wideFy, maxY, oneY PREAMBLE_ARG_Y);
            packed.val[1] = PACK_FILTER_X4_NAME(wideFx, maxX, oneX PREAMBLE_ARG_X);

            // vst2q interleaves the two vectors, giving Y X Y X Y X Y X.
            vst2q_s32((int32_t*)xy, packed);

            wideFx = vaddq_s32(wideFx, xStep);
            wideFy = vaddq_s32(wideFy, yStep);
            fx += dx4;   // keep the scalar positions in step for the tail loop
            fy += dy4;
            xy += 8;     // four Y entries plus four X entries
            count -= 4;
        } while (count >= 4);
    }

    // Scalar tail, same Y-then-X order as the vector path.
    for (; count > 0; --count) {
        *xy++ = PACK_FILTER_Y_NAME(fy, maxY, oneY PREAMBLE_ARG_Y);
        fy += dy;
        *xy++ = PACK_FILTER_X_NAME(fx, maxX, oneX PREAMBLE_ARG_X);
        fx += dx;
    }
}
410 
/**
 *  Filtered matrix proc for perspective matrices.
 *
 *  Pulls batches of (x, y) fixed-point pairs from an SkPerspIter, subtracts
 *  half of the filter step from each coordinate, and writes two 32-bit entries
 *  per pair into xy: packed Y first, then packed X. Four pairs are handled per
 *  NEON iteration while at least four remain in the batch.
 */
static void PERSP_FILTER_NAME(const SkBitmapProcState& s,
                              uint32_t* SK_RESTRICT xy, int count,
                              int x, int y) {
    SkASSERT(s.fInvType & SkMatrix::kPerspective_Mask);

    PREAMBLE(s);
    const unsigned maxX = s.fPixmap.width() - 1;
    const unsigned maxY = s.fPixmap.height() - 1;
    const SkFixed oneX = s.fFilterOneX;
    const SkFixed oneY = s.fFilterOneY;

    // Half-step offsets, in scalar and broadcast form.
    const SkFixed halfX = oneX >> 1;
    const SkFixed halfY = oneY >> 1;
    const int32x4_t wideHalfX = vdupq_n_s32(halfX);
    const int32x4_t wideHalfY = vdupq_n_s32(halfY);

    SkPerspIter iter(s.fInvMatrix,
                     SkIntToScalar(x) + SK_ScalarHalf,
                     SkIntToScalar(y) + SK_ScalarHalf, count);

    // `count` is reused as the size of the batch returned by the iterator.
    for (count = iter.next(); count != 0; count = iter.next()) {
        const SkFixed* SK_RESTRICT srcXY = iter.getXY();

        for (; count >= 4; count -= 4) {
            // De-interleave x-y-x-y-x-y-x-y into val[0] = x, val[1] = y.
            const int32x4x2_t src = vld2q_s32(srcXY);

            const int32x4_t wideX = vsubq_s32(src.val[0], wideHalfX);
            const int32x4_t wideY = vsubq_s32(src.val[1], wideHalfY);

            // Output order is Y then X, the reverse of the input order.
            int32x4x2_t packed;
            packed.val[0] = PACK_FILTER_Y4_NAME(wideY, maxY, oneY PREAMBLE_ARG_Y);
            packed.val[1] = PACK_FILTER_X4_NAME(wideX, maxX, oneX PREAMBLE_ARG_X);

            vst2q_s32((int32_t*)xy, packed);

            srcXY += 2 * 4;
            xy += 2 * 4;
        }

        // Scalar tail: read x/y, write y/x.
        for (; count > 0; --count) {
            *xy++ = PACK_FILTER_Y_NAME(srcXY[1] - halfY, maxY,
                                       oneY PREAMBLE_ARG_Y);
            *xy++ = PACK_FILTER_X_NAME(srcXY[0] - halfX, maxX,
                                       oneX PREAMBLE_ARG_X);
            srcXY += 2;
        }
    }
}
462 
463 const SkBitmapProcState::MatrixProc MAKENAME(_Procs)[] = {
464     SCALE_NOFILTER_NAME,
465     SCALE_FILTER_NAME,
466     AFFINE_NOFILTER_NAME,
467     AFFINE_FILTER_NAME,
468     PERSP_NOFILTER_NAME,
469     PERSP_FILTER_NAME
470 };
471 
// Cleanup: remove every macro this file consumed or defined.

// NEON tiling macros used by the procs above.
#undef TILEX_PROCF_NEON8
#undef TILEY_PROCF_NEON8
#undef TILEX_PROCF_NEON4
#undef TILEY_PROCF_NEON4
#undef TILEX_LOW_BITS_NEON4
#undef TILEY_LOW_BITS_NEON4

// Naming macro and scalar tiling macros.
#undef MAKENAME
#undef TILEX_PROCF
#undef TILEY_PROCF
#ifdef CHECK_FOR_DECAL
    #undef CHECK_FOR_DECAL
#endif

// Proc names defined at the top of this file.
#undef SCALE_NOFILTER_NAME
#undef SCALE_FILTER_NAME
#undef AFFINE_NOFILTER_NAME
#undef AFFINE_FILTER_NAME
#undef PERSP_NOFILTER_NAME
#undef PERSP_FILTER_NAME

// Packing-helper names defined at the top of this file. They were previously
// left defined after the end of the file, unlike the proc names above.
#undef PACK_FILTER_X_NAME
#undef PACK_FILTER_Y_NAME
#undef PACK_FILTER_X4_NAME
#undef PACK_FILTER_Y4_NAME

// Preamble hooks.
#undef PREAMBLE
#undef PREAMBLE_PARAM_X
#undef PREAMBLE_PARAM_Y
#undef PREAMBLE_ARG_X
#undef PREAMBLE_ARG_Y

// Scalar low-bits macros.
#undef TILEX_LOW_BITS
#undef TILEY_LOW_BITS
501