• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* NEON optimized code (C) COPYRIGHT 2009 Motorola */
2 
3 #include "SkBitmapProcState.h"
4 #include "SkPerspIter.h"
5 #include "SkShader.h"
6 #include "SkUtils.h"
7 
8 /*  returns 0...(n-1) given any x (positive or negative).
9 
10     As an example, if n (which is always positive) is 5...
11 
12           x: -8 -7 -6 -5 -4 -3 -2 -1  0  1  2  3  4  5  6  7  8
13     returns:  2  3  4  0  1  2  3  4  0  1  2  3  4  0  1  2  3
14  */
sk_int_mod(int x,int n)15 static inline int sk_int_mod(int x, int n) {
16     SkASSERT(n > 0);
17     if ((unsigned)x >= (unsigned)n) {
18         if (x < 0) {
19             x = n + ~(~x % n);
20         } else {
21             x = x % n;
22         }
23     }
24     return x;
25 }
26 
// Forward declarations of the decal helpers. Both are defined further down in
// this file, after the SkBitmapProcState_matrix*.h headers have been included.
// NOTE(review): presumably those headers call these when CHECK_FOR_DECAL is
// defined, which is why the prototypes must precede the #includes - confirm.
void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count);
void decal_filter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count);
29 
// Instantiate the ClampX_ClampY family of matrix procs. The header included
// below is parameterized by these macros:
//   MAKENAME       - pastes the "ClampX_ClampY" prefix onto a suffix.
//   TILEx_PROCF    - the 16.16 coordinate shifted down by 16 (its integer
//                    part) passed through SkClampMax(..., max).
//   TILEx_LOW_BITS - bits 12..15 of the 16.16 coordinate, i.e. the top four
//                    bits of its fraction ("max" is unused here).
// CHECK_FOR_DECAL is defined for this instantiation and not for the later
// ones in this file.
// NOTE(review): the macros are redefined further down without an #undef, so
// the included headers presumably #undef them when done - confirm.
#define MAKENAME(suffix)        ClampX_ClampY ## suffix
#define TILEX_PROCF(fx, max)    SkClampMax((fx) >> 16, max)
#define TILEY_PROCF(fy, max)    SkClampMax((fy) >> 16, max)
#define TILEX_LOW_BITS(fx, max) (((fx) >> 12) & 0xF)
#define TILEY_LOW_BITS(fy, max) (((fy) >> 12) & 0xF)
#define CHECK_FOR_DECAL
// NEON builds pull in a clamp-specific header; all other builds use the
// generic one.
#if	defined(__ARM_HAVE_NEON)
    #include "SkBitmapProcState_matrix_clamp.h"
#else
    #include "SkBitmapProcState_matrix.h"
#endif
41 
// Instantiate the RepeatX_RepeatY family of matrix procs.
//   TILEx_PROCF    - keeps only the 16 fractional bits of the coordinate and
//                    scales them by (max + 1), then shifts down by 16, which
//                    yields an index in 0..max.
//   TILEx_LOW_BITS - the same product shifted down by 12 and masked to four
//                    bits, i.e. the four bits just below that index.
#define MAKENAME(suffix)        RepeatX_RepeatY ## suffix
#define TILEX_PROCF(fx, max)    (((fx) & 0xFFFF) * ((max) + 1) >> 16)
#define TILEY_PROCF(fy, max)    (((fy) & 0xFFFF) * ((max) + 1) >> 16)
#define TILEX_LOW_BITS(fx, max) ((((fx) & 0xFFFF) * ((max) + 1) >> 12) & 0xF)
#define TILEY_LOW_BITS(fy, max) ((((fy) & 0xFFFF) * ((max) + 1) >> 12) & 0xF)
// NEON builds pull in a repeat-specific header; all other builds use the
// generic one.
#if	defined(__ARM_HAVE_NEON)
    #include "SkBitmapProcState_matrix_repeat.h"
#else
    #include "SkBitmapProcState_matrix.h"
#endif
52 
// Instantiate the GeneralXY family of matrix procs, used when the tile modes
// are not both clamp or both repeat. Tiling goes through the function
// pointers stored in the state (fTileProcX / fTileProcY) instead of inline
// arithmetic:
//   PREAMBLE         - copies the two tile procs out of the state into locals.
//   PREAMBLE_PARAM_* / PREAMBLE_ARG_* - extra parameter / argument text for
//                      those procs. NOTE(review): presumably spliced into
//                      helper signatures and calls inside the header - confirm.
//   TILEx_PROCF      - tile proc result scaled by (max + 1) and shifted down
//                      by 16.
//   TILEx_LOW_BITS   - the same product shifted down by 12 and masked to four
//                      bits.
#define MAKENAME(suffix)        GeneralXY ## suffix
#define PREAMBLE(state)         SkBitmapProcState::FixedTileProc tileProcX = (state).fTileProcX; \
                                SkBitmapProcState::FixedTileProc tileProcY = (state).fTileProcY
#define PREAMBLE_PARAM_X        , SkBitmapProcState::FixedTileProc tileProcX
#define PREAMBLE_PARAM_Y        , SkBitmapProcState::FixedTileProc tileProcY
#define PREAMBLE_ARG_X          , tileProcX
#define PREAMBLE_ARG_Y          , tileProcY
#define TILEX_PROCF(fx, max)    (tileProcX(fx) * ((max) + 1) >> 16)
#define TILEY_PROCF(fy, max)    (tileProcY(fy) * ((max) + 1) >> 16)
#define TILEX_LOW_BITS(fx, max) ((tileProcX(fx) * ((max) + 1) >> 12) & 0xF)
#define TILEY_LOW_BITS(fy, max) ((tileProcY(fy) * ((max) + 1) >> 12) & 0xF)
#include "SkBitmapProcState_matrix.h"
65 
fixed_clamp(SkFixed x)66 static inline U16CPU fixed_clamp(SkFixed x)
67 {
68 #ifdef SK_CPU_HAS_CONDITIONAL_INSTR
69     if (x >> 16)
70         x = 0xFFFF;
71     if (x < 0)
72         x = 0;
73 #else
74     if (x >> 16)
75     {
76         if (x < 0)
77             x = 0;
78         else
79             x = 0xFFFF;
80     }
81 #endif
82     return x;
83 }
84 
fixed_repeat(SkFixed x)85 static inline U16CPU fixed_repeat(SkFixed x)
86 {
87     return x & 0xFFFF;
88 }
89 
fixed_mirror(SkFixed x)90 static inline U16CPU fixed_mirror(SkFixed x)
91 {
92     SkFixed s = x << 15 >> 31;
93     // s is FFFFFFFF if we're on an odd interval, or 0 if an even interval
94     return (x ^ s) & 0xFFFF;
95 }
96 
choose_tile_proc(unsigned m)97 static SkBitmapProcState::FixedTileProc choose_tile_proc(unsigned m)
98 {
99     if (SkShader::kClamp_TileMode == m)
100         return fixed_clamp;
101     if (SkShader::kRepeat_TileMode == m)
102         return fixed_repeat;
103     SkASSERT(SkShader::kMirror_TileMode == m);
104     return fixed_mirror;
105 }
106 
int_clamp(int x,int n)107 static inline U16CPU int_clamp(int x, int n) {
108 #ifdef SK_CPU_HAS_CONDITIONAL_INSTR
109     if (x >= n)
110         x = n - 1;
111     if (x < 0)
112         x = 0;
113 #else
114     if ((unsigned)x >= (unsigned)n) {
115         if (x < 0) {
116             x = 0;
117         } else {
118             x = n - 1;
119         }
120     }
121 #endif
122     return x;
123 }
124 
int_repeat(int x,int n)125 static inline U16CPU int_repeat(int x, int n) {
126     return sk_int_mod(x, n);
127 }
128 
int_mirror(int x,int n)129 static inline U16CPU int_mirror(int x, int n) {
130     x = sk_int_mod(x, 2 * n);
131     if (x >= n) {
132         x = n + ~(x - n);
133     }
134     return x;
135 }
136 
// Disabled debugging aid: prints int_mirror(i, 3) for i in -8..8. It is
// compiled out by the "#if 0"; change that to "#if 1" and call it by hand to
// use it.
#if 0
static void test_int_tileprocs() {
    for (int i = -8; i <= 8; i++) {
        SkDebugf(" int_mirror(%2d, 3) = %d\n", i, int_mirror(i, 3));
    }
}
#endif
144 
choose_int_tile_proc(unsigned tm)145 static SkBitmapProcState::IntTileProc choose_int_tile_proc(unsigned tm) {
146     if (SkShader::kClamp_TileMode == tm)
147         return int_clamp;
148     if (SkShader::kRepeat_TileMode == tm)
149         return int_repeat;
150     SkASSERT(SkShader::kMirror_TileMode == tm);
151     return int_mirror;
152 }
153 
154 //////////////////////////////////////////////////////////////////////////////
155 
/*  Writes count 16-bit values into dst (viewed as a uint16_t array): the
    integer parts (value >> 16) of fx, fx + dx, fx + 2*dx, ...

    dst   - output buffer; receives count uint16_t entries.
    fx    - starting coordinate, 16.16 fixed point.
    dx    - per-entry step, 16.16 fixed point.
    count - number of entries to produce.

    The bulk of the work is done 8 entries at a time with NEON when
    available, or 4 at a time otherwise; a scalar loop finishes the tail.
 */
void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count)
{
    int i;

#if	defined(__ARM_HAVE_NEON)
    if (count >= 8) {
        /* SkFixed is 16.16 fixed point */
        SkFixed dx2 = dx+dx;
        SkFixed dx4 = dx2+dx2;
        SkFixed dx8 = dx4+dx4;

        /* now build fx/fx+dx/fx+2dx/fx+3dx */
        SkFixed fx1, fx2, fx3;
        /* NOTE(review): lower/upper are not referenced anywhere below */
        int32x2_t lower, upper;
        int32x4_t lbase, hbase;
        uint16_t *dst16 = (uint16_t *)dst;

        fx1 = fx+dx;
        fx2 = fx1+dx;
        fx3 = fx2+dx;

        /* avoid an 'lbase uninitialized' warning: seed every lane with fx,
           then overwrite lanes 1..3 */
        lbase = vdupq_n_s32(fx);
        lbase = vsetq_lane_s32(fx1, lbase, 1);
        lbase = vsetq_lane_s32(fx2, lbase, 2);
        lbase = vsetq_lane_s32(fx3, lbase, 3);
        /* hbase holds the next four coordinates: fx+4dx .. fx+7dx */
        hbase = vaddq_s32(lbase, vdupq_n_s32(dx4));

        /* take upper 16 of each, store, and bump everything */
        do {
            int32x4_t lout, hout;
            uint16x8_t hi16;

            lout = lbase;
            hout = hbase;
            /* gets hi's of all louts then hi's of all houts */
            asm ("vuzpq.16 %q0, %q1" : "+w" (lout), "+w" (hout));
            hi16 = vreinterpretq_u16_s32(hout);
            vst1q_u16(dst16, hi16);

            /* on to the next */
            lbase = vaddq_s32 (lbase, vdupq_n_s32(dx8));
            hbase = vaddq_s32 (hbase, vdupq_n_s32(dx8));
            dst16 += 8;
            count -= 8;
            /* keep the scalar fx in step so the tail loop resumes correctly */
            fx += dx8;
        } while (count >= 8);
        dst = (uint32_t *) dst16;
    }
#else
    /* four entries per iteration, written as two packed 32-bit words */
    for (i = (count >> 2); i > 0; --i)
    {
        *dst++ = pack_two_shorts(fx >> 16, (fx + dx) >> 16);
        fx += dx+dx;
        *dst++ = pack_two_shorts(fx >> 16, (fx + dx) >> 16);
        fx += dx+dx;
    }
    count &= 3;
#endif

    /* scalar tail: whatever the vector / unrolled loop left over */
    uint16_t* xx = (uint16_t*)dst;
    for (i = count; i > 0; --i) {
        *xx++ = SkToU16(fx >> 16); fx += dx;
    }
}
221 
/*  Writes count 32-bit entries into dst, one per coordinate fx, fx + dx,
    fx + 2*dx, ...  Each entry is

        ((f >> 12) << 14) | ((f >> 16) + 1)

    so the integer part of f lands at bit 18 and up, the top four fraction
    bits at bits 14..17, and (integer part + 1) is OR-ed into the low bits.

    dst   - output buffer; receives count uint32_t entries.
    fx    - starting coordinate, 16.16 fixed point.
    dx    - per-entry step, 16.16 fixed point.
    count - number of entries to produce.
 */
void decal_filter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count)
{

#if	defined(__ARM_HAVE_NEON)
    if (count >= 8) {
        int32x4_t wide_fx;
        int32x4_t wide_fx2;
        int32x4_t wide_dx8 = vdupq_n_s32(dx*8);

        // lanes 0..3 = fx, fx+dx, fx+2dx, fx+3dx
        wide_fx = vdupq_n_s32(fx);
        wide_fx = vsetq_lane_s32(fx+dx, wide_fx, 1);
        wide_fx = vsetq_lane_s32(fx+dx+dx, wide_fx, 2);
        wide_fx = vsetq_lane_s32(fx+dx+dx+dx, wide_fx, 3);

        // the next four coordinates: fx+4dx .. fx+7dx
        wide_fx2 = vaddq_s32(wide_fx, vdupq_n_s32(dx+dx+dx+dx));

        while (count >= 8) {
            int32x4_t wide_out;
            int32x4_t wide_out2;

            // same formula as the scalar code below, four lanes at a time
            wide_out = vshlq_n_s32(vshrq_n_s32(wide_fx, 12), 14);
            wide_out = vorrq_s32(wide_out,
            vaddq_s32(vshrq_n_s32(wide_fx,16), vdupq_n_s32(1)));

            wide_out2 = vshlq_n_s32(vshrq_n_s32(wide_fx2, 12), 14);
            wide_out2 = vorrq_s32(wide_out2,
            vaddq_s32(vshrq_n_s32(wide_fx2,16), vdupq_n_s32(1)));

            vst1q_u32(dst, vreinterpretq_u32_s32(wide_out));
            vst1q_u32(dst+4, vreinterpretq_u32_s32(wide_out2));

            dst += 8;
            // keep the scalar fx in step so the tail code resumes correctly
            fx += dx*8;
            wide_fx = vaddq_s32(wide_fx, wide_dx8);
            wide_fx2 = vaddq_s32(wide_fx2, wide_dx8);
            count -= 8;
        }
    }
#endif

    // handle an odd leftover first so the loop below can go two at a time
    if (count & 1)
    {
        // the integer part must fit in 14 bits for the packing to be valid
        SkASSERT((fx >> (16 + 14)) == 0);
        *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
        fx += dx;
    }
    // count -= 2 goes negative once fewer than two entries remain
    while ((count -= 2) >= 0)
    {
        SkASSERT((fx >> (16 + 14)) == 0);
        *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
        fx += dx;

        *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
        fx += dx;
    }
}
278 
279 ///////////////////////////////////////////////////////////////////////////////
280 // stores the same as SCALE, but is cheaper to compute. Also since there is no
281 // scale, we don't need/have a FILTER version
282 
fill_sequential(uint16_t xptr[],int start,int count)283 static void fill_sequential(uint16_t xptr[], int start, int count) {
284 #if 1
285     if (reinterpret_cast<intptr_t>(xptr) & 0x2) {
286         *xptr++ = start++;
287         count -= 1;
288     }
289     if (count > 3) {
290         uint32_t* xxptr = reinterpret_cast<uint32_t*>(xptr);
291         uint32_t pattern0 = PACK_TWO_SHORTS(start + 0, start + 1);
292         uint32_t pattern1 = PACK_TWO_SHORTS(start + 2, start + 3);
293         start += count & ~3;
294         int qcount = count >> 2;
295         do {
296             *xxptr++ = pattern0;
297             pattern0 += 0x40004;
298             *xxptr++ = pattern1;
299             pattern1 += 0x40004;
300         } while (--qcount != 0);
301         xptr = reinterpret_cast<uint16_t*>(xxptr);
302         count &= 3;
303     }
304     while (--count >= 0) {
305         *xptr++ = start++;
306     }
307 #else
308     for (int i = 0; i < count; i++) {
309         *xptr++ = start++;
310     }
311 #endif
312 }
313 
nofilter_trans_preamble(const SkBitmapProcState & s,uint32_t ** xy,int x,int y)314 static int nofilter_trans_preamble(const SkBitmapProcState& s, uint32_t** xy,
315                                    int x, int y) {
316     SkPoint pt;
317     s.fInvProc(*s.fInvMatrix, SkIntToScalar(x) + SK_ScalarHalf,
318                SkIntToScalar(y) + SK_ScalarHalf, &pt);
319     **xy = s.fIntTileProcY(SkScalarToFixed(pt.fY) >> 16,
320                            s.fBitmap->height());
321     *xy += 1;   // bump the ptr
322     // return our starting X position
323     return SkScalarToFixed(pt.fX) >> 16;
324 }
325 
clampx_nofilter_trans(const SkBitmapProcState & s,uint32_t xy[],int count,int x,int y)326 static void clampx_nofilter_trans(const SkBitmapProcState& s,
327                                   uint32_t xy[], int count, int x, int y) {
328     SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0);
329 
330     int xpos = nofilter_trans_preamble(s, &xy, x, y);
331     const int width = s.fBitmap->width();
332     if (1 == width) {
333         // all of the following X values must be 0
334         memset(xy, 0, count * sizeof(uint16_t));
335         return;
336     }
337 
338     uint16_t* xptr = reinterpret_cast<uint16_t*>(xy);
339     int n;
340 
341     // fill before 0 as needed
342     if (xpos < 0) {
343         n = -xpos;
344         if (n > count) {
345             n = count;
346         }
347         memset(xptr, 0, n * sizeof(uint16_t));
348         count -= n;
349         if (0 == count) {
350             return;
351         }
352         xptr += n;
353         xpos = 0;
354     }
355 
356     // fill in 0..width-1 if needed
357     if (xpos < width) {
358         n = width - xpos;
359         if (n > count) {
360             n = count;
361         }
362         fill_sequential(xptr, xpos, n);
363         count -= n;
364         if (0 == count) {
365             return;
366         }
367         xptr += n;
368     }
369 
370     // fill the remaining with the max value
371     sk_memset16(xptr, width - 1, count * sizeof(uint16_t));
372 }
373 
repeatx_nofilter_trans(const SkBitmapProcState & s,uint32_t xy[],int count,int x,int y)374 static void repeatx_nofilter_trans(const SkBitmapProcState& s,
375                                    uint32_t xy[], int count, int x, int y) {
376     SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0);
377 
378     int xpos = nofilter_trans_preamble(s, &xy, x, y);
379     const int width = s.fBitmap->width();
380     if (1 == width) {
381         // all of the following X values must be 0
382         memset(xy, 0, count * sizeof(uint16_t));
383         return;
384     }
385 
386     uint16_t* xptr = reinterpret_cast<uint16_t*>(xy);
387     int start = sk_int_mod(xpos, width);
388     int n = width - start;
389     if (n > count) {
390         n = count;
391     }
392     fill_sequential(xptr, start, n);
393     xptr += n;
394     count -= n;
395 
396     while (count >= width) {
397         fill_sequential(xptr, 0, width);
398         xptr += width;
399         count -= width;
400     }
401 
402     if (count > 0) {
403         fill_sequential(xptr, 0, count);
404     }
405 }
406 
fill_backwards(uint16_t xptr[],int pos,int count)407 static void fill_backwards(uint16_t xptr[], int pos, int count) {
408     for (int i = 0; i < count; i++) {
409         SkASSERT(pos >= 0);
410         xptr[i] = pos--;
411     }
412 }
413 
mirrorx_nofilter_trans(const SkBitmapProcState & s,uint32_t xy[],int count,int x,int y)414 static void mirrorx_nofilter_trans(const SkBitmapProcState& s,
415                                    uint32_t xy[], int count, int x, int y) {
416     SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0);
417 
418     int xpos = nofilter_trans_preamble(s, &xy, x, y);
419     const int width = s.fBitmap->width();
420     if (1 == width) {
421         // all of the following X values must be 0
422         memset(xy, 0, count * sizeof(uint16_t));
423         return;
424     }
425 
426     uint16_t* xptr = reinterpret_cast<uint16_t*>(xy);
427     // need to know our start, and our initial phase (forward or backward)
428     bool forward;
429     int n;
430     int start = sk_int_mod(xpos, 2 * width);
431     if (start >= width) {
432         start = width + ~(start - width);
433         forward = false;
434         n = start + 1;  // [start .. 0]
435     } else {
436         forward = true;
437         n = width - start;  // [start .. width)
438     }
439     if (n > count) {
440         n = count;
441     }
442     if (forward) {
443         fill_sequential(xptr, start, n);
444     } else {
445         fill_backwards(xptr, start, n);
446     }
447     forward = !forward;
448     xptr += n;
449     count -= n;
450 
451     while (count >= width) {
452         if (forward) {
453             fill_sequential(xptr, 0, width);
454         } else {
455             fill_backwards(xptr, width - 1, width);
456         }
457         forward = !forward;
458         xptr += width;
459         count -= width;
460     }
461 
462     if (count > 0) {
463         if (forward) {
464             fill_sequential(xptr, 0, count);
465         } else {
466             fill_backwards(xptr, width - 1, count);
467         }
468     }
469 }
470 
471 ///////////////////////////////////////////////////////////////////////////////
472 
473 SkBitmapProcState::MatrixProc
chooseMatrixProc(bool trivial_matrix)474 SkBitmapProcState::chooseMatrixProc(bool trivial_matrix) {
475 //    test_int_tileprocs();
476     // check for our special case when there is no scale/affine/perspective
477     if (trivial_matrix) {
478         SkASSERT(!fDoFilter);
479         fIntTileProcY = choose_int_tile_proc(fTileModeY);
480         switch (fTileModeX) {
481             case SkShader::kClamp_TileMode:
482                 return clampx_nofilter_trans;
483             case SkShader::kRepeat_TileMode:
484                 return repeatx_nofilter_trans;
485             case SkShader::kMirror_TileMode:
486                 return mirrorx_nofilter_trans;
487         }
488     }
489 
490     int index = 0;
491     if (fDoFilter) {
492         index = 1;
493     }
494     if (fInvType & SkMatrix::kPerspective_Mask) {
495         index += 4;
496     } else if (fInvType & SkMatrix::kAffine_Mask) {
497         index += 2;
498     }
499 
500     if (SkShader::kClamp_TileMode == fTileModeX &&
501         SkShader::kClamp_TileMode == fTileModeY)
502     {
503         // clamp gets special version of filterOne
504         fFilterOneX = SK_Fixed1;
505         fFilterOneY = SK_Fixed1;
506         return ClampX_ClampY_Procs[index];
507     }
508 
509     // all remaining procs use this form for filterOne
510     fFilterOneX = SK_Fixed1 / fBitmap->width();
511     fFilterOneY = SK_Fixed1 / fBitmap->height();
512 
513     if (SkShader::kRepeat_TileMode == fTileModeX &&
514         SkShader::kRepeat_TileMode == fTileModeY)
515     {
516         return RepeatX_RepeatY_Procs[index];
517     }
518 
519     fTileProcX = choose_tile_proc(fTileModeX);
520     fTileProcY = choose_tile_proc(fTileModeY);
521     return GeneralXY_Procs[index];
522 }
523 
524