• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* NEON optimized code (C) COPYRIGHT 2009 Motorola
2  *
3  * Use of this source code is governed by a BSD-style license that can be
4  * found in the LICENSE file.
5  */
6 
7 #include "SkBitmapProcState.h"
8 #include "SkPerspIter.h"
9 #include "SkShader.h"
10 #include "SkUtils.h"
11 
12 /*  returns 0...(n-1) given any x (positive or negative).
13 
14     As an example, if n (which is always positive) is 5...
15 
16           x: -8 -7 -6 -5 -4 -3 -2 -1  0  1  2  3  4  5  6  7  8
17     returns:  2  3  4  0  1  2  3  4  0  1  2  3  4  0  1  2  3
18  */
sk_int_mod(int x,int n)19 static inline int sk_int_mod(int x, int n) {
20     SkASSERT(n > 0);
21     if ((unsigned)x >= (unsigned)n) {
22         if (x < 0) {
23             x = n + ~(~x % n);
24         } else {
25             x = x % n;
26         }
27     }
28     return x;
29 }
30 
// Forward declarations of the two decal fill routines defined later in this
// file. They are declared before the header inclusions that follow.
// NOTE(review): presumably the included SkBitmapProcState_matrix*.h headers
// call these when CHECK_FOR_DECAL is defined -- confirm in those headers.
31 void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count);
32 void decal_filter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count);
33 
// Parameter macros for the ClampX_ClampY family; they are consumed by the
// header included at the end of this section.
//   MAKENAME(suffix)     pastes `suffix` onto the ClampX_ClampY prefix.
//   TILEX/Y_PROCF        passes the integer part of the fixed-point
//                        coordinate (f >> 16) and `max` to SkClampMax.
//   TILEX/Y_LOW_BITS     bits 12..15 of the coordinate, i.e. the top four
//                        bits of the 16-bit fraction (`max` is unused).
//   CHECK_FOR_DECAL      defined only for this family.
// NOTE(review): presumably the header defines ClampX_ClampY_Procs (indexed
// in chooseMatrixProc) and #undefs these macros, since the next section
// redefines them without an #undef -- confirm in the header.
// NEON builds pull in a clamp-specific header instead of the generic one.
34 #define MAKENAME(suffix)        ClampX_ClampY ## suffix
35 #define TILEX_PROCF(fx, max)    SkClampMax((fx) >> 16, max)
36 #define TILEY_PROCF(fy, max)    SkClampMax((fy) >> 16, max)
37 #define TILEX_LOW_BITS(fx, max) (((fx) >> 12) & 0xF)
38 #define TILEY_LOW_BITS(fy, max) (((fy) >> 12) & 0xF)
39 #define CHECK_FOR_DECAL
40 #if	defined(__ARM_HAVE_NEON)
41     #include "SkBitmapProcState_matrix_clamp.h"
42 #else
43     #include "SkBitmapProcState_matrix.h"
44 #endif
45 
// Parameter macros for the RepeatX_RepeatY family; they are consumed by the
// header included at the end of this section.
//   MAKENAME(suffix)     pastes `suffix` onto the RepeatX_RepeatY prefix.
//   TILEX/Y_PROCF        keeps only the low 16 bits of the coordinate (its
//                        fraction), multiplies by (max + 1) and shifts right
//                        by 16, giving an index in 0..max.
//   TILEX/Y_LOW_BITS     the same product shifted right by 12 and masked to
//                        four bits, i.e. the four bits just below that index.
// NOTE(review): presumably the header defines RepeatX_RepeatY_Procs (indexed
// in chooseMatrixProc) and #undefs these macros afterwards -- confirm.
// NEON builds pull in a repeat-specific header instead of the generic one.
46 #define MAKENAME(suffix)        RepeatX_RepeatY ## suffix
47 #define TILEX_PROCF(fx, max)    (((fx) & 0xFFFF) * ((max) + 1) >> 16)
48 #define TILEY_PROCF(fy, max)    (((fy) & 0xFFFF) * ((max) + 1) >> 16)
49 #define TILEX_LOW_BITS(fx, max) ((((fx) & 0xFFFF) * ((max) + 1) >> 12) & 0xF)
50 #define TILEY_LOW_BITS(fy, max) ((((fy) & 0xFFFF) * ((max) + 1) >> 12) & 0xF)
51 #if	defined(__ARM_HAVE_NEON)
52     #include "SkBitmapProcState_matrix_repeat.h"
53 #else
54     #include "SkBitmapProcState_matrix.h"
55 #endif
56 
// Parameter macros for the GeneralXY family, which tiles through function
// pointers rather than inline expressions. The generic header is included
// unconditionally here; there is no NEON-specific variant for this family.
//   MAKENAME(suffix)       pastes `suffix` onto the GeneralXY prefix.
//   PREAMBLE(state)        copies state.fTileProcX / fTileProcY into locals
//                          named tileProcX / tileProcY.
//   PREAMBLE_PARAM_X/Y     extra parameter text declaring one of those procs.
//   PREAMBLE_ARG_X/Y       matching extra argument text.
//   TILEX/Y_PROCF          runs the coordinate through the tile proc, then
//                          multiplies by (max + 1) and shifts right by 16.
//   TILEX/Y_LOW_BITS       the same product shifted right by 12 and masked
//                          to four bits.
// NOTE(review): presumably the header splices PREAMBLE_PARAM_*/PREAMBLE_ARG_*
// into helper signatures and calls, and defines GeneralXY_Procs (indexed in
// chooseMatrixProc) -- confirm in SkBitmapProcState_matrix.h.
57 #define MAKENAME(suffix)        GeneralXY ## suffix
58 #define PREAMBLE(state)         SkBitmapProcState::FixedTileProc tileProcX = (state).fTileProcX; \
59                                 SkBitmapProcState::FixedTileProc tileProcY = (state).fTileProcY
60 #define PREAMBLE_PARAM_X        , SkBitmapProcState::FixedTileProc tileProcX
61 #define PREAMBLE_PARAM_Y        , SkBitmapProcState::FixedTileProc tileProcY
62 #define PREAMBLE_ARG_X          , tileProcX
63 #define PREAMBLE_ARG_Y          , tileProcY
64 #define TILEX_PROCF(fx, max)    (tileProcX(fx) * ((max) + 1) >> 16)
65 #define TILEY_PROCF(fy, max)    (tileProcY(fy) * ((max) + 1) >> 16)
66 #define TILEX_LOW_BITS(fx, max) ((tileProcX(fx) * ((max) + 1) >> 12) & 0xF)
67 #define TILEY_LOW_BITS(fy, max) ((tileProcY(fy) * ((max) + 1) >> 12) & 0xF)
68 #include "SkBitmapProcState_matrix.h"
69 
fixed_clamp(SkFixed x)70 static inline U16CPU fixed_clamp(SkFixed x)
71 {
72 #ifdef SK_CPU_HAS_CONDITIONAL_INSTR
73     if (x >> 16)
74         x = 0xFFFF;
75     if (x < 0)
76         x = 0;
77 #else
78     if (x >> 16)
79     {
80         if (x < 0)
81             x = 0;
82         else
83             x = 0xFFFF;
84     }
85 #endif
86     return x;
87 }
88 
fixed_repeat(SkFixed x)89 static inline U16CPU fixed_repeat(SkFixed x)
90 {
91     return x & 0xFFFF;
92 }
93 
fixed_mirror(SkFixed x)94 static inline U16CPU fixed_mirror(SkFixed x)
95 {
96     SkFixed s = x << 15 >> 31;
97     // s is FFFFFFFF if we're on an odd interval, or 0 if an even interval
98     return (x ^ s) & 0xFFFF;
99 }
100 
choose_tile_proc(unsigned m)101 static SkBitmapProcState::FixedTileProc choose_tile_proc(unsigned m)
102 {
103     if (SkShader::kClamp_TileMode == m)
104         return fixed_clamp;
105     if (SkShader::kRepeat_TileMode == m)
106         return fixed_repeat;
107     SkASSERT(SkShader::kMirror_TileMode == m);
108     return fixed_mirror;
109 }
110 
int_clamp(int x,int n)111 static inline U16CPU int_clamp(int x, int n) {
112 #ifdef SK_CPU_HAS_CONDITIONAL_INSTR
113     if (x >= n)
114         x = n - 1;
115     if (x < 0)
116         x = 0;
117 #else
118     if ((unsigned)x >= (unsigned)n) {
119         if (x < 0) {
120             x = 0;
121         } else {
122             x = n - 1;
123         }
124     }
125 #endif
126     return x;
127 }
128 
int_repeat(int x,int n)129 static inline U16CPU int_repeat(int x, int n) {
130     return sk_int_mod(x, n);
131 }
132 
int_mirror(int x,int n)133 static inline U16CPU int_mirror(int x, int n) {
134     x = sk_int_mod(x, 2 * n);
135     if (x >= n) {
136         x = n + ~(x - n);
137     }
138     return x;
139 }
140 
#if 0
// Debug helper, compiled out: prints int_mirror(i, 3) for i in -8..8.
static void test_int_tileprocs() {
    int i = -8;
    while (i <= 8) {
        SkDebugf(" int_mirror(%2d, 3) = %d\n", i, int_mirror(i, 3));
        ++i;
    }
}
#endif
148 
choose_int_tile_proc(unsigned tm)149 static SkBitmapProcState::IntTileProc choose_int_tile_proc(unsigned tm) {
150     if (SkShader::kClamp_TileMode == tm)
151         return int_clamp;
152     if (SkShader::kRepeat_TileMode == tm)
153         return int_repeat;
154     SkASSERT(SkShader::kMirror_TileMode == tm);
155     return int_mirror;
156 }
157 
158 //////////////////////////////////////////////////////////////////////////////
159 
decal_nofilter_scale(uint32_t dst[],SkFixed fx,SkFixed dx,int count)160 void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count)
161 {
162     int i;
163 
164 #if	defined(__ARM_HAVE_NEON)
165     if (count >= 8) {
166         /* SkFixed is 16.16 fixed point */
167         SkFixed dx2 = dx+dx;
168         SkFixed dx4 = dx2+dx2;
169         SkFixed dx8 = dx4+dx4;
170 
171         /* now build fx/fx+dx/fx+2dx/fx+3dx */
172         SkFixed fx1, fx2, fx3;
173         int32x2_t lower, upper;
174         int32x4_t lbase, hbase;
175         uint16_t *dst16 = (uint16_t *)dst;
176 
177         fx1 = fx+dx;
178         fx2 = fx1+dx;
179         fx3 = fx2+dx;
180 
181         /* avoid an 'lbase unitialized' warning */
182         lbase = vdupq_n_s32(fx);
183         lbase = vsetq_lane_s32(fx1, lbase, 1);
184         lbase = vsetq_lane_s32(fx2, lbase, 2);
185         lbase = vsetq_lane_s32(fx3, lbase, 3);
186         hbase = vaddq_s32(lbase, vdupq_n_s32(dx4));
187 
188         /* take upper 16 of each, store, and bump everything */
189         do {
190             int32x4_t lout, hout;
191             uint16x8_t hi16;
192 
193             lout = lbase;
194             hout = hbase;
195             /* gets hi's of all louts then hi's of all houts */
196             asm ("vuzpq.16 %q0, %q1" : "+w" (lout), "+w" (hout));
197             hi16 = vreinterpretq_u16_s32(hout);
198             vst1q_u16(dst16, hi16);
199 
200             /* on to the next */
201             lbase = vaddq_s32 (lbase, vdupq_n_s32(dx8));
202             hbase = vaddq_s32 (hbase, vdupq_n_s32(dx8));
203             dst16 += 8;
204             count -= 8;
205             fx += dx8;
206         } while (count >= 8);
207         dst = (uint32_t *) dst16;
208     }
209 #else
210     for (i = (count >> 2); i > 0; --i)
211     {
212         *dst++ = pack_two_shorts(fx >> 16, (fx + dx) >> 16);
213         fx += dx+dx;
214         *dst++ = pack_two_shorts(fx >> 16, (fx + dx) >> 16);
215         fx += dx+dx;
216     }
217     count &= 3;
218 #endif
219 
220     uint16_t* xx = (uint16_t*)dst;
221     for (i = count; i > 0; --i) {
222         *xx++ = SkToU16(fx >> 16); fx += dx;
223     }
224 }
225 
decal_filter_scale(uint32_t dst[],SkFixed fx,SkFixed dx,int count)226 void decal_filter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count)
227 {
228 
229 #if	defined(__ARM_HAVE_NEON)
230     if (count >= 8) {
231         int32x4_t wide_fx;
232         int32x4_t wide_fx2;
233         int32x4_t wide_dx8 = vdupq_n_s32(dx*8);
234 
235         wide_fx = vdupq_n_s32(fx);
236         wide_fx = vsetq_lane_s32(fx+dx, wide_fx, 1);
237         wide_fx = vsetq_lane_s32(fx+dx+dx, wide_fx, 2);
238         wide_fx = vsetq_lane_s32(fx+dx+dx+dx, wide_fx, 3);
239 
240         wide_fx2 = vaddq_s32(wide_fx, vdupq_n_s32(dx+dx+dx+dx));
241 
242         while (count >= 8) {
243             int32x4_t wide_out;
244             int32x4_t wide_out2;
245 
246             wide_out = vshlq_n_s32(vshrq_n_s32(wide_fx, 12), 14);
247             wide_out = vorrq_s32(wide_out,
248             vaddq_s32(vshrq_n_s32(wide_fx,16), vdupq_n_s32(1)));
249 
250             wide_out2 = vshlq_n_s32(vshrq_n_s32(wide_fx2, 12), 14);
251             wide_out2 = vorrq_s32(wide_out2,
252             vaddq_s32(vshrq_n_s32(wide_fx2,16), vdupq_n_s32(1)));
253 
254             vst1q_u32(dst, vreinterpretq_u32_s32(wide_out));
255             vst1q_u32(dst+4, vreinterpretq_u32_s32(wide_out2));
256 
257             dst += 8;
258             fx += dx*8;
259             wide_fx = vaddq_s32(wide_fx, wide_dx8);
260             wide_fx2 = vaddq_s32(wide_fx2, wide_dx8);
261             count -= 8;
262         }
263     }
264 #endif
265 
266     if (count & 1)
267     {
268         SkASSERT((fx >> (16 + 14)) == 0);
269         *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
270         fx += dx;
271     }
272     while ((count -= 2) >= 0)
273     {
274         SkASSERT((fx >> (16 + 14)) == 0);
275         *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
276         fx += dx;
277 
278         *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
279         fx += dx;
280     }
281 }
282 
283 ///////////////////////////////////////////////////////////////////////////////
284 // stores the same as SCALE, but is cheaper to compute. Also since there is no
285 // scale, we don't need/have a FILTER version
286 
fill_sequential(uint16_t xptr[],int start,int count)287 static void fill_sequential(uint16_t xptr[], int start, int count) {
288 #if 1
289     if (reinterpret_cast<intptr_t>(xptr) & 0x2) {
290         *xptr++ = start++;
291         count -= 1;
292     }
293     if (count > 3) {
294         uint32_t* xxptr = reinterpret_cast<uint32_t*>(xptr);
295         uint32_t pattern0 = PACK_TWO_SHORTS(start + 0, start + 1);
296         uint32_t pattern1 = PACK_TWO_SHORTS(start + 2, start + 3);
297         start += count & ~3;
298         int qcount = count >> 2;
299         do {
300             *xxptr++ = pattern0;
301             pattern0 += 0x40004;
302             *xxptr++ = pattern1;
303             pattern1 += 0x40004;
304         } while (--qcount != 0);
305         xptr = reinterpret_cast<uint16_t*>(xxptr);
306         count &= 3;
307     }
308     while (--count >= 0) {
309         *xptr++ = start++;
310     }
311 #else
312     for (int i = 0; i < count; i++) {
313         *xptr++ = start++;
314     }
315 #endif
316 }
317 
nofilter_trans_preamble(const SkBitmapProcState & s,uint32_t ** xy,int x,int y)318 static int nofilter_trans_preamble(const SkBitmapProcState& s, uint32_t** xy,
319                                    int x, int y) {
320     SkPoint pt;
321     s.fInvProc(*s.fInvMatrix, SkIntToScalar(x) + SK_ScalarHalf,
322                SkIntToScalar(y) + SK_ScalarHalf, &pt);
323     **xy = s.fIntTileProcY(SkScalarToFixed(pt.fY) >> 16,
324                            s.fBitmap->height());
325     *xy += 1;   // bump the ptr
326     // return our starting X position
327     return SkScalarToFixed(pt.fX) >> 16;
328 }
329 
clampx_nofilter_trans(const SkBitmapProcState & s,uint32_t xy[],int count,int x,int y)330 static void clampx_nofilter_trans(const SkBitmapProcState& s,
331                                   uint32_t xy[], int count, int x, int y) {
332     SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0);
333 
334     int xpos = nofilter_trans_preamble(s, &xy, x, y);
335     const int width = s.fBitmap->width();
336     if (1 == width) {
337         // all of the following X values must be 0
338         memset(xy, 0, count * sizeof(uint16_t));
339         return;
340     }
341 
342     uint16_t* xptr = reinterpret_cast<uint16_t*>(xy);
343     int n;
344 
345     // fill before 0 as needed
346     if (xpos < 0) {
347         n = -xpos;
348         if (n > count) {
349             n = count;
350         }
351         memset(xptr, 0, n * sizeof(uint16_t));
352         count -= n;
353         if (0 == count) {
354             return;
355         }
356         xptr += n;
357         xpos = 0;
358     }
359 
360     // fill in 0..width-1 if needed
361     if (xpos < width) {
362         n = width - xpos;
363         if (n > count) {
364             n = count;
365         }
366         fill_sequential(xptr, xpos, n);
367         count -= n;
368         if (0 == count) {
369             return;
370         }
371         xptr += n;
372     }
373 
374     // fill the remaining with the max value
375     sk_memset16(xptr, width - 1, count);
376 }
377 
repeatx_nofilter_trans(const SkBitmapProcState & s,uint32_t xy[],int count,int x,int y)378 static void repeatx_nofilter_trans(const SkBitmapProcState& s,
379                                    uint32_t xy[], int count, int x, int y) {
380     SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0);
381 
382     int xpos = nofilter_trans_preamble(s, &xy, x, y);
383     const int width = s.fBitmap->width();
384     if (1 == width) {
385         // all of the following X values must be 0
386         memset(xy, 0, count * sizeof(uint16_t));
387         return;
388     }
389 
390     uint16_t* xptr = reinterpret_cast<uint16_t*>(xy);
391     int start = sk_int_mod(xpos, width);
392     int n = width - start;
393     if (n > count) {
394         n = count;
395     }
396     fill_sequential(xptr, start, n);
397     xptr += n;
398     count -= n;
399 
400     while (count >= width) {
401         fill_sequential(xptr, 0, width);
402         xptr += width;
403         count -= width;
404     }
405 
406     if (count > 0) {
407         fill_sequential(xptr, 0, count);
408     }
409 }
410 
fill_backwards(uint16_t xptr[],int pos,int count)411 static void fill_backwards(uint16_t xptr[], int pos, int count) {
412     for (int i = 0; i < count; i++) {
413         SkASSERT(pos >= 0);
414         xptr[i] = pos--;
415     }
416 }
417 
mirrorx_nofilter_trans(const SkBitmapProcState & s,uint32_t xy[],int count,int x,int y)418 static void mirrorx_nofilter_trans(const SkBitmapProcState& s,
419                                    uint32_t xy[], int count, int x, int y) {
420     SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0);
421 
422     int xpos = nofilter_trans_preamble(s, &xy, x, y);
423     const int width = s.fBitmap->width();
424     if (1 == width) {
425         // all of the following X values must be 0
426         memset(xy, 0, count * sizeof(uint16_t));
427         return;
428     }
429 
430     uint16_t* xptr = reinterpret_cast<uint16_t*>(xy);
431     // need to know our start, and our initial phase (forward or backward)
432     bool forward;
433     int n;
434     int start = sk_int_mod(xpos, 2 * width);
435     if (start >= width) {
436         start = width + ~(start - width);
437         forward = false;
438         n = start + 1;  // [start .. 0]
439     } else {
440         forward = true;
441         n = width - start;  // [start .. width)
442     }
443     if (n > count) {
444         n = count;
445     }
446     if (forward) {
447         fill_sequential(xptr, start, n);
448     } else {
449         fill_backwards(xptr, start, n);
450     }
451     forward = !forward;
452     xptr += n;
453     count -= n;
454 
455     while (count >= width) {
456         if (forward) {
457             fill_sequential(xptr, 0, width);
458         } else {
459             fill_backwards(xptr, width - 1, width);
460         }
461         forward = !forward;
462         xptr += width;
463         count -= width;
464     }
465 
466     if (count > 0) {
467         if (forward) {
468             fill_sequential(xptr, 0, count);
469         } else {
470             fill_backwards(xptr, width - 1, count);
471         }
472     }
473 }
474 
475 ///////////////////////////////////////////////////////////////////////////////
476 
477 SkBitmapProcState::MatrixProc
chooseMatrixProc(bool trivial_matrix)478 SkBitmapProcState::chooseMatrixProc(bool trivial_matrix) {
479 //    test_int_tileprocs();
480     // check for our special case when there is no scale/affine/perspective
481     if (trivial_matrix) {
482         SkASSERT(!fDoFilter);
483         fIntTileProcY = choose_int_tile_proc(fTileModeY);
484         switch (fTileModeX) {
485             case SkShader::kClamp_TileMode:
486                 return clampx_nofilter_trans;
487             case SkShader::kRepeat_TileMode:
488                 return repeatx_nofilter_trans;
489             case SkShader::kMirror_TileMode:
490                 return mirrorx_nofilter_trans;
491         }
492     }
493 
494     int index = 0;
495     if (fDoFilter) {
496         index = 1;
497     }
498     if (fInvType & SkMatrix::kPerspective_Mask) {
499         index += 4;
500     } else if (fInvType & SkMatrix::kAffine_Mask) {
501         index += 2;
502     }
503 
504     if (SkShader::kClamp_TileMode == fTileModeX &&
505         SkShader::kClamp_TileMode == fTileModeY)
506     {
507         // clamp gets special version of filterOne
508         fFilterOneX = SK_Fixed1;
509         fFilterOneY = SK_Fixed1;
510         return ClampX_ClampY_Procs[index];
511     }
512 
513     // all remaining procs use this form for filterOne
514     fFilterOneX = SK_Fixed1 / fBitmap->width();
515     fFilterOneY = SK_Fixed1 / fBitmap->height();
516 
517     if (SkShader::kRepeat_TileMode == fTileModeX &&
518         SkShader::kRepeat_TileMode == fTileModeY)
519     {
520         return RepeatX_RepeatY_Procs[index];
521     }
522 
523     fTileProcX = choose_tile_proc(fTileModeX);
524     fTileProcY = choose_tile_proc(fTileModeY);
525     return GeneralXY_Procs[index];
526 }
527 
528