• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2014 Google Inc.
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 
8 #include <arm_neon.h>
9 
// Names of the two matrix procs defined below. MAKENAME is not defined in
// this header; it must be provided before this point.
#define SCALE_NOFILTER_NAME MAKENAME(_nofilter_scale)
#define SCALE_FILTER_NAME   MAKENAME(_filter_scale)

// Names of the scalar (X/Y) and 4-lane NEON (X4/Y4) packing helpers.
#define PACK_FILTER_X_NAME  MAKENAME(_pack_filter_x)
#define PACK_FILTER_Y_NAME  MAKENAME(_pack_filter_y)
#define PACK_FILTER_X4_NAME MAKENAME(_pack_filter_x4)
#define PACK_FILTER_Y4_NAME MAKENAME(_pack_filter_y4)

// When no PREAMBLE has been supplied, the whole PREAMBLE family expands to
// nothing, so the helpers take no extra parameters or arguments.
#if !defined(PREAMBLE)
    #define PREAMBLE(state)
    #define PREAMBLE_PARAM_X
    #define PREAMBLE_PARAM_Y
    #define PREAMBLE_ARG_X
    #define PREAMBLE_ARG_Y
#endif
25 
SCALE_NOFILTER_NAME(const SkBitmapProcState & s,uint32_t xy[],int count,int x,int y)26 static void SCALE_NOFILTER_NAME(const SkBitmapProcState& s,
27                                 uint32_t xy[], int count, int x, int y) {
28     SkASSERT((s.fInvType & ~(SkMatrix::kTranslate_Mask |
29                              SkMatrix::kScale_Mask)) == 0);
30 
31     PREAMBLE(s);
32 
33     // we store y, x, x, x, x, x
34     const unsigned maxX = s.fPixmap.width() - 1;
35     SkFractionalInt fx;
36     {
37         const SkBitmapProcStateAutoMapper mapper(s, x, y);
38         const unsigned maxY = s.fPixmap.height() - 1;
39         *xy++ = TILEY_PROCF(mapper.fixedY(), maxY);
40         fx = mapper.fractionalIntX();
41     }
42 
43     if (0 == maxX) {
44         // all of the following X values must be 0
45         memset(xy, 0, count * sizeof(uint16_t));
46         return;
47     }
48 
49     const SkFractionalInt dx = s.fInvSxFractionalInt;
50 
51 #ifdef CHECK_FOR_DECAL
52     // test if we don't need to apply the tile proc
53     const SkFixed fixedFx = SkFractionalIntToFixed(fx);
54     const SkFixed fixedDx = SkFractionalIntToFixed(dx);
55     if (can_truncate_to_fixed_for_decal(fixedFx, fixedDx, count, maxX)) {
56         decal_nofilter_scale_neon(xy, fixedFx, fixedDx, count);
57         return;
58     }
59 #endif
60 
61     if (count >= 8) {
62         SkFractionalInt dx2 = dx+dx;
63         SkFractionalInt dx4 = dx2+dx2;
64         SkFractionalInt dx8 = dx4+dx4;
65 
66         // now build fx/fx+dx/fx+2dx/fx+3dx
67         SkFractionalInt fx1, fx2, fx3;
68         int32x4_t lbase, hbase;
69         int16_t *dst16 = (int16_t *)xy;
70 
71         fx1 = fx+dx;
72         fx2 = fx1+dx;
73         fx3 = fx2+dx;
74 
75         lbase = vdupq_n_s32(SkFractionalIntToFixed(fx));
76         lbase = vsetq_lane_s32(SkFractionalIntToFixed(fx1), lbase, 1);
77         lbase = vsetq_lane_s32(SkFractionalIntToFixed(fx2), lbase, 2);
78         lbase = vsetq_lane_s32(SkFractionalIntToFixed(fx3), lbase, 3);
79         hbase = vaddq_s32(lbase, vdupq_n_s32(SkFractionalIntToFixed(dx4)));
80 
81         // store & bump
82         while (count >= 8) {
83 
84             int16x8_t fx8;
85 
86             fx8 = TILEX_PROCF_NEON8(lbase, hbase, maxX);
87 
88             vst1q_s16(dst16, fx8);
89 
90             // but preserving base & on to the next
91             lbase = vaddq_s32 (lbase, vdupq_n_s32(SkFractionalIntToFixed(dx8)));
92             hbase = vaddq_s32 (hbase, vdupq_n_s32(SkFractionalIntToFixed(dx8)));
93             dst16 += 8;
94             count -= 8;
95             fx += dx8;
96         };
97         xy = (uint32_t *) dst16;
98     }
99 
100     uint16_t* xx = (uint16_t*)xy;
101     for (int i = count; i > 0; --i) {
102         *xx++ = TILEX_PROCF(SkFractionalIntToFixed(fx), maxX);
103         fx += dx;
104     }
105 }
106 
PACK_FILTER_Y_NAME(SkFixed f,unsigned max,SkFixed one PREAMBLE_PARAM_Y)107 static inline uint32_t PACK_FILTER_Y_NAME(SkFixed f, unsigned max,
108                                           SkFixed one PREAMBLE_PARAM_Y) {
109     unsigned i = TILEY_PROCF(f, max);
110     i = (i << 4) | EXTRACT_LOW_BITS(f, max);
111     return (i << 14) | (TILEY_PROCF((f + one), max));
112 }
113 
PACK_FILTER_X_NAME(SkFixed f,unsigned max,SkFixed one PREAMBLE_PARAM_X)114 static inline uint32_t PACK_FILTER_X_NAME(SkFixed f, unsigned max,
115                                           SkFixed one PREAMBLE_PARAM_X) {
116     unsigned i = TILEX_PROCF(f, max);
117     i = (i << 4) | EXTRACT_LOW_BITS(f, max);
118     return (i << 14) | (TILEX_PROCF((f + one), max));
119 }
120 
PACK_FILTER_X4_NAME(int32x4_t f,unsigned max,SkFixed one PREAMBLE_PARAM_X)121 static inline int32x4_t PACK_FILTER_X4_NAME(int32x4_t f, unsigned max,
122                                           SkFixed one PREAMBLE_PARAM_X) {
123     int32x4_t ret, res, wide_one;
124 
125     // Prepare constants
126     wide_one = vdupq_n_s32(one);
127 
128     // Step 1
129     res = TILEX_PROCF_NEON4(f, max);
130 
131     // Step 2
132     ret = EXTRACT_LOW_BITS_NEON4(f, max);
133     ret = vsliq_n_s32(ret, res, 4);
134 
135     // Step 3
136     res = TILEX_PROCF_NEON4(f + wide_one, max);
137     ret = vorrq_s32(vshlq_n_s32(ret, 14), res);
138 
139     return ret;
140 }
141 
PACK_FILTER_Y4_NAME(int32x4_t f,unsigned max,SkFixed one PREAMBLE_PARAM_X)142 static inline int32x4_t PACK_FILTER_Y4_NAME(int32x4_t f, unsigned max,
143                                           SkFixed one PREAMBLE_PARAM_X) {
144     int32x4_t ret, res, wide_one;
145 
146     // Prepare constants
147     wide_one = vdupq_n_s32(one);
148 
149     // Step 1
150     res = TILEY_PROCF_NEON4(f, max);
151 
152     // Step 2
153     ret = EXTRACT_LOW_BITS_NEON4(f, max);
154     ret = vsliq_n_s32(ret, res, 4);
155 
156     // Step 3
157     res = TILEY_PROCF_NEON4(f + wide_one, max);
158     ret = vorrq_s32(vshlq_n_s32(ret, 14), res);
159 
160     return ret;
161 }
162 
SCALE_FILTER_NAME(const SkBitmapProcState & s,uint32_t xy[],int count,int x,int y)163 static void SCALE_FILTER_NAME(const SkBitmapProcState& s,
164                               uint32_t xy[], int count, int x, int y) {
165     SkASSERT((s.fInvType & ~(SkMatrix::kTranslate_Mask |
166                              SkMatrix::kScale_Mask)) == 0);
167     SkASSERT(s.fInvKy == 0);
168 
169     PREAMBLE(s);
170 
171     const unsigned maxX = s.fPixmap.width() - 1;
172     const SkFixed one = s.fFilterOneX;
173     const SkFractionalInt dx = s.fInvSxFractionalInt;
174     SkFractionalInt fx;
175 
176     {
177         const SkBitmapProcStateAutoMapper mapper(s, x, y);
178         const SkFixed fy = mapper.fixedY();
179         const unsigned maxY = s.fPixmap.height() - 1;
180         // compute our two Y values up front
181         *xy++ = PACK_FILTER_Y_NAME(fy, maxY, s.fFilterOneY PREAMBLE_ARG_Y);
182         // now initialize fx
183         fx = mapper.fractionalIntX();
184     }
185 
186 #ifdef CHECK_FOR_DECAL
187     // test if we don't need to apply the tile proc
188     const SkFixed fixedFx = SkFractionalIntToFixed(fx);
189     const SkFixed fixedDx = SkFractionalIntToFixed(dx);
190     if (can_truncate_to_fixed_for_decal(fixedFx, fixedDx, count, maxX)) {
191         decal_filter_scale_neon(xy, fixedFx, fixedDx, count);
192         return;
193     }
194 #endif
195     {
196 
197     if (count >= 4) {
198         int32x4_t wide_fx;
199 
200         wide_fx = vdupq_n_s32(SkFractionalIntToFixed(fx));
201         wide_fx = vsetq_lane_s32(SkFractionalIntToFixed(fx+dx), wide_fx, 1);
202         wide_fx = vsetq_lane_s32(SkFractionalIntToFixed(fx+dx+dx), wide_fx, 2);
203         wide_fx = vsetq_lane_s32(SkFractionalIntToFixed(fx+dx+dx+dx), wide_fx, 3);
204 
205         while (count >= 4) {
206             int32x4_t res;
207 
208             res = PACK_FILTER_X4_NAME(wide_fx, maxX, one PREAMBLE_ARG_X);
209 
210             vst1q_u32(xy, vreinterpretq_u32_s32(res));
211 
212             wide_fx += vdupq_n_s32(SkFractionalIntToFixed(dx+dx+dx+dx));
213             fx += dx+dx+dx+dx;
214             xy += 4;
215             count -= 4;
216         }
217     }
218 
219     while (--count >= 0) {
220         *xy++ = PACK_FILTER_X_NAME(SkFractionalIntToFixed(fx), maxX, one PREAMBLE_ARG_X);
221         fx += dx;
222     }
223 
224     }
225 }
226 
227 const SkBitmapProcState::MatrixProc MAKENAME(_Procs)[] = {
228     SCALE_NOFILTER_NAME,
229     SCALE_FILTER_NAME,
230 };
231 
// Drop every macro this header consumed or defined, so nothing from this
// instantiation stays visible after the header ends.

// NEON tiling helpers supplied by the includer.
#undef TILEX_PROCF_NEON8
#undef TILEY_PROCF_NEON8
#undef TILEX_PROCF_NEON4
#undef TILEY_PROCF_NEON4
#undef EXTRACT_LOW_BITS_NEON4

// Scalar helpers supplied by the includer.
#undef MAKENAME
#undef TILEX_PROCF
#undef TILEY_PROCF
#ifdef CHECK_FOR_DECAL
    #undef CHECK_FOR_DECAL
#endif

// Names defined at the top of this header.
#undef SCALE_NOFILTER_NAME
#undef SCALE_FILTER_NAME

// The pack-helper names are defined at the top of this header as well and
// must not leak past it either.
#undef PACK_FILTER_X_NAME
#undef PACK_FILTER_Y_NAME
#undef PACK_FILTER_X4_NAME
#undef PACK_FILTER_Y4_NAME

#undef PREAMBLE
#undef PREAMBLE_PARAM_X
#undef PREAMBLE_PARAM_Y
#undef PREAMBLE_ARG_X
#undef PREAMBLE_ARG_Y

#undef EXTRACT_LOW_BITS
255