1 /*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17
18 #include "rsCpuIntrinsic.h"
19 #include "rsCpuIntrinsicInlines.h"
20
21 namespace android {
22 namespace renderscript {
23
24
25 class RsdCpuScriptIntrinsicResize : public RsdCpuScriptIntrinsic {
26 public:
27 void populateScript(Script *) override;
28 void invokeFreeChildren() override;
29
30 void setGlobalObj(uint32_t slot, ObjectBase *data) override;
31
32 ~RsdCpuScriptIntrinsicResize() override;
33 RsdCpuScriptIntrinsicResize(RsdCpuReferenceImpl *ctx, const Script *s, const Element *);
34
35 void preLaunch(uint32_t slot, const Allocation ** ains,
36 uint32_t inLen, Allocation * aout, const void * usr,
37 uint32_t usrLen, const RsScriptCall *sc) override;
38
39 float scaleX;
40 float scaleY;
41
42 protected:
43 ObjectBaseRef<const Allocation> mAlloc;
44 ObjectBaseRef<const Element> mElement;
45
46 static void kernelU1(const RsExpandKernelDriverInfo *info,
47 uint32_t xstart, uint32_t xend,
48 uint32_t outstep);
49 static void kernelU2(const RsExpandKernelDriverInfo *info,
50 uint32_t xstart, uint32_t xend,
51 uint32_t outstep);
52 static void kernelU4(const RsExpandKernelDriverInfo *info,
53 uint32_t xstart, uint32_t xend,
54 uint32_t outstep);
55 static void kernelF1(const RsExpandKernelDriverInfo *info,
56 uint32_t xstart, uint32_t xend,
57 uint32_t outstep);
58 static void kernelF2(const RsExpandKernelDriverInfo *info,
59 uint32_t xstart, uint32_t xend,
60 uint32_t outstep);
61 static void kernelF4(const RsExpandKernelDriverInfo *info,
62 uint32_t xstart, uint32_t xend,
63 uint32_t outstep);
64 };
65
setGlobalObj(uint32_t slot,ObjectBase * data)66 void RsdCpuScriptIntrinsicResize::setGlobalObj(uint32_t slot, ObjectBase *data) {
67 rsAssert(slot == 0);
68 mAlloc.set(static_cast<Allocation *>(data));
69 }
70
cubicInterpolate(float4 p0,float4 p1,float4 p2,float4 p3,float x)71 static float4 cubicInterpolate(float4 p0,float4 p1,float4 p2,float4 p3, float x) {
72 return p1 + 0.5f * x * (p2 - p0 + x * (2.f * p0 - 5.f * p1 + 4.f * p2 - p3
73 + x * (3.f * (p1 - p2) + p3 - p0)));
74 }
75
cubicInterpolate(float2 p0,float2 p1,float2 p2,float2 p3,float x)76 static float2 cubicInterpolate(float2 p0,float2 p1,float2 p2,float2 p3, float x) {
77 return p1 + 0.5f * x * (p2 - p0 + x * (2.f * p0 - 5.f * p1 + 4.f * p2 - p3
78 + x * (3.f * (p1 - p2) + p3 - p0)));
79 }
80
/**
 * Scalar cubic (Catmull-Rom form) interpolation through four samples.
 *
 * Evaluates to p1 at x == 0 and to p2 at x == 1; p0 and p3 only shape the
 * curve in between. The polynomial is evaluated in nested (Horner) form.
 */
static float cubicInterpolate(float p0,float p1,float p2,float p3 , float x) {
    const float cubicPart = 3.f * (p1 - p2) + p3 - p0;
    const float quadPart = 2.f * p0 - 5.f * p1 + 4.f * p2 - p3 + x * cubicPart;
    const float linearPart = p2 - p0 + x * quadPart;
    return p1 + 0.5f * x * linearPart;
}
85
/**
 * Produces one bicubically filtered uchar4 pixel.
 *
 * @param yp0..yp3  the four source rows to blend (already chosen by the caller).
 * @param xf        horizontal sample position in source pixel coordinates.
 * @param yf        fractional vertical position between rows yp1 and yp2.
 * @param width     source row width in pixels, used to clamp column indices.
 * @return the filtered value, rounded (+0.5) and clamped to [0, 255].
 */
static uchar4 OneBiCubic(const uchar4 *yp0, const uchar4 *yp1, const uchar4 *yp2, const uchar4 *yp3,
                         float xf, float yf, int width) {
    // The first of the four taps lies one pixel left of the pixel holding xf.
    const int firstTap = (int) floor(xf - 1);
    const float xFrac = xf - floor(xf);
    const int lastCol = width - 1;

    // The two left taps are clamped at column 0, the two right taps at the
    // last column.
    const int c0 = rsMax(0, firstTap + 0);
    const int c1 = rsMax(0, firstTap + 1);
    const int c2 = rsMin(lastCol, firstTap + 2);
    const int c3 = rsMin(lastCol, firstTap + 3);

    // Horizontal pass over a single row.
    auto filterRow = [&](const uchar4 *row) -> float4 {
        return cubicInterpolate(convert_float4(row[c0]),
                                convert_float4(row[c1]),
                                convert_float4(row[c2]),
                                convert_float4(row[c3]), xFrac);
    };

    const float4 r0 = filterRow(yp0);
    const float4 r1 = filterRow(yp1);
    const float4 r2 = filterRow(yp2);
    const float4 r3 = filterRow(yp3);

    // Vertical pass, then round and saturate into the 8-bit range.
    float4 blended = cubicInterpolate(r0, r1, r2, r3, yf);
    blended = clamp(blended + 0.5f, 0.f, 255.f);
    return convert_uchar4(blended);
}
120
/**
 * Produces one bicubically filtered uchar2 pixel.
 *
 * @param yp0..yp3  the four source rows to blend (already chosen by the caller).
 * @param xf        horizontal sample position in source pixel coordinates.
 * @param yf        fractional vertical position between rows yp1 and yp2.
 * @param width     source row width in pixels, used to clamp column indices.
 * @return the filtered value, rounded (+0.5) and clamped to [0, 255].
 */
static uchar2 OneBiCubic(const uchar2 *yp0, const uchar2 *yp1, const uchar2 *yp2, const uchar2 *yp3,
                         float xf, float yf, int width) {
    // The first of the four taps lies one pixel left of the pixel holding xf.
    const int firstTap = (int) floor(xf - 1);
    const float xFrac = xf - floor(xf);
    const int lastCol = width - 1;

    // The two left taps are clamped at column 0, the two right taps at the
    // last column.
    const int c0 = rsMax(0, firstTap + 0);
    const int c1 = rsMax(0, firstTap + 1);
    const int c2 = rsMin(lastCol, firstTap + 2);
    const int c3 = rsMin(lastCol, firstTap + 3);

    // Horizontal pass, one row at a time.
    const uchar2 *const rows[4] = {yp0, yp1, yp2, yp3};
    float2 filtered[4];
    for (int r = 0; r < 4; r++) {
        filtered[r] = cubicInterpolate(convert_float2(rows[r][c0]),
                                       convert_float2(rows[r][c1]),
                                       convert_float2(rows[r][c2]),
                                       convert_float2(rows[r][c3]), xFrac);
    }

    // Vertical pass, then round and saturate into the 8-bit range.
    float2 blended = cubicInterpolate(filtered[0], filtered[1], filtered[2], filtered[3], yf);
    blended = clamp(blended + 0.5f, 0.f, 255.f);
    return convert_uchar2(blended);
}
155
/**
 * Produces one bicubically filtered single-channel 8-bit pixel.
 *
 * @param yp0..yp3  the four source rows to blend (already chosen by the caller).
 * @param xf        horizontal sample position in source pixel coordinates.
 * @param yf        fractional vertical position between rows yp1 and yp2.
 * @param width     source row width in pixels, used to clamp column indices.
 * @return the filtered value, rounded (+0.5) and clamped to [0, 255].
 */
static uchar OneBiCubic(const uchar *yp0, const uchar *yp1, const uchar *yp2, const uchar *yp3,
                        float xf, float yf, int width) {
    // The first of the four taps lies one pixel left of the pixel holding xf.
    const int firstTap = (int) floor(xf - 1);
    const float xFrac = xf - floor(xf);
    const int lastCol = width - 1;

    // The two left taps are clamped at column 0, the two right taps at the
    // last column.
    const int c0 = rsMax(0, firstTap + 0);
    const int c1 = rsMax(0, firstTap + 1);
    const int c2 = rsMin(lastCol, firstTap + 2);
    const int c3 = rsMin(lastCol, firstTap + 3);

    // Horizontal pass over a single row.
    auto filterRow = [&](const uchar *row) -> float {
        return cubicInterpolate((float)row[c0], (float)row[c1],
                                (float)row[c2], (float)row[c3], xFrac);
    };

    const float r0 = filterRow(yp0);
    const float r1 = filterRow(yp1);
    const float r2 = filterRow(yp2);
    const float r3 = filterRow(yp3);

    // Vertical pass, then round and saturate into the 8-bit range.
    float blended = cubicInterpolate(r0, r1, r2, r3, yf);
    blended = clamp(blended + 0.5f, 0.f, 255.f);
    return (uchar)blended;
}
179
180 extern "C" uint64_t rsdIntrinsicResize_oscctl_K(uint32_t xinc);
181
182 extern "C" void rsdIntrinsicResizeB4_K(
183 uchar4 *dst,
184 size_t count,
185 uint32_t xf,
186 uint32_t xinc,
187 uchar4 const *srcn,
188 uchar4 const *src0,
189 uchar4 const *src1,
190 uchar4 const *src2,
191 size_t xclip,
192 size_t avail,
193 uint64_t osc_ctl,
194 int32_t const *yr);
195
196 extern "C" void rsdIntrinsicResizeB2_K(
197 uchar2 *dst,
198 size_t count,
199 uint32_t xf,
200 uint32_t xinc,
201 uchar2 const *srcn,
202 uchar2 const *src0,
203 uchar2 const *src1,
204 uchar2 const *src2,
205 size_t xclip,
206 size_t avail,
207 uint64_t osc_ctl,
208 int32_t const *yr);
209
210 extern "C" void rsdIntrinsicResizeB1_K(
211 uchar *dst,
212 size_t count,
213 uint32_t xf,
214 uint32_t xinc,
215 uchar const *srcn,
216 uchar const *src0,
217 uchar const *src1,
218 uchar const *src2,
219 size_t xclip,
220 size_t avail,
221 uint64_t osc_ctl,
222 int32_t const *yr);
223
224 #if defined(ARCH_ARM_USE_INTRINSICS)
/**
 * Fills yr[0..3] with the vertical cubic weights for fraction yf in 16.16
 * fixed point.
 *
 * yr[1] and yr[2] equal the weights cubicInterpolate() applies to its two
 * middle samples. yr[0] and yr[3] are the outer-sample weights with the sign
 * flipped, so the consumer presumably subtracts those two terms (confirm
 * against the SIMD routine).
 */
static void mkYCoeff(int32_t *yr, float yf) {
    // yf, yf^2 and yf^3 scaled by 2^16.
    const int32_t t1 = rint(yf * 0x10000);
    const int32_t t2 = rint(yf * yf * 0x10000);
    const int32_t t3 = rint(yf * yf * yf * 0x10000);

    // Twice each weight; halved below.
    const int32_t doubled[4] = {
        t3 + t1 - 2 * t2,
        3 * t3 - 5 * t2 + 0x20000,
        4 * t2 + t1 - 3 * t3,
        t2 - t3,
    };

    for (int i = 0; i < 4; i++) {
        yr[i] = doubled[i] >> 1;
    }
}
235 #endif
236
/**
 * Produces one bicubically filtered float4 pixel.
 *
 * @param yp0..yp3  the four source rows to blend (already chosen by the caller).
 * @param xf        horizontal sample position in source pixel coordinates.
 * @param yf        fractional vertical position between rows yp1 and yp2.
 * @param width     source row width in pixels, used to clamp column indices.
 * @return the filtered value; unlike the 8-bit variants it is not clamped.
 */
static float4 OneBiCubic(const float4 *yp0, const float4 *yp1, const float4 *yp2, const float4 *yp3,
                         float xf, float yf, int width) {
    // The first of the four taps lies one pixel left of the pixel holding xf.
    const int firstTap = (int) floor(xf - 1);
    const float xFrac = xf - floor(xf);
    const int lastCol = width - 1;

    // The two left taps are clamped at column 0, the two right taps at the
    // last column.
    const int c0 = rsMax(0, firstTap + 0);
    const int c1 = rsMax(0, firstTap + 1);
    const int c2 = rsMin(lastCol, firstTap + 2);
    const int c3 = rsMin(lastCol, firstTap + 3);

    // Horizontal pass over a single row.
    auto filterRow = [&](const float4 *row) -> float4 {
        return cubicInterpolate(row[c0], row[c1], row[c2], row[c3], xFrac);
    };

    const float4 r0 = filterRow(yp0);
    const float4 r1 = filterRow(yp1);
    const float4 r2 = filterRow(yp2);
    const float4 r3 = filterRow(yp3);

    // Vertical pass.
    return cubicInterpolate(r0, r1, r2, r3, yf);
}
259
/**
 * Produces one bicubically filtered float2 pixel.
 *
 * @param yp0..yp3  the four source rows to blend (already chosen by the caller).
 * @param xf        horizontal sample position in source pixel coordinates.
 * @param yf        fractional vertical position between rows yp1 and yp2.
 * @param width     source row width in pixels, used to clamp column indices.
 * @return the filtered value; unlike the 8-bit variants it is not clamped.
 */
static float2 OneBiCubic(const float2 *yp0, const float2 *yp1, const float2 *yp2, const float2 *yp3,
                         float xf, float yf, int width) {
    // The first of the four taps lies one pixel left of the pixel holding xf.
    const int firstTap = (int) floor(xf - 1);
    const float xFrac = xf - floor(xf);
    const int lastCol = width - 1;

    // The two left taps are clamped at column 0, the two right taps at the
    // last column.
    const int c0 = rsMax(0, firstTap + 0);
    const int c1 = rsMax(0, firstTap + 1);
    const int c2 = rsMin(lastCol, firstTap + 2);
    const int c3 = rsMin(lastCol, firstTap + 3);

    // Horizontal pass, one row at a time.
    const float2 *const rows[4] = {yp0, yp1, yp2, yp3};
    float2 filtered[4];
    for (int r = 0; r < 4; r++) {
        filtered[r] = cubicInterpolate(rows[r][c0], rows[r][c1],
                                       rows[r][c2], rows[r][c3], xFrac);
    }

    // Vertical pass.
    return cubicInterpolate(filtered[0], filtered[1], filtered[2], filtered[3], yf);
}
282
OneBiCubic(const float * yp0,const float * yp1,const float * yp2,const float * yp3,float xf,float yf,int width)283 static float OneBiCubic(const float *yp0, const float *yp1, const float *yp2, const float *yp3,
284 float xf, float yf, int width) {
285 int startx = (int) floor(xf - 1);
286 xf = xf - floor(xf);
287 int maxx = width - 1;
288 int xs0 = rsMax(0, startx + 0);
289 int xs1 = rsMax(0, startx + 1);
290 int xs2 = rsMin(maxx, startx + 2);
291 int xs3 = rsMin(maxx, startx + 3);
292
293 float p0 = cubicInterpolate(yp0[xs0], yp0[xs1],
294 yp0[xs2], yp0[xs3], xf);
295 float p1 = cubicInterpolate(yp1[xs0], yp1[xs1],
296 yp1[xs2], yp1[xs3], xf);
297 float p2 = cubicInterpolate(yp2[xs0], yp2[xs1],
298 yp2[xs2], yp2[xs3], xf);
299 float p3 = cubicInterpolate(yp3[xs0], yp3[xs1],
300 yp3[xs2], yp3[xs3], xf);
301
302 float p = cubicInterpolate(p0, p1, p2, p3, yf);
303 return p;
304 }
305
kernelU4(const RsExpandKernelDriverInfo * info,uint32_t xstart,uint32_t xend,uint32_t outstep)306 void RsdCpuScriptIntrinsicResize::kernelU4(const RsExpandKernelDriverInfo *info,
307 uint32_t xstart, uint32_t xend,
308 uint32_t outstep) {
309 RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr;
310
311 if (!cp->mAlloc.get()) {
312 ALOGE("Resize executed without input, skipping");
313 return;
314 }
315 const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
316 const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY;
317 const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
318 const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
319
320 float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
321 int starty = (int) floor(yf - 1);
322 yf = yf - floor(yf);
323 int maxy = srcHeight - 1;
324 int ys0 = rsMax(0, starty + 0);
325 int ys1 = rsMax(0, starty + 1);
326 int ys2 = rsMin(maxy, starty + 2);
327 int ys3 = rsMin(maxy, starty + 3);
328
329 const uchar4 *yp0 = (const uchar4 *)(pin + stride * ys0);
330 const uchar4 *yp1 = (const uchar4 *)(pin + stride * ys1);
331 const uchar4 *yp2 = (const uchar4 *)(pin + stride * ys2);
332 const uchar4 *yp3 = (const uchar4 *)(pin + stride * ys3);
333
334 uchar4 *out = ((uchar4 *)info->outPtr[0]) + xstart;
335 uint32_t x1 = xstart;
336 uint32_t x2 = xend;
337
338 #if defined(ARCH_ARM_USE_INTRINSICS)
339 if (gArchUseSIMD && x2 > x1 && cp->scaleX < 4.0f) {
340 float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
341 long xf16 = rint(xf * 0x10000);
342 uint32_t xinc16 = rint(cp->scaleX * 0x10000);
343
344 int xoff = (xf16 >> 16) - 1;
345 int xclip = rsMax(0, xoff) - xoff;
346 int len = x2 - x1;
347
348 int32_t yr[4];
349 uint64_t osc_ctl = rsdIntrinsicResize_oscctl_K(xinc16);
350 mkYCoeff(yr, yf);
351
352 xoff += xclip;
353
354 rsdIntrinsicResizeB4_K(
355 out, len,
356 xf16 & 0xffff, xinc16,
357 yp0 + xoff, yp1 + xoff, yp2 + xoff, yp3 + xoff,
358 xclip, srcWidth - xoff + xclip,
359 osc_ctl, yr);
360 out += len;
361 x1 += len;
362 }
363 #endif
364
365 while(x1 < x2) {
366 float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
367 *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
368 out++;
369 x1++;
370 }
371 }
372
kernelU2(const RsExpandKernelDriverInfo * info,uint32_t xstart,uint32_t xend,uint32_t outstep)373 void RsdCpuScriptIntrinsicResize::kernelU2(const RsExpandKernelDriverInfo *info,
374 uint32_t xstart, uint32_t xend,
375 uint32_t outstep) {
376 RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr;
377
378 if (!cp->mAlloc.get()) {
379 ALOGE("Resize executed without input, skipping");
380 return;
381 }
382 const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
383 const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY;
384 const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
385 const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
386
387 float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
388 int starty = (int) floor(yf - 1);
389 yf = yf - floor(yf);
390 int maxy = srcHeight - 1;
391 int ys0 = rsMax(0, starty + 0);
392 int ys1 = rsMax(0, starty + 1);
393 int ys2 = rsMin(maxy, starty + 2);
394 int ys3 = rsMin(maxy, starty + 3);
395
396 const uchar2 *yp0 = (const uchar2 *)(pin + stride * ys0);
397 const uchar2 *yp1 = (const uchar2 *)(pin + stride * ys1);
398 const uchar2 *yp2 = (const uchar2 *)(pin + stride * ys2);
399 const uchar2 *yp3 = (const uchar2 *)(pin + stride * ys3);
400
401 uchar2 *out = ((uchar2 *)info->outPtr[0]) + xstart;
402 uint32_t x1 = xstart;
403 uint32_t x2 = xend;
404
405 #if defined(ARCH_ARM_USE_INTRINSICS)
406 if (gArchUseSIMD && x2 > x1 && cp->scaleX < 4.0f) {
407 float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
408 long xf16 = rint(xf * 0x10000);
409 uint32_t xinc16 = rint(cp->scaleX * 0x10000);
410
411 int xoff = (xf16 >> 16) - 1;
412 int xclip = rsMax(0, xoff) - xoff;
413 int len = x2 - x1;
414
415 int32_t yr[4];
416 uint64_t osc_ctl = rsdIntrinsicResize_oscctl_K(xinc16);
417 mkYCoeff(yr, yf);
418
419 xoff += xclip;
420
421 rsdIntrinsicResizeB2_K(
422 out, len,
423 xf16 & 0xffff, xinc16,
424 yp0 + xoff, yp1 + xoff, yp2 + xoff, yp3 + xoff,
425 xclip, srcWidth - xoff + xclip,
426 osc_ctl, yr);
427 out += len;
428 x1 += len;
429 }
430 #endif
431
432 while(x1 < x2) {
433 float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
434 *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
435 out++;
436 x1++;
437 }
438 }
439
kernelU1(const RsExpandKernelDriverInfo * info,uint32_t xstart,uint32_t xend,uint32_t outstep)440 void RsdCpuScriptIntrinsicResize::kernelU1(const RsExpandKernelDriverInfo *info,
441 uint32_t xstart, uint32_t xend,
442 uint32_t outstep) {
443 RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr;
444
445 if (!cp->mAlloc.get()) {
446 ALOGE("Resize executed without input, skipping");
447 return;
448 }
449 const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
450 const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY;
451 const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
452 const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
453
454 float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
455 int starty = (int) floor(yf - 1);
456 yf = yf - floor(yf);
457 int maxy = srcHeight - 1;
458 int ys0 = rsMax(0, starty + 0);
459 int ys1 = rsMax(0, starty + 1);
460 int ys2 = rsMin(maxy, starty + 2);
461 int ys3 = rsMin(maxy, starty + 3);
462
463 const uchar *yp0 = pin + stride * ys0;
464 const uchar *yp1 = pin + stride * ys1;
465 const uchar *yp2 = pin + stride * ys2;
466 const uchar *yp3 = pin + stride * ys3;
467
468 uchar *out = ((uchar *)info->outPtr[0]) + xstart;
469 uint32_t x1 = xstart;
470 uint32_t x2 = xend;
471
472 #if defined(ARCH_ARM_USE_INTRINSICS)
473 if (gArchUseSIMD && x2 > x1 && cp->scaleX < 4.0f) {
474 float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
475 long xf16 = rint(xf * 0x10000);
476 uint32_t xinc16 = rint(cp->scaleX * 0x10000);
477
478 int xoff = (xf16 >> 16) - 1;
479 int xclip = rsMax(0, xoff) - xoff;
480 int len = x2 - x1;
481
482 int32_t yr[4];
483 uint64_t osc_ctl = rsdIntrinsicResize_oscctl_K(xinc16);
484 mkYCoeff(yr, yf);
485
486 xoff += xclip;
487
488 rsdIntrinsicResizeB1_K(
489 out, len,
490 xf16 & 0xffff, xinc16,
491 yp0 + xoff, yp1 + xoff, yp2 + xoff, yp3 + xoff,
492 xclip, srcWidth - xoff + xclip,
493 osc_ctl, yr);
494 out += len;
495 x1 += len;
496 }
497 #endif
498
499 while(x1 < x2) {
500 float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
501 *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
502 out++;
503 x1++;
504 }
505 }
506
kernelF4(const RsExpandKernelDriverInfo * info,uint32_t xstart,uint32_t xend,uint32_t outstep)507 void RsdCpuScriptIntrinsicResize::kernelF4(const RsExpandKernelDriverInfo *info,
508 uint32_t xstart, uint32_t xend,
509 uint32_t outstep) {
510 RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr;
511
512 if (!cp->mAlloc.get()) {
513 ALOGE("Resize executed without input, skipping");
514 return;
515 }
516 const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
517 const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY;
518 const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
519 const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
520
521 float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
522 int starty = (int) floor(yf - 1);
523 yf = yf - floor(yf);
524 int maxy = srcHeight - 1;
525 int ys0 = rsMax(0, starty + 0);
526 int ys1 = rsMax(0, starty + 1);
527 int ys2 = rsMin(maxy, starty + 2);
528 int ys3 = rsMin(maxy, starty + 3);
529
530 const float4 *yp0 = (const float4 *)(pin + stride * ys0);
531 const float4 *yp1 = (const float4 *)(pin + stride * ys1);
532 const float4 *yp2 = (const float4 *)(pin + stride * ys2);
533 const float4 *yp3 = (const float4 *)(pin + stride * ys3);
534
535 float4 *out = ((float4 *)info->outPtr[0]) + xstart;
536 uint32_t x1 = xstart;
537 uint32_t x2 = xend;
538
539 while(x1 < x2) {
540 float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
541 *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
542 out++;
543 x1++;
544 }
545 }
546
kernelF2(const RsExpandKernelDriverInfo * info,uint32_t xstart,uint32_t xend,uint32_t outstep)547 void RsdCpuScriptIntrinsicResize::kernelF2(const RsExpandKernelDriverInfo *info,
548 uint32_t xstart, uint32_t xend,
549 uint32_t outstep) {
550 RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr;
551
552 if (!cp->mAlloc.get()) {
553 ALOGE("Resize executed without input, skipping");
554 return;
555 }
556 const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
557 const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY;
558 const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
559 const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
560
561 float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
562 int starty = (int) floor(yf - 1);
563 yf = yf - floor(yf);
564 int maxy = srcHeight - 1;
565 int ys0 = rsMax(0, starty + 0);
566 int ys1 = rsMax(0, starty + 1);
567 int ys2 = rsMin(maxy, starty + 2);
568 int ys3 = rsMin(maxy, starty + 3);
569
570 const float2 *yp0 = (const float2 *)(pin + stride * ys0);
571 const float2 *yp1 = (const float2 *)(pin + stride * ys1);
572 const float2 *yp2 = (const float2 *)(pin + stride * ys2);
573 const float2 *yp3 = (const float2 *)(pin + stride * ys3);
574
575 float2 *out = ((float2 *)info->outPtr[0]) + xstart;
576 uint32_t x1 = xstart;
577 uint32_t x2 = xend;
578
579 while(x1 < x2) {
580 float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
581 *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
582 out++;
583 x1++;
584 }
585 }
586
kernelF1(const RsExpandKernelDriverInfo * info,uint32_t xstart,uint32_t xend,uint32_t outstep)587 void RsdCpuScriptIntrinsicResize::kernelF1(const RsExpandKernelDriverInfo *info,
588 uint32_t xstart, uint32_t xend,
589 uint32_t outstep) {
590 RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr;
591
592 if (!cp->mAlloc.get()) {
593 ALOGE("Resize executed without input, skipping");
594 return;
595 }
596 const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
597 const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY;
598 const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
599 const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
600
601 float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
602 int starty = (int) floor(yf - 1);
603 yf = yf - floor(yf);
604 int maxy = srcHeight - 1;
605 int ys0 = rsMax(0, starty + 0);
606 int ys1 = rsMax(0, starty + 1);
607 int ys2 = rsMin(maxy, starty + 2);
608 int ys3 = rsMin(maxy, starty + 3);
609
610 const float *yp0 = (const float *)(pin + stride * ys0);
611 const float *yp1 = (const float *)(pin + stride * ys1);
612 const float *yp2 = (const float *)(pin + stride * ys2);
613 const float *yp3 = (const float *)(pin + stride * ys3);
614
615 float *out = ((float *)info->outPtr[0]) + xstart;
616 uint32_t x1 = xstart;
617 uint32_t x2 = xend;
618
619 while(x1 < x2) {
620 float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
621 *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
622 out++;
623 x1++;
624 }
625 }
626
RsdCpuScriptIntrinsicResize(RsdCpuReferenceImpl * ctx,const Script * s,const Element * e)627 RsdCpuScriptIntrinsicResize::RsdCpuScriptIntrinsicResize (
628 RsdCpuReferenceImpl *ctx, const Script *s, const Element *e)
629 : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_RESIZE) {
630
631 }
632
~RsdCpuScriptIntrinsicResize()633 RsdCpuScriptIntrinsicResize::~RsdCpuScriptIntrinsicResize() {
634 }
635
preLaunch(uint32_t slot,const Allocation ** ains,uint32_t inLen,Allocation * aout,const void * usr,uint32_t usrLen,const RsScriptCall * sc)636 void RsdCpuScriptIntrinsicResize::preLaunch(uint32_t slot,
637 const Allocation ** ains,
638 uint32_t inLen, Allocation * aout,
639 const void * usr, uint32_t usrLen,
640 const RsScriptCall *sc)
641 {
642 if (!mAlloc.get()) {
643 ALOGE("Resize executed without input, skipping");
644 return;
645 }
646 const uint32_t srcHeight = mAlloc->mHal.drvState.lod[0].dimY;
647 const uint32_t srcWidth = mAlloc->mHal.drvState.lod[0].dimX;
648
649 //check the data type to determine F or U.
650 if (mAlloc->getType()->getElement()->getType() == RS_TYPE_UNSIGNED_8) {
651 switch(mAlloc->getType()->getElement()->getVectorSize()) {
652 case 1:
653 mRootPtr = &kernelU1;
654 break;
655 case 2:
656 mRootPtr = &kernelU2;
657 break;
658 case 3:
659 case 4:
660 mRootPtr = &kernelU4;
661 break;
662 }
663 } else {
664 switch(mAlloc->getType()->getElement()->getVectorSize()) {
665 case 1:
666 mRootPtr = &kernelF1;
667 break;
668 case 2:
669 mRootPtr = &kernelF2;
670 break;
671 case 3:
672 case 4:
673 mRootPtr = &kernelF4;
674 break;
675 }
676 }
677
678 scaleX = (float)srcWidth / aout->mHal.drvState.lod[0].dimX;
679 scaleY = (float)srcHeight / aout->mHal.drvState.lod[0].dimY;
680
681 }
682
populateScript(Script * s)683 void RsdCpuScriptIntrinsicResize::populateScript(Script *s) {
684 s->mHal.info.exportedVariableCount = 1;
685 }
686
invokeFreeChildren()687 void RsdCpuScriptIntrinsicResize::invokeFreeChildren() {
688 mAlloc.clear();
689 }
690
rsdIntrinsic_Resize(RsdCpuReferenceImpl * ctx,const Script * s,const Element * e)691 RsdCpuScriptImpl * rsdIntrinsic_Resize(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e) {
692
693 return new RsdCpuScriptIntrinsicResize(ctx, s, e);
694 }
695
696 } // namespace renderscript
697 } // namespace android
698