• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2012 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 
18 #include "rsdCore.h"
19 #include "rsdIntrinsics.h"
20 #include "rsdAllocation.h"
21 
22 #include "rsdIntrinsicInlines.h"
23 
24 using namespace android;
25 using namespace android::renderscript;
26 
27 struct ConvolveParams {
28     float fp[104];
29     short ip[104];
30     float radius;
31     int iradius;
32     void **scratch;
33     size_t *scratchSize;
34     ObjectBaseRef<Allocation> alloc;
35 };
36 
ComputeGaussianWeights(ConvolveParams * cp)37 static void ComputeGaussianWeights(ConvolveParams *cp) {
38     // Compute gaussian weights for the blur
39     // e is the euler's number
40     float e = 2.718281828459045f;
41     float pi = 3.1415926535897932f;
42     // g(x) = ( 1 / sqrt( 2 * pi ) * sigma) * e ^ ( -x^2 / 2 * sigma^2 )
43     // x is of the form [-radius .. 0 .. radius]
44     // and sigma varies with radius.
45     // Based on some experimental radius values and sigma's
46     // we approximately fit sigma = f(radius) as
47     // sigma = radius * 0.4  + 0.6
48     // The larger the radius gets, the more our gaussian blur
49     // will resemble a box blur since with large sigma
50     // the gaussian curve begins to lose its shape
51     float sigma = 0.4f * cp->radius + 0.6f;
52 
53     // Now compute the coefficients. We will store some redundant values to save
54     // some math during the blur calculations precompute some values
55     float coeff1 = 1.0f / (sqrtf(2.0f * pi) * sigma);
56     float coeff2 = - 1.0f / (2.0f * sigma * sigma);
57 
58     float normalizeFactor = 0.0f;
59     float floatR = 0.0f;
60     int r;
61     cp->iradius = (float)ceil(cp->radius) + 0.5f;
62     for (r = -cp->iradius; r <= cp->iradius; r ++) {
63         floatR = (float)r;
64         cp->fp[r + cp->iradius] = coeff1 * powf(e, floatR * floatR * coeff2);
65         normalizeFactor += cp->fp[r + cp->iradius];
66     }
67 
68     //Now we need to normalize the weights because all our coefficients need to add up to one
69     normalizeFactor = 1.0f / normalizeFactor;
70     for (r = -cp->iradius; r <= cp->iradius; r ++) {
71         cp->fp[r + cp->iradius] *= normalizeFactor;
72         cp->ip[r + cp->iradius] = (short)(cp->ip[r + cp->iradius] * 32768);
73     }
74 }
75 
Blur_Bind(const Context * dc,const Script * script,void * intrinsicData,uint32_t slot,Allocation * data)76 static void Blur_Bind(const Context *dc, const Script *script,
77                              void * intrinsicData, uint32_t slot, Allocation *data) {
78     ConvolveParams *cp = (ConvolveParams *)intrinsicData;
79     rsAssert(slot == 1);
80     cp->alloc.set(data);
81 }
82 
Blur_SetVar(const Context * dc,const Script * script,void * intrinsicData,uint32_t slot,void * data,size_t dataLength)83 static void Blur_SetVar(const Context *dc, const Script *script, void * intrinsicData,
84                                uint32_t slot, void *data, size_t dataLength) {
85     ConvolveParams *cp = (ConvolveParams *)intrinsicData;
86     rsAssert(slot == 0);
87 
88     cp->radius = ((const float *)data)[0];
89     ComputeGaussianWeights(cp);
90 }
91 
92 
93 
OneV(const RsForEachStubParamStruct * p,float4 * out,int32_t x,int32_t y,const uchar * ptrIn,int iStride,const float * gPtr,int iradius)94 static void OneV(const RsForEachStubParamStruct *p, float4 *out, int32_t x, int32_t y,
95                  const uchar *ptrIn, int iStride, const float* gPtr, int iradius) {
96 
97     const uchar *pi = ptrIn + x*4;
98 
99     float4 blurredPixel = 0;
100     for (int r = -iradius; r <= iradius; r ++) {
101         int validY = rsMax((y + r), 0);
102         validY = rsMin(validY, (int)(p->dimY - 1));
103         const uchar4 *pvy = (const uchar4 *)&pi[validY * iStride];
104         float4 pf = convert_float4(pvy[0]);
105         blurredPixel += pf * gPtr[0];
106         gPtr++;
107     }
108 
109     out->xyzw = blurredPixel;
110 }
111 
112 extern "C" void rsdIntrinsicBlurVF_K(void *dst, const void *pin, int stride, const void *gptr, int rct, int x1, int x2);
113 extern "C" void rsdIntrinsicBlurHF_K(void *dst, const void *pin, const void *gptr, int rct, int x1, int x2);
114 
OneVF(float4 * out,const uchar * ptrIn,int iStride,const float * gPtr,int ct,int x1,int x2)115 static void OneVF(float4 *out,
116                   const uchar *ptrIn, int iStride, const float* gPtr, int ct,
117                   int x1, int x2) {
118 
119 #if defined(ARCH_ARM_HAVE_NEON)
120     {
121         int t = (x2 - x1);
122         t &= ~1;
123         if(t) {
124             rsdIntrinsicBlurVF_K(out, ptrIn, iStride, gPtr, ct, x1, x1 + t);
125         }
126         x1 += t;
127     }
128 #endif
129 
130     while(x2 > x1) {
131         const uchar *pi = ptrIn + x1 * 4;
132         float4 blurredPixel = 0;
133         const float* gp = gPtr;
134 
135         for (int r = 0; r < ct; r++) {
136             float4 pf = convert_float4(((const uchar4 *)pi)[0]);
137             blurredPixel += pf * gp[0];
138             pi += iStride;
139             gp++;
140         }
141         out->xyzw = blurredPixel;
142         x1++;
143         out++;
144         gPtr++;
145     }
146 }
147 
OneH(const RsForEachStubParamStruct * p,uchar4 * out,int32_t x,const float4 * ptrIn,const float * gPtr,int iradius)148 static void OneH(const RsForEachStubParamStruct *p, uchar4 *out, int32_t x,
149                 const float4 *ptrIn, const float* gPtr, int iradius) {
150 
151     float4 blurredPixel = 0;
152     for (int r = -iradius; r <= iradius; r ++) {
153         int validX = rsMax((x + r), 0);
154         validX = rsMin(validX, (int)(p->dimX - 1));
155         float4 pf = ptrIn[validX];
156         blurredPixel += pf * gPtr[0];
157         gPtr++;
158     }
159 
160     out->xyzw = convert_uchar4(blurredPixel);
161 }
162 
163 
Blur_uchar4(const RsForEachStubParamStruct * p,uint32_t xstart,uint32_t xend,uint32_t instep,uint32_t outstep)164 static void Blur_uchar4(const RsForEachStubParamStruct *p,
165                                     uint32_t xstart, uint32_t xend,
166                                     uint32_t instep, uint32_t outstep) {
167     float stackbuf[4 * 2048];
168     float *buf = &stackbuf[0];
169     ConvolveParams *cp = (ConvolveParams *)p->usr;
170     if (!cp->alloc.get()) {
171         ALOGE("Blur executed without input, skipping");
172         return;
173     }
174     DrvAllocation *din = (DrvAllocation *)cp->alloc->mHal.drv;
175     const uchar *pin = (const uchar *)din->lod[0].mallocPtr;
176 
177     uchar4 *out = (uchar4 *)p->out;
178     uint32_t x1 = xstart;
179     uint32_t x2 = xend;
180 
181     if (p->dimX > 2048) {
182         if ((p->dimX > cp->scratchSize[p->lid]) || !cp->scratch[p->lid]) {
183             cp->scratch[p->lid] = realloc(cp->scratch[p->lid], p->dimX * 16);
184             cp->scratchSize[p->lid] = p->dimX;
185         }
186         buf = (float *)cp->scratch[p->lid];
187     }
188     float4 *fout = (float4 *)buf;
189 
190     int y = p->y;
191     uint32_t vx1 = x1;
192     uint32_t vx2 = x2;
193 
194     if (vx1 > (uint32_t)cp->iradius) {
195         vx1 -= cp->iradius;
196     } else {
197         vx1 = 0;
198     }
199     vx2 += cp->iradius;
200     if (vx2 >= p->dimX) {
201         vx2 = p->dimX - 1;
202     }
203 
204     if ((y > cp->iradius) && (y < ((int)p->dimY - cp->iradius))) {
205         const uchar *pi = pin + (y - cp->iradius) * din->lod[0].stride;
206         OneVF(fout + vx1, pi, din->lod[0].stride, cp->fp, cp->iradius * 2 + 1, vx1, vx2);
207     } else {
208         while(vx2 > vx1) {
209             OneV(p, fout, vx1, y, pin, din->lod[0].stride, cp->fp, cp->iradius);
210             fout++;
211             vx1++;
212         }
213     }
214 
215     x1 = xstart;
216     while ((x1 < (uint32_t)cp->iradius) && (x1 < x2)) {
217         OneH(p, out, x1, (float4 *)buf, cp->fp, cp->iradius);
218         out++;
219         x1++;
220     }
221 #if defined(ARCH_ARM_HAVE_NEON)
222     if ((x1 + cp->iradius) < x2) {
223         rsdIntrinsicBlurHF_K(out, ((float4 *)buf) - cp->iradius, cp->fp, cp->iradius * 2 + 1, x1, x2 - cp->iradius);
224         out += (x2 - cp->iradius) - x1;
225         x1 = x2 - cp->iradius;
226     }
227 #endif
228     while(x2 > x1) {
229         OneH(p, out, x1, (float4 *)buf, cp->fp, cp->iradius);
230         out++;
231         x1++;
232     }
233 
234 }
235 
Destroy(const Context * rsc,const Script * script,void * intrinsicData)236 static void Destroy(const Context *rsc, const Script *script, void * intrinsicData) {
237     RsdHal * dc = (RsdHal *)rsc->mHal.drv;
238     ConvolveParams *cp = (ConvolveParams *)intrinsicData;
239 
240     if (cp) {
241         if (cp->scratch) {
242             for (size_t i = 0; i < dc->mWorkers.mCount + 1; i++) {
243                 if (cp->scratch[i]) {
244                     free(cp->scratch[i]);
245                 }
246             }
247             free(cp->scratch);
248         }
249         if (cp->scratchSize) {
250             free(cp->scratchSize);
251         }
252         free(cp);
253     }
254 }
255 
rsdIntrinsic_InitBlur(const android::renderscript::Context * rsc,android::renderscript::Script * script,RsdIntriniscFuncs_t * funcs)256 void * rsdIntrinsic_InitBlur(const android::renderscript::Context *rsc,
257                                     android::renderscript::Script *script,
258                                     RsdIntriniscFuncs_t *funcs) {
259 
260     RsdHal * dc = (RsdHal *)rsc->mHal.drv;
261 
262     script->mHal.info.exportedVariableCount = 2;
263     funcs->setVarObj = Blur_Bind;
264     funcs->setVar = Blur_SetVar;
265     funcs->root = Blur_uchar4;
266     funcs->destroy = Destroy;
267 
268     ConvolveParams *cp = (ConvolveParams *)calloc(1, sizeof(ConvolveParams));
269     if (!cp) {
270         return NULL;
271     }
272 
273     cp->radius = 5;
274     cp->scratch = (void **)calloc(dc->mWorkers.mCount + 1, sizeof(void *));
275     cp->scratchSize = (size_t *)calloc(dc->mWorkers.mCount + 1, sizeof(size_t));
276     if (!cp->scratch || !cp->scratchSize) {
277         Destroy(rsc, script, cp);
278         return NULL;
279     }
280 
281     ComputeGaussianWeights(cp);
282     return cp;
283 }
284 
285 
286