/*
 * Copyright 2006 The Android Open Source Project
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */


#include "SkBlurMask.h"
#include "SkMath.h"
#include "SkTemplates.h"
#include "SkEndian.h"

// Unrolling the integer blur kernel seems to give us a ~15% speedup on Windows,
// breakeven on Mac, and ~15% slowdown on Linux.
// Reading a word at a time when building the sum buffer seems to give
// us no appreciable speedup on Windows or Mac, and a 2% slowdown on Linux.
#if defined(SK_BUILD_FOR_WIN32)
#define UNROLL_KERNEL_LOOP 1
#endif

/** The sum buffer is an array of u32 to hold the accumulated sum of all of the
    src values at their position, plus all values above and to the left.
    When we sample into this buffer, we need an initial row and column of 0s,
    so we have an index correspondence as follows:

    src[i, j] == sum[i+1, j+1]
    sum[0, j] == sum[i, 0] == 0

    We assume that the sum buffer's stride == its width
 */
static void build_sum_buffer(uint32_t sum[], int srcW, int srcH,
                             const uint8_t src[], int srcRB) {
    int sumW = srcW + 1;

    SkASSERT(srcRB >= srcW);
    // mod srcRB so we can apply it after each row
    srcRB -= srcW;

    int x, y;

    // zero out the top row and column
    memset(sum, 0, sumW * sizeof(sum[0]));
    sum += sumW;

    // special case first row
    uint32_t X = 0;
    *sum++ = 0; // initialize the first column to 0
    for (x = srcW - 1; x >= 0; --x) {
        X = *src++ + X;
        *sum++ = X;
    }
    src += srcRB;

    // now do the rest of the rows
    for (y = srcH - 1; y > 0; --y) {
        uint32_t L = 0;
        uint32_t C = 0;
        *sum++ = 0; // initialize the first column to 0

        // align src to a 4-byte boundary before the unrolled loop
        for (x = srcW - 1; !SkIsAlign4((intptr_t) src) && x >= 0; x--) {
            uint32_t T = sum[-sumW];
            X = *src++ + L + T - C;
            *sum++ = X;
            L = X;
            C = T;
        }

        for (; x >= 4; x -= 4) {
            uint32_t T = sum[-sumW];
            X = *src++ + L + T - C;
            *sum++ = X;
            L = X;
            C = T;
            T = sum[-sumW];
            X = *src++ + L + T - C;
            *sum++ = X;
            L = X;
            C = T;
            T = sum[-sumW];
            X = *src++ + L + T - C;
            *sum++ = X;
            L = X;
            C = T;
            T = sum[-sumW];
            X = *src++ + L + T - C;
            *sum++ = X;
            L = X;
            C = T;
        }

        for (; x >= 0; --x) {
            uint32_t T = sum[-sumW];
            X = *src++ + L + T - C;
            *sum++ = X;
            L = X;
            C = T;
        }
        src += srcRB;
    }
}
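
// Illustrative sketch (not called by anything in this file): with the sum
// buffer laid out as above, the total of the src values over any rectangle
// [x0, x1) x [y0, y1) falls out of four corner samples. The zeroed top row
// and left column are what let x0 == 0 and y0 == 0 work without branching.
static inline uint32_t example_rect_sum(const uint32_t sum[], int sumStride,
                                        int x0, int y0, int x1, int y1) {
    return sum[y1 * sumStride + x1] - sum[y0 * sumStride + x1]
         - sum[y1 * sumStride + x0] + sum[y0 * sumStride + x0];
}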

/**
 * This is the path for apply_kernel() to be taken when the kernel
 * is wider than the source image.
 */
static void kernel_clamped(uint8_t dst[], int rx, int ry, const uint32_t sum[],
                           int sw, int sh) {
    SkASSERT(2*rx > sw);

    uint32_t scale = (1 << 24) / ((2*rx + 1)*(2*ry + 1));

    int sumStride = sw + 1;

    int dw = sw + 2*rx;
    int dh = sh + 2*ry;

    int prev_y = -2*ry;
    int next_y = 1;

    for (int y = 0; y < dh; y++) {
        int py = SkClampPos(prev_y) * sumStride;
        int ny = SkFastMin32(next_y, sh) * sumStride;

        int prev_x = -2*rx;
        int next_x = 1;

        for (int x = 0; x < dw; x++) {
            int px = SkClampPos(prev_x);
            int nx = SkFastMin32(next_x, sw);

            uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
            *dst++ = SkToU8(tmp * scale >> 24);

            prev_x += 1;
            next_x += 1;
        }

        prev_y += 1;
        next_y += 1;
    }
}
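
// Worked example of the 8.24 fixed-point normalization used above, assuming
// rx == ry == 2: the kernel covers (2*2 + 1)^2 == 25 samples, so
// scale == (1 << 24) / 25 == 671088. Twenty-five fully opaque samples sum to
// 25 * 255 == 6375, and (6375 * 671088) >> 24 == 254, i.e. full coverage up
// to the truncation in the integer divide.
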
/**
 *  sw and sh are the width and height of the src. Since the sum buffer
 *  matches that, but has an extra row and col at the beginning (with zeros),
 *  we can just use sw and sh as our "max" values for pinning coordinates
 *  when sampling into sum[][]
 *
 *  The inner loop is conceptually simple; we break it into several sections
 *  to improve performance. Here's the original version:
        for (int x = 0; x < dw; x++) {
            int px = SkClampPos(prev_x);
            int nx = SkFastMin32(next_x, sw);

            uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
            *dst++ = SkToU8(tmp * scale >> 24);

            prev_x += 1;
            next_x += 1;
        }
 *  The sections are:
 *     left-hand section, where prev_x is clamped to 0
 *     center section, where neither prev_x nor next_x is clamped
 *     right-hand section, where next_x is clamped to sw
 *  On some operating systems, the center section is unrolled for additional
 *  speedup.
*/
static void apply_kernel(uint8_t dst[], int rx, int ry, const uint32_t sum[],
                         int sw, int sh) {
    if (2*rx > sw) {
        kernel_clamped(dst, rx, ry, sum, sw, sh);
        return;
    }

    uint32_t scale = (1 << 24) / ((2*rx + 1)*(2*ry + 1));

    int sumStride = sw + 1;

    int dw = sw + 2*rx;
    int dh = sh + 2*ry;

    int prev_y = -2*ry;
    int next_y = 1;

    SkASSERT(2*rx <= dw - 2*rx);

    for (int y = 0; y < dh; y++) {
        int py = SkClampPos(prev_y) * sumStride;
        int ny = SkFastMin32(next_y, sh) * sumStride;

        int prev_x = -2*rx;
        int next_x = 1;
        int x = 0;

        for (; x < 2*rx; x++) {
            SkASSERT(prev_x <= 0);
            SkASSERT(next_x <= sw);

            int px = 0;
            int nx = next_x;

            uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
            *dst++ = SkToU8(tmp * scale >> 24);

            prev_x += 1;
            next_x += 1;
        }

        int i0 = prev_x + py;
        int i1 = next_x + ny;
        int i2 = next_x + py;
        int i3 = prev_x + ny;

#if UNROLL_KERNEL_LOOP
        for (; x < dw - 2*rx - 4; x += 4) {
            SkASSERT(prev_x >= 0);
            SkASSERT(next_x <= sw);

            uint32_t tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
            *dst++ = SkToU8(tmp * scale >> 24);
            tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
            *dst++ = SkToU8(tmp * scale >> 24);
            tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
            *dst++ = SkToU8(tmp * scale >> 24);
            tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
            *dst++ = SkToU8(tmp * scale >> 24);

            prev_x += 4;
            next_x += 4;
        }
#endif

        for (; x < dw - 2*rx; x++) {
            SkASSERT(prev_x >= 0);
            SkASSERT(next_x <= sw);

            uint32_t tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
            *dst++ = SkToU8(tmp * scale >> 24);

            prev_x += 1;
            next_x += 1;
        }

        for (; x < dw; x++) {
            SkASSERT(prev_x >= 0);
            SkASSERT(next_x > sw);

            int px = prev_x;
            int nx = sw;

            uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
            *dst++ = SkToU8(tmp * scale >> 24);

            prev_x += 1;
            next_x += 1;
        }

        prev_y += 1;
        next_y += 1;
    }
}
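
// Minimal usage sketch of one box-blur pass, assuming srcPlane is a tightly
// packed sw x sh A8 plane and dst has room for (sw + 2*r) x (sh + 2*r)
// bytes; this helper is illustrative and is not called by the code below.
static void example_box_blur_pass(uint8_t dst[], const uint8_t srcPlane[],
                                  int sw, int sh, int r) {
    SkAutoTMalloc<uint32_t> sum((sw + 1) * (sh + 1));
    build_sum_buffer(sum.get(), sw, sh, srcPlane, sw);
    apply_kernel(dst, r, r, sum.get(), sw, sh);
}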

/**
 * This is the path for apply_kernel_interp() to be taken when the kernel
 * is wider than the source image.
 */
static void kernel_interp_clamped(uint8_t dst[], int rx, int ry,
                const uint32_t sum[], int sw, int sh, U8CPU outer_weight) {
    SkASSERT(2*rx > sw);

    int inner_weight = 255 - outer_weight;

    // round these guys up if they're bigger than 127
    outer_weight += outer_weight >> 7;
    inner_weight += inner_weight >> 7;

    uint32_t outer_scale = (outer_weight << 16) / ((2*rx + 1)*(2*ry + 1));
    uint32_t inner_scale = (inner_weight << 16) / ((2*rx - 1)*(2*ry - 1));

    int sumStride = sw + 1;

    int dw = sw + 2*rx;
    int dh = sh + 2*ry;

    int prev_y = -2*ry;
    int next_y = 1;

    for (int y = 0; y < dh; y++) {
        int py = SkClampPos(prev_y) * sumStride;
        int ny = SkFastMin32(next_y, sh) * sumStride;

        int ipy = SkClampPos(prev_y + 1) * sumStride;
        int iny = SkClampMax(next_y - 1, sh) * sumStride;

        int prev_x = -2*rx;
        int next_x = 1;

        for (int x = 0; x < dw; x++) {
            int px = SkClampPos(prev_x);
            int nx = SkFastMin32(next_x, sw);

            int ipx = SkClampPos(prev_x + 1);
            int inx = SkClampMax(next_x - 1, sw);

            uint32_t outer_sum = sum[px+py] + sum[nx+ny]
                               - sum[nx+py] - sum[px+ny];
            uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]
                               - sum[inx+ipy] - sum[ipx+iny];
            *dst++ = SkToU8((outer_sum * outer_scale
                           + inner_sum * inner_scale) >> 24);

            prev_x += 1;
            next_x += 1;
        }
        prev_y += 1;
        next_y += 1;
    }
}
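
// Worked example of the weight setup above, assuming outer_weight == 128:
// inner_weight == 255 - 128 == 127, and the >> 7 round-up yields
// 128 + 1 == 129 and 127 + 0 == 127. The two weights then total 256, so
// (outer_sum * outer_scale + inner_sum * inner_scale) >> 24 maps full
// coverage back to (roughly) 255.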

/**
 *  sw and sh are the width and height of the src. Since the sum buffer
 *  matches that, but has an extra row and col at the beginning (with zeros),
 *  we can just use sw and sh as our "max" values for pinning coordinates
 *  when sampling into sum[][]
 *
 *  The inner loop is conceptually simple; we break it into several variants
 *  to improve performance. Here's the original version:
        for (int x = 0; x < dw; x++) {
            int px = SkClampPos(prev_x);
            int nx = SkFastMin32(next_x, sw);

            int ipx = SkClampPos(prev_x + 1);
            int inx = SkClampMax(next_x - 1, sw);

            uint32_t outer_sum = sum[px+py] + sum[nx+ny]
                               - sum[nx+py] - sum[px+ny];
            uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]
                               - sum[inx+ipy] - sum[ipx+iny];
            *dst++ = SkToU8((outer_sum * outer_scale
                           + inner_sum * inner_scale) >> 24);

            prev_x += 1;
            next_x += 1;
        }
 *  The sections are:
 *     left-hand section, where prev_x is clamped to 0
 *     center section, where neither prev_x nor next_x is clamped
 *     right-hand section, where next_x is clamped to sw
 *  On some operating systems, the center section is unrolled for additional
 *  speedup.
*/
static void apply_kernel_interp(uint8_t dst[], int rx, int ry,
                const uint32_t sum[], int sw, int sh, U8CPU outer_weight) {
    SkASSERT(rx > 0 && ry > 0);
    SkASSERT(outer_weight <= 255);

    if (2*rx > sw) {
        kernel_interp_clamped(dst, rx, ry, sum, sw, sh, outer_weight);
        return;
    }

    int inner_weight = 255 - outer_weight;

    // round these guys up if they're bigger than 127
    outer_weight += outer_weight >> 7;
    inner_weight += inner_weight >> 7;

    uint32_t outer_scale = (outer_weight << 16) / ((2*rx + 1)*(2*ry + 1));
    uint32_t inner_scale = (inner_weight << 16) / ((2*rx - 1)*(2*ry - 1));

    int sumStride = sw + 1;

    int dw = sw + 2*rx;
    int dh = sh + 2*ry;

    int prev_y = -2*ry;
    int next_y = 1;

    SkASSERT(2*rx <= dw - 2*rx);

    for (int y = 0; y < dh; y++) {
        int py = SkClampPos(prev_y) * sumStride;
        int ny = SkFastMin32(next_y, sh) * sumStride;

        int ipy = SkClampPos(prev_y + 1) * sumStride;
        int iny = SkClampMax(next_y - 1, sh) * sumStride;

        int prev_x = -2*rx;
        int next_x = 1;
        int x = 0;

        for (; x < 2*rx; x++) {
            SkASSERT(prev_x < 0);
            SkASSERT(next_x <= sw);

            int px = 0;
            int nx = next_x;

            int ipx = 0;
            int inx = next_x - 1;

            uint32_t outer_sum = sum[px+py] + sum[nx+ny]
                               - sum[nx+py] - sum[px+ny];
            uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]
                               - sum[inx+ipy] - sum[ipx+iny];
            *dst++ = SkToU8((outer_sum * outer_scale
                           + inner_sum * inner_scale) >> 24);

            prev_x += 1;
            next_x += 1;
        }

        int i0 = prev_x + py;
        int i1 = next_x + ny;
        int i2 = next_x + py;
        int i3 = prev_x + ny;
        int i4 = prev_x + 1 + ipy;
        int i5 = next_x - 1 + iny;
        int i6 = next_x - 1 + ipy;
        int i7 = prev_x + 1 + iny;

#if UNROLL_KERNEL_LOOP
        for (; x < dw - 2*rx - 4; x += 4) {
            SkASSERT(prev_x >= 0);
            SkASSERT(next_x <= sw);

            uint32_t outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
            uint32_t inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
            *dst++ = SkToU8((outer_sum * outer_scale
                           + inner_sum * inner_scale) >> 24);
            outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
            inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
            *dst++ = SkToU8((outer_sum * outer_scale
                           + inner_sum * inner_scale) >> 24);
            outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
            inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
            *dst++ = SkToU8((outer_sum * outer_scale
                           + inner_sum * inner_scale) >> 24);
            outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
            inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
            *dst++ = SkToU8((outer_sum * outer_scale
                           + inner_sum * inner_scale) >> 24);

            prev_x += 4;
            next_x += 4;
        }
#endif

        for (; x < dw - 2*rx; x++) {
            SkASSERT(prev_x >= 0);
            SkASSERT(next_x <= sw);

            uint32_t outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
            uint32_t inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
            *dst++ = SkToU8((outer_sum * outer_scale
                           + inner_sum * inner_scale) >> 24);

            prev_x += 1;
            next_x += 1;
        }

        for (; x < dw; x++) {
            SkASSERT(prev_x >= 0);
            SkASSERT(next_x > sw);

            int px = prev_x;
            int nx = sw;

            int ipx = prev_x + 1;
            int inx = sw;

            uint32_t outer_sum = sum[px+py] + sum[nx+ny]
                               - sum[nx+py] - sum[px+ny];
            uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]
                               - sum[inx+ipy] - sum[ipx+iny];
            *dst++ = SkToU8((outer_sum * outer_scale
                           + inner_sum * inner_scale) >> 24);

            prev_x += 1;
            next_x += 1;
        }

        prev_y += 1;
        next_y += 1;
    }
}
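
// In effect apply_kernel_interp() blends two box kernels,
//
//     dst = (outer_weight * box(rx) + inner_weight * box(rx - 1)) / 255
//
// which is how a fractional blur radius between rx - 1 and rx is
// approximated with integer-radius passes.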

#include "SkColorPriv.h"

static void merge_src_with_blur(uint8_t dst[], int dstRB,
                                const uint8_t src[], int srcRB,
                                const uint8_t blur[], int blurRB,
                                int sw, int sh) {
    dstRB -= sw;
    srcRB -= sw;
    blurRB -= sw;
    while (--sh >= 0) {
        for (int x = sw - 1; x >= 0; --x) {
            *dst = SkToU8(SkAlphaMul(*blur, SkAlpha255To256(*src)));
            dst += 1;
            src += 1;
            blur += 1;
        }
        dst += dstRB;
        src += srcRB;
        blur += blurRB;
    }
}
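
// The per-pixel op above computes dst = blur * src / 255: SkAlpha255To256()
// maps 0..255 to 1..256 so that a fully opaque src leaves the blur value
// intact, e.g. SkAlphaMul(200, SkAlpha255To256(255)) == (200 * 256) >> 8
// == 200.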

static void clamp_with_orig(uint8_t dst[], int dstRowBytes,
                            const uint8_t src[], int srcRowBytes,
                            int sw, int sh,
                            SkBlurMask::Style style) {
    int x;
    while (--sh >= 0) {
        switch (style) {
        case SkBlurMask::kSolid_Style:
            for (x = sw - 1; x >= 0; --x) {
                int s = *src;
                int d = *dst;
                *dst = SkToU8(s + d - SkMulDiv255Round(s, d));
                dst += 1;
                src += 1;
            }
            break;
        case SkBlurMask::kOuter_Style:
            for (x = sw - 1; x >= 0; --x) {
                if (*src) {
                    *dst = SkToU8(SkAlphaMul(*dst, SkAlpha255To256(255 - *src)));
                }
                dst += 1;
                src += 1;
            }
            break;
        default:
            SkDEBUGFAIL("Unexpected blur style here");
            break;
        }
        dst += dstRowBytes - sw;
        src += srcRowBytes - sw;
    }
}
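
// For kSolid_Style the expression above is the usual union of coverages,
// s + d - s*d/255 (src-over of two alphas); e.g. s == d == 128 gives
// 128 + 128 - SkMulDiv255Round(128, 128) == 256 - 64 == 192.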

///////////////////////////////////////////////////////////////////////////////

// we use a local function to wrap the class static method to work around
// a bug in gcc98
void SkMask_FreeImage(uint8_t* image);
void SkMask_FreeImage(uint8_t* image) {
    SkMask::FreeImage(image);
}

bool SkBlurMask::Blur(SkMask* dst, const SkMask& src,
                      SkScalar radius, Style style, Quality quality,
                      SkIPoint* margin)
{
    if (src.fFormat != SkMask::kA8_Format) {
        return false;
    }

    // Force high quality off for small radii (performance)
    if (radius < SkIntToScalar(3)) quality = kLow_Quality;

    // highQuality: use three box blur passes as a cheap way to approximate a Gaussian blur
    int passCount = (quality == kHigh_Quality) ? 3 : 1;
    SkScalar passRadius = SkScalarDiv(radius, SkScalarSqrt(SkIntToScalar(passCount)));

    int rx = SkScalarCeil(passRadius);
    int outer_weight = 255 - SkScalarRound((SkIntToScalar(rx) - passRadius) * 255);

    SkASSERT(rx >= 0);
    SkASSERT((unsigned)outer_weight <= 255);
    if (rx <= 0) {
        return false;
    }

    int ry = rx;    // only do square blur for now

    int padx = passCount * rx;
    int pady = passCount * ry;
    if (margin) {
        margin->set(padx, pady);
    }
    dst->fBounds.set(src.fBounds.fLeft - padx, src.fBounds.fTop - pady,
        src.fBounds.fRight + padx, src.fBounds.fBottom + pady);
    dst->fRowBytes = dst->fBounds.width();
    dst->fFormat = SkMask::kA8_Format;
    dst->fImage = NULL;

    if (src.fImage) {
        size_t dstSize = dst->computeImageSize();
        if (0 == dstSize) {
            return false;   // too big to allocate, abort
        }

        int             sw = src.fBounds.width();
        int             sh = src.fBounds.height();
        const uint8_t*  sp = src.fImage;
        uint8_t*        dp = SkMask::AllocImage(dstSize);

        SkAutoTCallVProc<uint8_t, SkMask_FreeImage> autoCall(dp);

        // build the blurry destination
        {
            const size_t storageW = sw + 2 * (passCount - 1) * rx + 1;
            const size_t storageH = sh + 2 * (passCount - 1) * ry + 1;
            SkAutoTMalloc<uint32_t> storage(storageW * storageH);
            uint32_t*               sumBuffer = storage.get();

            // pass 1: sp is source, dp is destination
            build_sum_buffer(sumBuffer, sw, sh, sp, src.fRowBytes);
            if (outer_weight == 255) {
                apply_kernel(dp, rx, ry, sumBuffer, sw, sh);
            } else {
                apply_kernel_interp(dp, rx, ry, sumBuffer, sw, sh, outer_weight);
            }

            if (quality == kHigh_Quality) {
                // pass 2: dp is source, tmpBuffer is destination
                int tmp_sw = sw + 2 * rx;
                int tmp_sh = sh + 2 * ry;
                SkAutoTMalloc<uint8_t>  tmpBuffer(dstSize);
                build_sum_buffer(sumBuffer, tmp_sw, tmp_sh, dp, tmp_sw);
                if (outer_weight == 255)
                    apply_kernel(tmpBuffer.get(), rx, ry, sumBuffer, tmp_sw, tmp_sh);
                else
                    apply_kernel_interp(tmpBuffer.get(), rx, ry, sumBuffer,
                                        tmp_sw, tmp_sh, outer_weight);

                // pass 3: tmpBuffer is source, dp is destination
                tmp_sw += 2 * rx;
                tmp_sh += 2 * ry;
                build_sum_buffer(sumBuffer, tmp_sw, tmp_sh, tmpBuffer.get(), tmp_sw);
                if (outer_weight == 255)
                    apply_kernel(dp, rx, ry, sumBuffer, tmp_sw, tmp_sh);
                else
                    apply_kernel_interp(dp, rx, ry, sumBuffer, tmp_sw, tmp_sh,
                                        outer_weight);
            }
        }

        dst->fImage = dp;
        // if need be, alloc the "real" dst (same size as src) and copy/merge
        // the blur into it (applying the src)
        if (style == kInner_Style) {
            // now we allocate the "real" dst, mirror the size of src
            size_t srcSize = src.computeImageSize();
            if (0 == srcSize) {
                return false;   // too big to allocate, abort
            }
            dst->fImage = SkMask::AllocImage(srcSize);
            merge_src_with_blur(dst->fImage, src.fRowBytes,
                                sp, src.fRowBytes,
                                dp + passCount * (rx + ry * dst->fRowBytes),
                                dst->fRowBytes, sw, sh);
            SkMask::FreeImage(dp);
        } else if (style != kNormal_Style) {
            clamp_with_orig(dp + passCount * (rx + ry * dst->fRowBytes),
                            dst->fRowBytes, sp, src.fRowBytes, sw, sh, style);
        }
        (void)autoCall.detach();
    }

    if (style == kInner_Style) {
        dst->fBounds = src.fBounds; // restore trimmed bounds
        dst->fRowBytes = src.fRowBytes;
    }

    return true;
}
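
// Minimal caller-side sketch (hypothetical, assuming src is a populated A8
// mask): blur by 4px using the triple-pass high-quality path, then release
// the image when done.
//
//     SkMask blurred;
//     SkIPoint margin;
//     if (SkBlurMask::Blur(&blurred, src, SkIntToScalar(4),
//                          SkBlurMask::kNormal_Style,
//                          SkBlurMask::kHigh_Quality, &margin)) {
//         // ... consume blurred.fImage ...
//         SkMask::FreeImage(blurred.fImage);
//     }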