/*
 * Copyright 2006 The Android Open Source Project
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */


#include "SkBlurMask.h"
#include "SkMath.h"
#include "SkTemplates.h"
#include "SkEndian.h"

// Unrolling the integer blur kernel seems to give us a ~15% speedup on Windows,
// breakeven on Mac, and ~15% slowdown on Linux.
// Reading a word at a time when building the sum buffer seems to give
// us no appreciable speedup on Windows or Mac, and a 2% slowdown on Linux.
#if defined(SK_BUILD_FOR_WIN32)
#define UNROLL_KERNEL_LOOP 1
#endif

/** The sum buffer is an array of u32 to hold the accumulated sum of all of the
    src values at their position, plus all values above and to the left.
    When we sample into this buffer, we need an initial row and column of 0s,
    so we have an index correspondence as follows:

        src[i, j] == sum[i+1, j+1]
        sum[0, j] == sum[i, 0] == 0

    We assume that the sum buffer's stride == its width
*/
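/*  With that layout, the sum of src over any axis-aligned box can be read back
    with four lookups, which is what the kernel functions below rely on.
    A rough sketch (x0/x1/y0/y1 and boxSum are illustrative names only), for the
    box of src pixels with x in [x0, x1) and y in [y0, y1):

        uint32_t boxSum = sum[y1*sumStride + x1] + sum[y0*sumStride + x0]
                        - sum[y0*sumStride + x1] - sum[y1*sumStride + x0];

    where sumStride == srcW + 1.
*/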
static void build_sum_buffer(uint32_t sum[], int srcW, int srcH,
                             const uint8_t src[], int srcRB) {
    int sumW = srcW + 1;

    SkASSERT(srcRB >= srcW);
    // mod srcRB so we can apply it after each row
    srcRB -= srcW;

    int x, y;

    // zero out the top row (the leading column entry of each row is zeroed below)
    memset(sum, 0, sumW * sizeof(sum[0]));
    sum += sumW;

    // special case first row
    uint32_t X = 0;
    *sum++ = 0; // initialize the first column to 0
    for (x = srcW - 1; x >= 0; --x) {
        X = *src++ + X;
        *sum++ = X;
    }
    src += srcRB;

    // now do the rest of the rows
    for (y = srcH - 1; y > 0; --y) {
        uint32_t L = 0;
        uint32_t C = 0;
        *sum++ = 0; // initialize the first column to 0
        for (x = srcW - 1; !SkIsAlign4((intptr_t) src) && x >= 0; x--) {
            uint32_t T = sum[-sumW];
            X = *src++ + L + T - C;
            *sum++ = X;
            L = X;
            C = T;
        }

        for (; x >= 4; x -= 4) {
            uint32_t T = sum[-sumW];
            X = *src++ + L + T - C;
            *sum++ = X;
            L = X;
            C = T;
            T = sum[-sumW];
            X = *src++ + L + T - C;
            *sum++ = X;
            L = X;
            C = T;
            T = sum[-sumW];
            X = *src++ + L + T - C;
            *sum++ = X;
            L = X;
            C = T;
            T = sum[-sumW];
            X = *src++ + L + T - C;
            *sum++ = X;
            L = X;
            C = T;
        }

        for (; x >= 0; --x) {
            uint32_t T = sum[-sumW];
            X = *src++ + L + T - C;
            *sum++ = X;
            L = X;
            C = T;
        }
        src += srcRB;
    }
}

/**
 * This is the path for apply_kernel() to be taken when the kernel
 * is wider than the source image.
 */
static void kernel_clamped(uint8_t dst[], int rx, int ry, const uint32_t sum[],
                           int sw, int sh) {
    SkASSERT(2*rx > sw);

    uint32_t scale = (1 << 24) / ((2*rx + 1)*(2*ry + 1));

    int sumStride = sw + 1;

    int dw = sw + 2*rx;
    int dh = sh + 2*ry;

    int prev_y = -2*ry;
    int next_y = 1;

    for (int y = 0; y < dh; y++) {
        int py = SkClampPos(prev_y) * sumStride;
        int ny = SkFastMin32(next_y, sh) * sumStride;

        int prev_x = -2*rx;
        int next_x = 1;

        for (int x = 0; x < dw; x++) {
            int px = SkClampPos(prev_x);
            int nx = SkFastMin32(next_x, sw);

            uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
            *dst++ = SkToU8(tmp * scale >> 24);

            prev_x += 1;
            next_x += 1;
        }

        prev_y += 1;
        next_y += 1;
    }
}
/**
 *  sw and sh are the width and height of the src. Since the sum buffer
 *  matches that, but has an extra row and col at the beginning (with zeros),
 *  we can just use sw and sh as our "max" values for pinning coordinates
 *  when sampling into sum[][]
 *
 *  The inner loop is conceptually simple; we break it into several sections
 *  to improve performance. Here's the original version:
        for (int x = 0; x < dw; x++) {
            int px = SkClampPos(prev_x);
            int nx = SkFastMin32(next_x, sw);

            uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
            *dst++ = SkToU8(tmp * scale >> 24);

            prev_x += 1;
            next_x += 1;
        }
 *  The sections are:
 *  left-hand section, where prev_x is clamped to 0
 *  center section, where neither prev_x nor next_x is clamped
 *  right-hand section, where next_x is clamped to sw
 *  On some operating systems, the center section is unrolled for additional
 *  speedup.
 */
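/*  In every section the per-pixel math is the same fixed-point average:
    scale is (1 << 24) / area with area = (2*rx + 1)*(2*ry + 1), so roughly

        dstAlpha ~= (boxSum * scale) >> 24 ~= boxSum / area

    i.e. each output pixel is approximately the mean alpha over the box.
    Since boxSum <= 255 * area, the product stays below 255 * 2^24 and fits
    in 32 bits. (boxSum and dstAlpha are sketch names, not variables in the
    code below.)
*/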
static void apply_kernel(uint8_t dst[], int rx, int ry, const uint32_t sum[],
                         int sw, int sh) {
    if (2*rx > sw) {
        kernel_clamped(dst, rx, ry, sum, sw, sh);
        return;
    }

    uint32_t scale = (1 << 24) / ((2*rx + 1)*(2*ry + 1));

    int sumStride = sw + 1;

    int dw = sw + 2*rx;
    int dh = sh + 2*ry;

    int prev_y = -2*ry;
    int next_y = 1;

    SkASSERT(2*rx <= dw - 2*rx);

    for (int y = 0; y < dh; y++) {
        int py = SkClampPos(prev_y) * sumStride;
        int ny = SkFastMin32(next_y, sh) * sumStride;

        int prev_x = -2*rx;
        int next_x = 1;
        int x = 0;

        for (; x < 2*rx; x++) {
            SkASSERT(prev_x <= 0);
            SkASSERT(next_x <= sw);

            int px = 0;
            int nx = next_x;

            uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
            *dst++ = SkToU8(tmp * scale >> 24);

            prev_x += 1;
            next_x += 1;
        }

        int i0 = prev_x + py;
        int i1 = next_x + ny;
        int i2 = next_x + py;
        int i3 = prev_x + ny;

#if UNROLL_KERNEL_LOOP
        for (; x < dw - 2*rx - 4; x += 4) {
            SkASSERT(prev_x >= 0);
            SkASSERT(next_x <= sw);

            uint32_t tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
            *dst++ = SkToU8(tmp * scale >> 24);
            tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
            *dst++ = SkToU8(tmp * scale >> 24);
            tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
            *dst++ = SkToU8(tmp * scale >> 24);
            tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
            *dst++ = SkToU8(tmp * scale >> 24);

            prev_x += 4;
            next_x += 4;
        }
#endif

        for (; x < dw - 2*rx; x++) {
            SkASSERT(prev_x >= 0);
            SkASSERT(next_x <= sw);

            uint32_t tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
            *dst++ = SkToU8(tmp * scale >> 24);

            prev_x += 1;
            next_x += 1;
        }

        for (; x < dw; x++) {
            SkASSERT(prev_x >= 0);
            SkASSERT(next_x > sw);

            int px = prev_x;
            int nx = sw;

            uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
            *dst++ = SkToU8(tmp * scale >> 24);

            prev_x += 1;
            next_x += 1;
        }

        prev_y += 1;
        next_y += 1;
    }
}

/**
 * This is the path for apply_kernel_interp() to be taken when the kernel
 * is wider than the source image.
 */
static void kernel_interp_clamped(uint8_t dst[], int rx, int ry,
                const uint32_t sum[], int sw, int sh, U8CPU outer_weight) {
    SkASSERT(2*rx > sw);

    int inner_weight = 255 - outer_weight;

    // round these guys up if they're bigger than 127
    outer_weight += outer_weight >> 7;
    inner_weight += inner_weight >> 7;

    uint32_t outer_scale = (outer_weight << 16) / ((2*rx + 1)*(2*ry + 1));
    uint32_t inner_scale = (inner_weight << 16) / ((2*rx - 1)*(2*ry - 1));

    int sumStride = sw + 1;

    int dw = sw + 2*rx;
    int dh = sh + 2*ry;

    int prev_y = -2*ry;
    int next_y = 1;

    for (int y = 0; y < dh; y++) {
        int py = SkClampPos(prev_y) * sumStride;
        int ny = SkFastMin32(next_y, sh) * sumStride;

        int ipy = SkClampPos(prev_y + 1) * sumStride;
        int iny = SkClampMax(next_y - 1, sh) * sumStride;

        int prev_x = -2*rx;
        int next_x = 1;

        for (int x = 0; x < dw; x++) {
            int px = SkClampPos(prev_x);
            int nx = SkFastMin32(next_x, sw);

            int ipx = SkClampPos(prev_x + 1);
            int inx = SkClampMax(next_x - 1, sw);

            uint32_t outer_sum = sum[px+py] + sum[nx+ny]
                               - sum[nx+py] - sum[px+ny];
            uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]
                               - sum[inx+ipy] - sum[ipx+iny];
            *dst++ = SkToU8((outer_sum * outer_scale
                           + inner_sum * inner_scale) >> 24);

            prev_x += 1;
            next_x += 1;
        }
        prev_y += 1;
        next_y += 1;
    }
}

/**
 *  sw and sh are the width and height of the src. Since the sum buffer
 *  matches that, but has an extra row and col at the beginning (with zeros),
 *  we can just use sw and sh as our "max" values for pinning coordinates
 *  when sampling into sum[][]
 *
 *  The inner loop is conceptually simple; we break it into several variants
 *  to improve performance. Here's the original version:
        for (int x = 0; x < dw; x++) {
            int px = SkClampPos(prev_x);
            int nx = SkFastMin32(next_x, sw);

            int ipx = SkClampPos(prev_x + 1);
            int inx = SkClampMax(next_x - 1, sw);

            uint32_t outer_sum = sum[px+py] + sum[nx+ny]
                               - sum[nx+py] - sum[px+ny];
            uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]
                               - sum[inx+ipy] - sum[ipx+iny];
            *dst++ = SkToU8((outer_sum * outer_scale
                           + inner_sum * inner_scale) >> 24);

            prev_x += 1;
            next_x += 1;
        }
 *  The sections are:
 *  left-hand section, where prev_x is clamped to 0
 *  center section, where neither prev_x nor next_x is clamped
 *  right-hand section, where next_x is clamped to sw
 *  On some operating systems, the center section is unrolled for additional
 *  speedup.
 */
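/*  A note on the interpolated variant below: it blends two box averages, one
    over the full (2*rx + 1) x (2*ry + 1) box and one over the smaller
    (2*rx - 1) x (2*ry - 1) box, weighted by outer_weight and
    255 - outer_weight. Roughly

        dst ~= w * outerAverage + (1 - w) * innerAverage,  w = outer_weight / 255

    which is how Blur() approximates a fractional blur radius between rx - 1
    and rx. (outerAverage, innerAverage and w are sketch names only.)
*/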
static void apply_kernel_interp(uint8_t dst[], int rx, int ry,
                const uint32_t sum[], int sw, int sh, U8CPU outer_weight) {
    SkASSERT(rx > 0 && ry > 0);
    SkASSERT(outer_weight <= 255);

    if (2*rx > sw) {
        kernel_interp_clamped(dst, rx, ry, sum, sw, sh, outer_weight);
        return;
    }

    int inner_weight = 255 - outer_weight;

    // round these guys up if they're bigger than 127
    outer_weight += outer_weight >> 7;
    inner_weight += inner_weight >> 7;

    uint32_t outer_scale = (outer_weight << 16) / ((2*rx + 1)*(2*ry + 1));
    uint32_t inner_scale = (inner_weight << 16) / ((2*rx - 1)*(2*ry - 1));

    int sumStride = sw + 1;

    int dw = sw + 2*rx;
    int dh = sh + 2*ry;

    int prev_y = -2*ry;
    int next_y = 1;

    SkASSERT(2*rx <= dw - 2*rx);

    for (int y = 0; y < dh; y++) {
        int py = SkClampPos(prev_y) * sumStride;
        int ny = SkFastMin32(next_y, sh) * sumStride;

        int ipy = SkClampPos(prev_y + 1) * sumStride;
        int iny = SkClampMax(next_y - 1, sh) * sumStride;

        int prev_x = -2*rx;
        int next_x = 1;
        int x = 0;

        for (; x < 2*rx; x++) {
            SkASSERT(prev_x < 0);
            SkASSERT(next_x <= sw);

            int px = 0;
            int nx = next_x;

            int ipx = 0;
            int inx = next_x - 1;

            uint32_t outer_sum = sum[px+py] + sum[nx+ny]
                               - sum[nx+py] - sum[px+ny];
            uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]
                               - sum[inx+ipy] - sum[ipx+iny];
            *dst++ = SkToU8((outer_sum * outer_scale
                           + inner_sum * inner_scale) >> 24);

            prev_x += 1;
            next_x += 1;
        }

        int i0 = prev_x + py;
        int i1 = next_x + ny;
        int i2 = next_x + py;
        int i3 = prev_x + ny;
        int i4 = prev_x + 1 + ipy;
        int i5 = next_x - 1 + iny;
        int i6 = next_x - 1 + ipy;
        int i7 = prev_x + 1 + iny;

#if UNROLL_KERNEL_LOOP
        for (; x < dw - 2*rx - 4; x += 4) {
            SkASSERT(prev_x >= 0);
            SkASSERT(next_x <= sw);

            uint32_t outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
            uint32_t inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
            *dst++ = SkToU8((outer_sum * outer_scale
                           + inner_sum * inner_scale) >> 24);
            outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
            inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
            *dst++ = SkToU8((outer_sum * outer_scale
                           + inner_sum * inner_scale) >> 24);
            outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
            inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
            *dst++ = SkToU8((outer_sum * outer_scale
                           + inner_sum * inner_scale) >> 24);
            outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
            inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
            *dst++ = SkToU8((outer_sum * outer_scale
                           + inner_sum * inner_scale) >> 24);

            prev_x += 4;
            next_x += 4;
        }
#endif

        for (; x < dw - 2*rx; x++) {
            SkASSERT(prev_x >= 0);
            SkASSERT(next_x <= sw);

            uint32_t outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
            uint32_t inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
            *dst++ = SkToU8((outer_sum * outer_scale
                           + inner_sum * inner_scale) >> 24);

            prev_x += 1;
            next_x += 1;
        }

        for (; x < dw; x++) {
            SkASSERT(prev_x >= 0);
            SkASSERT(next_x > sw);

            int px = prev_x;
            int nx = sw;

            int ipx = prev_x + 1;
            int inx = sw;

            uint32_t outer_sum = sum[px+py] + sum[nx+ny]
                               - sum[nx+py] - sum[px+ny];
            uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]
                               - sum[inx+ipy] - sum[ipx+iny];
            *dst++ = SkToU8((outer_sum * outer_scale
                           + inner_sum * inner_scale) >> 24);

            prev_x += 1;
            next_x += 1;
        }

        prev_y += 1;
        next_y += 1;
    }
}

#include "SkColorPriv.h"

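/*  A note on the helper below: per pixel it computes (approximately)
    dst = blur * src / 255 -- SkAlpha255To256() turns src into a 0..256 scale
    factor and SkAlphaMul() multiplies and shifts down by 8 -- i.e. it keeps
    the blur only where the original mask has coverage. Blur() uses it for
    kInner_Style.
*/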
static void merge_src_with_blur(uint8_t dst[], int dstRB,
                                const uint8_t src[], int srcRB,
                                const uint8_t blur[], int blurRB,
                                int sw, int sh) {
    dstRB -= sw;
    srcRB -= sw;
    blurRB -= sw;
    while (--sh >= 0) {
        for (int x = sw - 1; x >= 0; --x) {
            *dst = SkToU8(SkAlphaMul(*blur, SkAlpha255To256(*src)));
            dst += 1;
            src += 1;
            blur += 1;
        }
        dst += dstRB;
        src += srcRB;
        blur += blurRB;
    }
}

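/*  clamp_with_orig() folds the original mask back into the blurred result in
    place. Roughly, per pixel:

        kSolid_Style:  dst = src + dst - src*dst/255   (src over the blur, so
                       the original shape stays fully opaque)
        kOuter_Style:  dst = dst * (255 - src) / 255   (knocks the blur out
                       where the original mask is opaque, leaving the halo)

    The expressions above are a paraphrase of the code below, not extra state.
*/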
static void clamp_with_orig(uint8_t dst[], int dstRowBytes,
                            const uint8_t src[], int srcRowBytes,
                            int sw, int sh,
                            SkBlurMask::Style style) {
    int x;
    while (--sh >= 0) {
        switch (style) {
        case SkBlurMask::kSolid_Style:
            for (x = sw - 1; x >= 0; --x) {
                int s = *src;
                int d = *dst;
                *dst = SkToU8(s + d - SkMulDiv255Round(s, d));
                dst += 1;
                src += 1;
            }
            break;
        case SkBlurMask::kOuter_Style:
            for (x = sw - 1; x >= 0; --x) {
                if (*src) {
                    *dst = SkToU8(SkAlphaMul(*dst, SkAlpha255To256(255 - *src)));
                }
                dst += 1;
                src += 1;
            }
            break;
        default:
            SkDEBUGFAIL("Unexpected blur style here");
            break;
        }
        dst += dstRowBytes - sw;
        src += srcRowBytes - sw;
    }
}

///////////////////////////////////////////////////////////////////////////////

// we use a local function to wrap the class static method to work around
// a bug in gcc98
void SkMask_FreeImage(uint8_t* image);
void SkMask_FreeImage(uint8_t* image) {
    SkMask::FreeImage(image);
}

bool SkBlurMask::Blur(SkMask* dst, const SkMask& src,
                      SkScalar radius, Style style, Quality quality,
                      SkIPoint* margin)
{
    if (src.fFormat != SkMask::kA8_Format) {
        return false;
    }

    // Force high quality off for small radii (performance)
    if (radius < SkIntToScalar(3)) quality = kLow_Quality;

    // highQuality: use three box blur passes as a cheap way to approximate a Gaussian blur
    int passCount = (quality == kHigh_Quality) ? 3 : 1;
    SkScalar passRadius = SkScalarDiv(radius, SkScalarSqrt(SkIntToScalar(passCount)));
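    // Dividing by sqrt(passCount) keeps the overall blur width roughly
    // constant: the variances of repeated box filters add, so three passes at
    // radius/sqrt(3) have about the same total standard deviation as a single
    // pass at the full radius.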

    int rx = SkScalarCeil(passRadius);
    int outer_weight = 255 - SkScalarRound((SkIntToScalar(rx) - passRadius) * 255);

    SkASSERT(rx >= 0);
    SkASSERT((unsigned)outer_weight <= 255);
    if (rx <= 0) {
        return false;
    }

    int ry = rx;    // only do square blur for now

    int padx = passCount * rx;
    int pady = passCount * ry;
    if (margin) {
        margin->set(padx, pady);
    }
    dst->fBounds.set(src.fBounds.fLeft - padx, src.fBounds.fTop - pady,
                     src.fBounds.fRight + padx, src.fBounds.fBottom + pady);
    dst->fRowBytes = dst->fBounds.width();
    dst->fFormat = SkMask::kA8_Format;
    dst->fImage = NULL;

    if (src.fImage) {
        size_t dstSize = dst->computeImageSize();
        if (0 == dstSize) {
            return false;   // too big to allocate, abort
        }

        int sw = src.fBounds.width();
        int sh = src.fBounds.height();
        const uint8_t* sp = src.fImage;
        uint8_t* dp = SkMask::AllocImage(dstSize);

        SkAutoTCallVProc<uint8_t, SkMask_FreeImage> autoCall(dp);

        // build the blurry destination
        {
            const size_t storageW = sw + 2 * (passCount - 1) * rx + 1;
            const size_t storageH = sh + 2 * (passCount - 1) * ry + 1;
            SkAutoTMalloc<uint32_t> storage(storageW * storageH);
            uint32_t* sumBuffer = storage.get();

            // pass 1: sp is source, dp is destination
            build_sum_buffer(sumBuffer, sw, sh, sp, src.fRowBytes);
            if (outer_weight == 255) {
                apply_kernel(dp, rx, ry, sumBuffer, sw, sh);
            } else {
                apply_kernel_interp(dp, rx, ry, sumBuffer, sw, sh, outer_weight);
            }

            if (quality == kHigh_Quality) {
                // pass 2: dp is source, tmpBuffer is destination
                int tmp_sw = sw + 2 * rx;
                int tmp_sh = sh + 2 * ry;
                SkAutoTMalloc<uint8_t> tmpBuffer(dstSize);
                build_sum_buffer(sumBuffer, tmp_sw, tmp_sh, dp, tmp_sw);
                if (outer_weight == 255)
                    apply_kernel(tmpBuffer.get(), rx, ry, sumBuffer, tmp_sw, tmp_sh);
                else
                    apply_kernel_interp(tmpBuffer.get(), rx, ry, sumBuffer,
                                        tmp_sw, tmp_sh, outer_weight);

                // pass 3: tmpBuffer is source, dp is destination
                tmp_sw += 2 * rx;
                tmp_sh += 2 * ry;
                build_sum_buffer(sumBuffer, tmp_sw, tmp_sh, tmpBuffer.get(), tmp_sw);
                if (outer_weight == 255)
                    apply_kernel(dp, rx, ry, sumBuffer, tmp_sw, tmp_sh);
                else
                    apply_kernel_interp(dp, rx, ry, sumBuffer, tmp_sw, tmp_sh,
                                        outer_weight);
            }
        }

        dst->fImage = dp;
        // if need be, alloc the "real" dst (same size as src) and copy/merge
        // the blur into it (applying the src)
        if (style == kInner_Style) {
            // now we allocate the "real" dst, mirror the size of src
            size_t srcSize = src.computeImageSize();
            if (0 == srcSize) {
                return false;   // too big to allocate, abort
            }
            dst->fImage = SkMask::AllocImage(srcSize);
            merge_src_with_blur(dst->fImage, src.fRowBytes,
                                sp, src.fRowBytes,
                                dp + passCount * (rx + ry * dst->fRowBytes),
                                dst->fRowBytes, sw, sh);
            SkMask::FreeImage(dp);
        } else if (style != kNormal_Style) {
            clamp_with_orig(dp + passCount * (rx + ry * dst->fRowBytes),
                            dst->fRowBytes, sp, src.fRowBytes, sw, sh, style);
        }
        (void)autoCall.detach();
    }

    if (style == kInner_Style) {
        dst->fBounds = src.fBounds;     // restore trimmed bounds
        dst->fRowBytes = src.fRowBytes;
    }

    return true;
}