1 /*
2 * Copyright 2011 The Android Open Source Project
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8 #include <algorithm>
9
10 #include "include/core/SkBitmap.h"
11 #include "include/core/SkTileMode.h"
12 #include "include/effects/SkImageFilters.h"
13 #include "include/private/SkColorData.h"
14 #include "include/private/SkTFitsIn.h"
15 #include "include/private/SkTPin.h"
16 #include "include/private/SkVx.h"
17 #include "src/core/SkArenaAlloc.h"
18 #include "src/core/SkAutoPixmapStorage.h"
19 #include "src/core/SkGpuBlurUtils.h"
20 #include "src/core/SkImageFilter_Base.h"
21 #include "src/core/SkOpts.h"
22 #include "src/core/SkReadBuffer.h"
23 #include "src/core/SkSpecialImage.h"
24 #include "src/core/SkWriteBuffer.h"
25
26 #if SK_SUPPORT_GPU
27 #include "src/gpu/GrTextureProxy.h"
28 #include "src/gpu/SkGr.h"
29 #if SK_GPU_V1
30 #include "src/gpu/v1/SurfaceDrawContext_v1.h"
31 #endif // SK_GPU_V1
32 #endif // SK_SUPPORT_GPU
33
34 namespace {
35
36 class SkBlurImageFilter final : public SkImageFilter_Base {
37 public:
SkBlurImageFilter(SkScalar sigmaX,SkScalar sigmaY,SkTileMode tileMode,sk_sp<SkImageFilter> input,const SkRect * cropRect)38 SkBlurImageFilter(SkScalar sigmaX, SkScalar sigmaY, SkTileMode tileMode,
39 sk_sp<SkImageFilter> input, const SkRect* cropRect)
40 : INHERITED(&input, 1, cropRect)
41 , fSigma{sigmaX, sigmaY}
42 , fTileMode(tileMode) {}
43
44 SkRect computeFastBounds(const SkRect&) const override;
45
46 protected:
47 void flatten(SkWriteBuffer&) const override;
48 sk_sp<SkSpecialImage> onFilterImage(const Context&, SkIPoint* offset) const override;
49 SkIRect onFilterNodeBounds(const SkIRect& src, const SkMatrix& ctm,
50 MapDirection, const SkIRect* inputRect) const override;
51
52 private:
53 friend void ::SkRegisterBlurImageFilterFlattenable();
54 SK_FLATTENABLE_HOOKS(SkBlurImageFilter)
55
56 #if SK_SUPPORT_GPU
57 sk_sp<SkSpecialImage> gpuFilter(
58 const Context& ctx, SkVector sigma,
59 const sk_sp<SkSpecialImage> &input,
60 SkIRect inputBounds, SkIRect dstBounds, SkIPoint inputOffset, SkIPoint* offset) const;
61 #endif
62
63 SkSize fSigma;
64 SkTileMode fTileMode;
65
66 using INHERITED = SkImageFilter_Base;
67 };
68
69 } // end namespace
70
Blur(SkScalar sigmaX,SkScalar sigmaY,SkTileMode tileMode,sk_sp<SkImageFilter> input,const CropRect & cropRect)71 sk_sp<SkImageFilter> SkImageFilters::Blur(
72 SkScalar sigmaX, SkScalar sigmaY, SkTileMode tileMode, sk_sp<SkImageFilter> input,
73 const CropRect& cropRect) {
74 if (sigmaX < SK_ScalarNearlyZero && sigmaY < SK_ScalarNearlyZero && !cropRect) {
75 return input;
76 }
77 return sk_sp<SkImageFilter>(
78 new SkBlurImageFilter(sigmaX, sigmaY, tileMode, input, cropRect));
79 }
80
SkRegisterBlurImageFilterFlattenable()81 void SkRegisterBlurImageFilterFlattenable() {
82 SK_REGISTER_FLATTENABLE(SkBlurImageFilter);
83 SkFlattenable::Register("SkBlurImageFilterImpl", SkBlurImageFilter::CreateProc);
84 }
85
CreateProc(SkReadBuffer & buffer)86 sk_sp<SkFlattenable> SkBlurImageFilter::CreateProc(SkReadBuffer& buffer) {
87 SK_IMAGEFILTER_UNFLATTEN_COMMON(common, 1);
88 SkScalar sigmaX = buffer.readScalar();
89 SkScalar sigmaY = buffer.readScalar();
90 SkTileMode tileMode = buffer.read32LE(SkTileMode::kLastTileMode);
91 return SkImageFilters::Blur(
92 sigmaX, sigmaY, tileMode, common.getInput(0), common.cropRect());
93 }
94
flatten(SkWriteBuffer & buffer) const95 void SkBlurImageFilter::flatten(SkWriteBuffer& buffer) const {
96 this->INHERITED::flatten(buffer);
97 buffer.writeScalar(fSigma.fWidth);
98 buffer.writeScalar(fSigma.fHeight);
99
100 SkASSERT(fTileMode <= SkTileMode::kLastTileMode);
101 buffer.writeInt(static_cast<int>(fTileMode));
102 }
103
104 ///////////////////////////////////////////////////////////////////////////////
105
106 namespace {
107 // This is defined by the SVG spec:
108 // https://drafts.fxtf.org/filter-effects/#feGaussianBlurElement
calculate_window(double sigma)109 int calculate_window(double sigma) {
110 auto possibleWindow = static_cast<int>(floor(sigma * 3 * sqrt(2 * SK_DoublePI) / 4 + 0.5));
111 return std::max(1, possibleWindow);
112 }
113
114 class Pass {
115 public:
Pass(int border)116 explicit Pass(int border) : fBorder(border) {}
117 virtual ~Pass() = default;
118
blur(int srcLeft,int srcRight,int dstRight,const uint32_t * src,int srcStride,uint32_t * dst,int dstStride)119 void blur(int srcLeft, int srcRight, int dstRight,
120 const uint32_t* src, int srcStride,
121 uint32_t* dst, int dstStride) {
122 this->startBlur();
123
124 auto srcStart = srcLeft - fBorder,
125 srcEnd = srcRight - fBorder,
126 dstEnd = dstRight,
127 srcIdx = srcStart,
128 dstIdx = 0;
129
130 const uint32_t* srcCursor = src;
131 uint32_t* dstCursor = dst;
132
133 if (dstIdx < srcIdx) {
134 // The destination pixels are not effected by the src pixels,
135 // change to zero as per the spec.
136 // https://drafts.fxtf.org/filter-effects/#FilterPrimitivesOverviewIntro
137 while (dstIdx < srcIdx) {
138 *dstCursor = 0;
139 dstCursor += dstStride;
140 SK_PREFETCH(dstCursor);
141 dstIdx++;
142 }
143 } else if (srcIdx < dstIdx) {
144 // The edge of the source is before the edge of the destination. Calculate the sums for
145 // the pixels before the start of the destination.
146 if (int commonEnd = std::min(dstIdx, srcEnd); srcIdx < commonEnd) {
147 // Preload the blur with values from src before dst is entered.
148 int n = commonEnd - srcIdx;
149 this->blurSegment(n, srcCursor, srcStride, nullptr, 0);
150 srcIdx += n;
151 srcCursor += n * srcStride;
152 }
153 if (srcIdx < dstIdx) {
154 // The weird case where src is out of pixels before dst is even started.
155 int n = dstIdx - srcIdx;
156 this->blurSegment(n, nullptr, 0, nullptr, 0);
157 srcIdx += n;
158 }
159 }
160
161 // Both srcIdx and dstIdx are in sync now, and can run in a 1:1 fashion. This is the
162 // normal mode of operation.
163 SkASSERT(srcIdx == dstIdx);
164 if (int commonEnd = std::min(dstEnd, srcEnd); dstIdx < commonEnd) {
165 int n = commonEnd - dstIdx;
166 this->blurSegment(n, srcCursor, srcStride, dstCursor, dstStride);
167 srcCursor += n * srcStride;
168 dstCursor += n * dstStride;
169 dstIdx += n;
170 srcIdx += n;
171 }
172
173 // Drain the remaining blur values into dst assuming 0's for the leading edge.
174 if (dstIdx < dstEnd) {
175 int n = dstEnd - dstIdx;
176 this->blurSegment(n, nullptr, 0, dstCursor, dstStride);
177 }
178 }
179
180 protected:
181 virtual void startBlur() = 0;
182 virtual void blurSegment(
183 int n, const uint32_t* src, int srcStride, uint32_t* dst, int dstStride) = 0;
184
185 private:
186 const int fBorder;
187 };
188
189 class PassMaker {
190 public:
PassMaker(int window)191 explicit PassMaker(int window) : fWindow{window} {}
192 virtual ~PassMaker() = default;
193 virtual Pass* makePass(void* buffer, SkArenaAlloc* alloc) const = 0;
194 virtual size_t bufferSizeBytes() const = 0;
window() const195 int window() const {return fWindow;}
196
197 private:
198 const int fWindow;
199 };
200
201 // Implement a scanline processor that uses a three-box filter to approximate a Gaussian blur.
202 // The GaussPass is limit to processing sigmas < 135.
203 class GaussPass final : public Pass {
204 public:
205 // NB 136 is the largest sigma that will not cause a buffer full of 255 mask values to overflow
206 // using the Gauss filter. It also limits the size of buffers used hold intermediate values.
207 // Explanation of maximums:
208 // sum0 = window * 255
209 // sum1 = window * sum0 -> window * window * 255
210 // sum2 = window * sum1 -> window * window * window * 255 -> window^3 * 255
211 //
212 // The value window^3 * 255 must fit in a uint32_t. So,
213 // window^3 < 2^32. window = 255.
214 //
215 // window = floor(sigma * 3 * sqrt(2 * kPi) / 4 + 0.5)
216 // For window <= 255, the largest value for sigma is 136.
MakeMaker(double sigma,SkArenaAlloc * alloc)217 static PassMaker* MakeMaker(double sigma, SkArenaAlloc* alloc) {
218 SkASSERT(0 <= sigma);
219 int window = calculate_window(sigma);
220 if (255 <= window) {
221 return nullptr;
222 }
223
224 class Maker : public PassMaker {
225 public:
226 explicit Maker(int window) : PassMaker{window} {}
227 Pass* makePass(void* buffer, SkArenaAlloc* alloc) const override {
228 return GaussPass::Make(this->window(), buffer, alloc);
229 }
230
231 size_t bufferSizeBytes() const override {
232 int window = this->window();
233 size_t onePassSize = window - 1;
234 // If the window is odd, then there is an obvious middle element. For even sizes
235 // 2 passes are shifted, and the last pass has an extra element. Like this:
236 // S
237 // aaaAaa
238 // bbBbbb
239 // cccCccc
240 // D
241 size_t bufferCount = (window & 1) == 1 ? 3 * onePassSize : 3 * onePassSize + 1;
242 return bufferCount * sizeof(skvx::Vec<4, uint32_t>);
243 }
244 };
245
246 return alloc->make<Maker>(window);
247 }
248
Make(int window,void * buffers,SkArenaAlloc * alloc)249 static GaussPass* Make(int window, void* buffers, SkArenaAlloc* alloc) {
250 // We don't need to store the trailing edge pixel in the buffer;
251 int passSize = window - 1;
252 skvx::Vec<4, uint32_t>* buffer0 = static_cast<skvx::Vec<4, uint32_t>*>(buffers);
253 skvx::Vec<4, uint32_t>* buffer1 = buffer0 + passSize;
254 skvx::Vec<4, uint32_t>* buffer2 = buffer1 + passSize;
255 // If the window is odd just one buffer is needed, but if it's even, then there is one
256 // more element on that pass.
257 skvx::Vec<4, uint32_t>* buffersEnd = buffer2 + ((window & 1) ? passSize : passSize + 1);
258
259 // Calculating the border is tricky. The border is the distance in pixels between the first
260 // dst pixel and the first src pixel (or the last src pixel and the last dst pixel).
261 // I will go through the odd case which is simpler, and then through the even case. Given a
262 // stack of filters seven wide for the odd case of three passes.
263 //
264 // S
265 // aaaAaaa
266 // bbbBbbb
267 // cccCccc
268 // D
269 //
270 // The furthest changed pixel is when the filters are in the following configuration.
271 //
272 // S
273 // aaaAaaa
274 // bbbBbbb
275 // cccCccc
276 // D
277 //
278 // The A pixel is calculated using the value S, the B uses A, and the C uses B, and
279 // finally D is C. So, with a window size of seven the border is nine. In the odd case, the
280 // border is 3*((window - 1)/2).
281 //
282 // For even cases the filter stack is more complicated. The spec specifies two passes
283 // of even filters and a final pass of odd filters. A stack for a width of six looks like
284 // this.
285 //
286 // S
287 // aaaAaa
288 // bbBbbb
289 // cccCccc
290 // D
291 //
292 // The furthest pixel looks like this.
293 //
294 // S
295 // aaaAaa
296 // bbBbbb
297 // cccCccc
298 // D
299 //
300 // For a window of six, the border value is eight. In the even case the border is 3 *
301 // (window/2) - 1.
302 int border = (window & 1) == 1 ? 3 * ((window - 1) / 2) : 3 * (window / 2) - 1;
303
304 // If the window is odd then the divisor is just window ^ 3 otherwise,
305 // it is window * window * (window + 1) = window ^ 3 + window ^ 2;
306 int window2 = window * window;
307 int window3 = window2 * window;
308 int divisor = (window & 1) == 1 ? window3 : window3 + window2;
309 return alloc->make<GaussPass>(buffer0, buffer1, buffer2, buffersEnd, border, divisor);
310 }
311
GaussPass(skvx::Vec<4,uint32_t> * buffer0,skvx::Vec<4,uint32_t> * buffer1,skvx::Vec<4,uint32_t> * buffer2,skvx::Vec<4,uint32_t> * buffersEnd,int border,int divisor)312 GaussPass(skvx::Vec<4, uint32_t>* buffer0,
313 skvx::Vec<4, uint32_t>* buffer1,
314 skvx::Vec<4, uint32_t>* buffer2,
315 skvx::Vec<4, uint32_t>* buffersEnd,
316 int border,
317 int divisor)
318 : Pass{border}
319 , fBuffer0{buffer0}
320 , fBuffer1{buffer1}
321 , fBuffer2{buffer2}
322 , fBuffersEnd{buffersEnd}
323 , fDivider(divisor) {}
324
325 private:
startBlur()326 void startBlur() override {
327 skvx::Vec<4, uint32_t> zero = {0u, 0u, 0u, 0u};
328 zero.store(fSum0);
329 zero.store(fSum1);
330 auto half = fDivider.half();
331 skvx::Vec<4, uint32_t>{half, half, half, half}.store(fSum2);
332 sk_bzero(fBuffer0, (fBuffersEnd - fBuffer0) * sizeof(skvx::Vec<4, uint32_t>));
333
334 fBuffer0Cursor = fBuffer0;
335 fBuffer1Cursor = fBuffer1;
336 fBuffer2Cursor = fBuffer2;
337 }
338
339 // GaussPass implements the common three pass box filter approximation of Gaussian blur,
340 // but combines all three passes into a single pass. This approach is facilitated by three
341 // circular buffers the width of the window which track values for trailing edges of each of
342 // the three passes. This allows the algorithm to use more precision in the calculation
343 // because the values are not rounded each pass. And this implementation also avoids a trap
344 // that's easy to fall into resulting in blending in too many zeroes near the edge.
345 //
346 // In general, a window sum has the form:
347 // sum_n+1 = sum_n + leading_edge - trailing_edge.
348 // If instead we do the subtraction at the end of the previous iteration, we can just
349 // calculate the sums instead of having to do the subtractions too.
350 //
351 // In previous iteration:
352 // sum_n+1 = sum_n - trailing_edge.
353 //
354 // In this iteration:
355 // sum_n+1 = sum_n + leading_edge.
356 //
357 // Now we can stack all three sums and do them at once. Sum0 gets its leading edge from the
358 // actual data. Sum1's leading edge is just Sum0, and Sum2's leading edge is Sum1. So, doing the
359 // three passes at the same time has the form:
360 //
361 // sum0_n+1 = sum0_n + leading edge
362 // sum1_n+1 = sum1_n + sum0_n+1
363 // sum2_n+1 = sum2_n + sum1_n+1
364 //
365 // sum2_n+1 / window^3 is the new value of the destination pixel.
366 //
367 // Reduce the sums by the trailing edges which were stored in the circular buffers for the
368 // next go around. This is the case for odd sized windows, even windows the the third
369 // circular buffer is one larger then the first two circular buffers.
370 //
371 // sum2_n+2 = sum2_n+1 - buffer2[i];
372 // buffer2[i] = sum1;
373 // sum1_n+2 = sum1_n+1 - buffer1[i];
374 // buffer1[i] = sum0;
375 // sum0_n+2 = sum0_n+1 - buffer0[i];
376 // buffer0[i] = leading edge
blurSegment(int n,const uint32_t * src,int srcStride,uint32_t * dst,int dstStride)377 void blurSegment(
378 int n, const uint32_t* src, int srcStride, uint32_t* dst, int dstStride) override {
379 skvx::Vec<4, uint32_t>* buffer0Cursor = fBuffer0Cursor;
380 skvx::Vec<4, uint32_t>* buffer1Cursor = fBuffer1Cursor;
381 skvx::Vec<4, uint32_t>* buffer2Cursor = fBuffer2Cursor;
382 skvx::Vec<4, uint32_t> sum0 = skvx::Vec<4, uint32_t>::Load(fSum0);
383 skvx::Vec<4, uint32_t> sum1 = skvx::Vec<4, uint32_t>::Load(fSum1);
384 skvx::Vec<4, uint32_t> sum2 = skvx::Vec<4, uint32_t>::Load(fSum2);
385
386 // Given an expanded input pixel, move the window ahead using the leadingEdge value.
387 auto processValue = [&](const skvx::Vec<4, uint32_t>& leadingEdge) {
388 sum0 += leadingEdge;
389 sum1 += sum0;
390 sum2 += sum1;
391
392 skvx::Vec<4, uint32_t> blurred = fDivider.divide(sum2);
393
394 sum2 -= *buffer2Cursor;
395 *buffer2Cursor = sum1;
396 buffer2Cursor = (buffer2Cursor + 1) < fBuffersEnd ? buffer2Cursor + 1 : fBuffer2;
397 sum1 -= *buffer1Cursor;
398 *buffer1Cursor = sum0;
399 buffer1Cursor = (buffer1Cursor + 1) < fBuffer2 ? buffer1Cursor + 1 : fBuffer1;
400 sum0 -= *buffer0Cursor;
401 *buffer0Cursor = leadingEdge;
402 buffer0Cursor = (buffer0Cursor + 1) < fBuffer1 ? buffer0Cursor + 1 : fBuffer0;
403
404 return skvx::cast<uint8_t>(blurred);
405 };
406
407 auto loadEdge = [&](const uint32_t* srcCursor) {
408 return skvx::cast<uint32_t>(skvx::Vec<4, uint8_t>::Load(srcCursor));
409 };
410
411 if (!src && !dst) {
412 while (n --> 0) {
413 (void)processValue(0);
414 }
415 } else if (src && !dst) {
416 while (n --> 0) {
417 (void)processValue(loadEdge(src));
418 src += srcStride;
419 }
420 } else if (!src && dst) {
421 while (n --> 0) {
422 processValue(0u).store(dst);
423 dst += dstStride;
424 }
425 } else if (src && dst) {
426 while (n --> 0) {
427 processValue(loadEdge(src)).store(dst);
428 src += srcStride;
429 dst += dstStride;
430 }
431 }
432
433 // Store the state
434 fBuffer0Cursor = buffer0Cursor;
435 fBuffer1Cursor = buffer1Cursor;
436 fBuffer2Cursor = buffer2Cursor;
437
438 sum0.store(fSum0);
439 sum1.store(fSum1);
440 sum2.store(fSum2);
441 }
442
443 skvx::Vec<4, uint32_t>* const fBuffer0;
444 skvx::Vec<4, uint32_t>* const fBuffer1;
445 skvx::Vec<4, uint32_t>* const fBuffer2;
446 skvx::Vec<4, uint32_t>* const fBuffersEnd;
447 const skvx::ScaledDividerU32 fDivider;
448
449 // blur state
450 char fSum0[sizeof(skvx::Vec<4, uint32_t>)];
451 char fSum1[sizeof(skvx::Vec<4, uint32_t>)];
452 char fSum2[sizeof(skvx::Vec<4, uint32_t>)];
453 skvx::Vec<4, uint32_t>* fBuffer0Cursor;
454 skvx::Vec<4, uint32_t>* fBuffer1Cursor;
455 skvx::Vec<4, uint32_t>* fBuffer2Cursor;
456 };
457
458 // Implement a scanline processor that uses a two-box filter to approximate a Tent filter.
459 // The TentPass is limit to processing sigmas < 2183.
460 class TentPass final : public Pass {
461 public:
462 // NB 136 is the largest sigma that will not cause a buffer full of 255 mask values to overflow
463 // using the Gauss filter. It also limits the size of buffers used hold intermediate values.
464 // Explanation of maximums:
465 // sum0 = window * 255
466 // sum1 = window * sum0 -> window * window * 255
467 //
468 // The value window^2 * 255 must fit in a uint32_t. So,
469 // window^2 < 2^32. window = 4104.
470 //
471 // window = floor(sigma * 3 * sqrt(2 * kPi) / 4 + 0.5)
472 // For window <= 4104, the largest value for sigma is 2183.
MakeMaker(double sigma,SkArenaAlloc * alloc)473 static PassMaker* MakeMaker(double sigma, SkArenaAlloc* alloc) {
474 SkASSERT(0 <= sigma);
475 int gaussianWindow = calculate_window(sigma);
476 // This is a naive method of using the window size for the Gaussian blur to calculate the
477 // window size for the Tent blur. This seems to work well in practice.
478 //
479 // We can use a single pixel to generate the effective blur area given a window size. For
480 // the Gaussian blur this is 3 * window size. For the Tent filter this is 2 * window size.
481 int tentWindow = 3 * gaussianWindow / 2;
482 if (tentWindow >= 4104) {
483 return nullptr;
484 }
485
486 class Maker : public PassMaker {
487 public:
488 explicit Maker(int window) : PassMaker{window} {}
489 Pass* makePass(void* buffer, SkArenaAlloc* alloc) const override {
490 return TentPass::Make(this->window(), buffer, alloc);
491 }
492
493 size_t bufferSizeBytes() const override {
494 size_t onePassSize = this->window() - 1;
495 // If the window is odd, then there is an obvious middle element. For even sizes 2
496 // passes are shifted, and the last pass has an extra element. Like this:
497 // S
498 // aaaAaa
499 // bbBbbb
500 // D
501 size_t bufferCount = 2 * onePassSize;
502 return bufferCount * sizeof(skvx::Vec<4, uint32_t>);
503 }
504 };
505
506 return alloc->make<Maker>(tentWindow);
507 }
508
Make(int window,void * buffers,SkArenaAlloc * alloc)509 static TentPass* Make(int window, void* buffers, SkArenaAlloc* alloc) {
510 if (window > 4104) {
511 return nullptr;
512 }
513
514 // We don't need to store the trailing edge pixel in the buffer;
515 int passSize = window - 1;
516 skvx::Vec<4, uint32_t>* buffer0 = static_cast<skvx::Vec<4, uint32_t>*>(buffers);
517 skvx::Vec<4, uint32_t>* buffer1 = buffer0 + passSize;
518 skvx::Vec<4, uint32_t>* buffersEnd = buffer1 + passSize;
519
520 // Calculating the border is tricky. The border is the distance in pixels between the first
521 // dst pixel and the first src pixel (or the last src pixel and the last dst pixel).
522 // I will go through the odd case which is simpler, and then through the even case. Given a
523 // stack of filters seven wide for the odd case of three passes.
524 //
525 // S
526 // aaaAaaa
527 // bbbBbbb
528 // D
529 //
530 // The furthest changed pixel is when the filters are in the following configuration.
531 //
532 // S
533 // aaaAaaa
534 // bbbBbbb
535 // D
536 //
537 // The A pixel is calculated using the value S, the B uses A, and the D uses B.
538 // So, with a window size of seven the border is nine. In the odd case, the border is
539 // window - 1.
540 //
541 // For even cases the filter stack is more complicated. It uses two passes
542 // of even filters offset from each other. A stack for a width of six looks like
543 // this.
544 //
545 // S
546 // aaaAaa
547 // bbBbbb
548 // D
549 //
550 // The furthest pixel looks like this.
551 //
552 // S
553 // aaaAaa
554 // bbBbbb
555 // D
556 //
557 // For a window of six, the border value is 5. In the even case the border is
558 // window - 1.
559 int border = window - 1;
560
561 int divisor = window * window;
562 return alloc->make<TentPass>(buffer0, buffer1, buffersEnd, border, divisor);
563 }
564
TentPass(skvx::Vec<4,uint32_t> * buffer0,skvx::Vec<4,uint32_t> * buffer1,skvx::Vec<4,uint32_t> * buffersEnd,int border,int divisor)565 TentPass(skvx::Vec<4, uint32_t>* buffer0,
566 skvx::Vec<4, uint32_t>* buffer1,
567 skvx::Vec<4, uint32_t>* buffersEnd,
568 int border,
569 int divisor)
570 : Pass{border}
571 , fBuffer0{buffer0}
572 , fBuffer1{buffer1}
573 , fBuffersEnd{buffersEnd}
574 , fDivider(divisor) {}
575
576 private:
startBlur()577 void startBlur() override {
578 skvx::Vec<4, uint32_t>{0u, 0u, 0u, 0u}.store(fSum0);
579 auto half = fDivider.half();
580 skvx::Vec<4, uint32_t>{half, half, half, half}.store(fSum1);
581 sk_bzero(fBuffer0, (fBuffersEnd - fBuffer0) * sizeof(skvx::Vec<4, uint32_t>));
582
583 fBuffer0Cursor = fBuffer0;
584 fBuffer1Cursor = fBuffer1;
585 }
586
587 // TentPass implements the common two pass box filter approximation of Tent filter,
588 // but combines all both passes into a single pass. This approach is facilitated by two
589 // circular buffers the width of the window which track values for trailing edges of each of
590 // both passes. This allows the algorithm to use more precision in the calculation
591 // because the values are not rounded each pass. And this implementation also avoids a trap
592 // that's easy to fall into resulting in blending in too many zeroes near the edge.
593 //
594 // In general, a window sum has the form:
595 // sum_n+1 = sum_n + leading_edge - trailing_edge.
596 // If instead we do the subtraction at the end of the previous iteration, we can just
597 // calculate the sums instead of having to do the subtractions too.
598 //
599 // In previous iteration:
600 // sum_n+1 = sum_n - trailing_edge.
601 //
602 // In this iteration:
603 // sum_n+1 = sum_n + leading_edge.
604 //
605 // Now we can stack all three sums and do them at once. Sum0 gets its leading edge from the
606 // actual data. Sum1's leading edge is just Sum0, and Sum2's leading edge is Sum1. So, doing the
607 // three passes at the same time has the form:
608 //
609 // sum0_n+1 = sum0_n + leading edge
610 // sum1_n+1 = sum1_n + sum0_n+1
611 //
612 // sum1_n+1 / window^2 is the new value of the destination pixel.
613 //
614 // Reduce the sums by the trailing edges which were stored in the circular buffers for the
615 // next go around.
616 //
617 // sum1_n+2 = sum1_n+1 - buffer1[i];
618 // buffer1[i] = sum0;
619 // sum0_n+2 = sum0_n+1 - buffer0[i];
620 // buffer0[i] = leading edge
blurSegment(int n,const uint32_t * src,int srcStride,uint32_t * dst,int dstStride)621 void blurSegment(
622 int n, const uint32_t* src, int srcStride, uint32_t* dst, int dstStride) override {
623 skvx::Vec<4, uint32_t>* buffer0Cursor = fBuffer0Cursor;
624 skvx::Vec<4, uint32_t>* buffer1Cursor = fBuffer1Cursor;
625 skvx::Vec<4, uint32_t> sum0 = skvx::Vec<4, uint32_t>::Load(fSum0);
626 skvx::Vec<4, uint32_t> sum1 = skvx::Vec<4, uint32_t>::Load(fSum1);
627
628 // Given an expanded input pixel, move the window ahead using the leadingEdge value.
629 auto processValue = [&](const skvx::Vec<4, uint32_t>& leadingEdge) {
630 sum0 += leadingEdge;
631 sum1 += sum0;
632
633 skvx::Vec<4, uint32_t> blurred = fDivider.divide(sum1);
634
635 sum1 -= *buffer1Cursor;
636 *buffer1Cursor = sum0;
637 buffer1Cursor = (buffer1Cursor + 1) < fBuffersEnd ? buffer1Cursor + 1 : fBuffer1;
638 sum0 -= *buffer0Cursor;
639 *buffer0Cursor = leadingEdge;
640 buffer0Cursor = (buffer0Cursor + 1) < fBuffer1 ? buffer0Cursor + 1 : fBuffer0;
641
642 return skvx::cast<uint8_t>(blurred);
643 };
644
645 auto loadEdge = [&](const uint32_t* srcCursor) {
646 return skvx::cast<uint32_t>(skvx::Vec<4, uint8_t>::Load(srcCursor));
647 };
648
649 if (!src && !dst) {
650 while (n --> 0) {
651 (void)processValue(0);
652 }
653 } else if (src && !dst) {
654 while (n --> 0) {
655 (void)processValue(loadEdge(src));
656 src += srcStride;
657 }
658 } else if (!src && dst) {
659 while (n --> 0) {
660 processValue(0u).store(dst);
661 dst += dstStride;
662 }
663 } else if (src && dst) {
664 while (n --> 0) {
665 processValue(loadEdge(src)).store(dst);
666 src += srcStride;
667 dst += dstStride;
668 }
669 }
670
671 // Store the state
672 fBuffer0Cursor = buffer0Cursor;
673 fBuffer1Cursor = buffer1Cursor;
674 sum0.store(fSum0);
675 sum1.store(fSum1);
676 }
677
678 skvx::Vec<4, uint32_t>* const fBuffer0;
679 skvx::Vec<4, uint32_t>* const fBuffer1;
680 skvx::Vec<4, uint32_t>* const fBuffersEnd;
681 const skvx::ScaledDividerU32 fDivider;
682
683 // blur state
684 char fSum0[sizeof(skvx::Vec<4, uint32_t>)];
685 char fSum1[sizeof(skvx::Vec<4, uint32_t>)];
686 skvx::Vec<4, uint32_t>* fBuffer0Cursor;
687 skvx::Vec<4, uint32_t>* fBuffer1Cursor;
688 };
689
copy_image_with_bounds(const SkImageFilter_Base::Context & ctx,const sk_sp<SkSpecialImage> & input,SkIRect srcBounds,SkIRect dstBounds)690 sk_sp<SkSpecialImage> copy_image_with_bounds(
691 const SkImageFilter_Base::Context& ctx, const sk_sp<SkSpecialImage> &input,
692 SkIRect srcBounds, SkIRect dstBounds) {
693 SkBitmap inputBM;
694 if (!input->getROPixels(&inputBM)) {
695 return nullptr;
696 }
697
698 if (inputBM.colorType() != kN32_SkColorType) {
699 return nullptr;
700 }
701
702 SkBitmap src;
703 inputBM.extractSubset(&src, srcBounds);
704
705 // Make everything relative to the destination bounds.
706 srcBounds.offset(-dstBounds.x(), -dstBounds.y());
707 dstBounds.offset(-dstBounds.x(), -dstBounds.y());
708
709 auto srcW = srcBounds.width(),
710 dstW = dstBounds.width(),
711 dstH = dstBounds.height();
712
713 SkImageInfo dstInfo = SkImageInfo::Make(dstW, dstH, inputBM.colorType(), inputBM.alphaType());
714
715 SkBitmap dst;
716 if (!dst.tryAllocPixels(dstInfo)) {
717 return nullptr;
718 }
719
720 // There is no blurring to do, but we still need to copy the source while accounting for the
721 // dstBounds. Remember that the src was intersected with the dst.
722 int y = 0;
723 size_t dstWBytes = dstW * sizeof(uint32_t);
724 for (;y < srcBounds.top(); y++) {
725 sk_bzero(dst.getAddr32(0, y), dstWBytes);
726 }
727
728 for (;y < srcBounds.bottom(); y++) {
729 int x = 0;
730 uint32_t* dstPtr = dst.getAddr32(0, y);
731 for (;x < srcBounds.left(); x++) {
732 *dstPtr++ = 0;
733 }
734
735 memcpy(dstPtr, src.getAddr32(x - srcBounds.left(), y - srcBounds.top()),
736 srcW * sizeof(uint32_t));
737
738 dstPtr += srcW;
739 x += srcW;
740
741 for (;x < dstBounds.right(); x++) {
742 *dstPtr++ = 0;
743 }
744 }
745
746 for (;y < dstBounds.bottom(); y++) {
747 sk_bzero(dst.getAddr32(0, y), dstWBytes);
748 }
749
750 return SkSpecialImage::MakeFromRaster(SkIRect::MakeWH(dstBounds.width(),
751 dstBounds.height()),
752 dst, ctx.surfaceProps());
753 }
754
755 // TODO: Implement CPU backend for different fTileMode.
cpu_blur(const SkImageFilter_Base::Context & ctx,SkVector sigma,const sk_sp<SkSpecialImage> & input,SkIRect srcBounds,SkIRect dstBounds)756 sk_sp<SkSpecialImage> cpu_blur(
757 const SkImageFilter_Base::Context& ctx,
758 SkVector sigma, const sk_sp<SkSpecialImage> &input,
759 SkIRect srcBounds, SkIRect dstBounds) {
760 SkVector limitedSigma = {SkTPin(sigma.x(), 0.0f, 2183.0f), SkTPin(sigma.y(), 0.0f, 2183.0f)};
761
762 SkSTArenaAlloc<1024> alloc;
763 auto makeMaker = [&](double sigma) -> PassMaker* {
764 SkASSERT(0 <= sigma && sigma <= 2183);
765 if (PassMaker* maker = GaussPass::MakeMaker(sigma, &alloc)) {
766 return maker;
767 }
768 if (PassMaker* maker = TentPass::MakeMaker(sigma, &alloc)) {
769 return maker;
770 }
771 SK_ABORT("Sigma is out of range.");
772 };
773
774 PassMaker* makerX = makeMaker(limitedSigma.x());
775 PassMaker* makerY = makeMaker(limitedSigma.y());
776
777 if (makerX->window() <= 1 && makerY->window() <= 1) {
778 return copy_image_with_bounds(ctx, input, srcBounds, dstBounds);
779 }
780
781 SkBitmap inputBM;
782
783 if (!input->getROPixels(&inputBM)) {
784 return nullptr;
785 }
786
787 if (inputBM.colorType() != kN32_SkColorType) {
788 return nullptr;
789 }
790
791 SkBitmap src;
792 inputBM.extractSubset(&src, srcBounds);
793
794 // Make everything relative to the destination bounds.
795 srcBounds.offset(-dstBounds.x(), -dstBounds.y());
796 dstBounds.offset(-dstBounds.x(), -dstBounds.y());
797
798 auto srcW = srcBounds.width(),
799 srcH = srcBounds.height(),
800 dstW = dstBounds.width(),
801 dstH = dstBounds.height();
802
803 SkImageInfo dstInfo = inputBM.info().makeWH(dstW, dstH);
804
805 SkBitmap dst;
806 if (!dst.tryAllocPixels(dstInfo)) {
807 return nullptr;
808 }
809
810 size_t bufferSizeBytes = std::max(makerX->bufferSizeBytes(), makerY->bufferSizeBytes());
811 auto buffer = alloc.makeBytesAlignedTo(bufferSizeBytes, alignof(skvx::Vec<4, uint32_t>));
812
813 // Basic Plan: The three cases to handle
814 // * Horizontal and Vertical - blur horizontally while copying values from the source to
815 // the destination. Then, do an in-place vertical blur.
816 // * Horizontal only - blur horizontally copying values from the source to the destination.
817 // * Vertical only - blur vertically copying values from the source to the destination.
818
819 // Default to vertical only blur case. If a horizontal blur is needed, then these values
820 // will be adjusted while doing the horizontal blur.
821 auto intermediateSrc = static_cast<uint32_t *>(src.getPixels());
822 auto intermediateRowBytesAsPixels = src.rowBytesAsPixels();
823 auto intermediateWidth = srcW;
824
825 // Because the border is calculated before the fork of the GPU/CPU path. The border is
826 // the maximum of the two rendering methods. In the case where sigma is zero, then the
827 // src and dst left values are the same. If sigma is small resulting in a window size of
828 // 1, then border calculations add some pixels which will always be zero. Inset the
829 // destination by those zero pixels. This case is very rare.
830 auto intermediateDst = dst.getAddr32(srcBounds.left(), 0);
831
832 // The following code is executed very rarely, I have never seen it in a real web
833 // page. If sigma is small but not zero then shared GPU/CPU border calculation
834 // code adds extra pixels for the border. Just clear everything to clear those pixels.
835 // This solution is overkill, but very simple.
836 if (makerX->window() == 1 || makerY->window() == 1) {
837 dst.eraseColor(0);
838 }
839
840 if (makerX->window() > 1) {
841 Pass* pass = makerX->makePass(buffer, &alloc);
842 // Make int64 to avoid overflow in multiplication below.
843 int64_t shift = srcBounds.top() - dstBounds.top();
844
845 // For the horizontal blur, starts part way down in anticipation of the vertical blur.
846 // For a vertical sigma of zero shift should be zero. But, for small sigma,
847 // shift may be > 0 but the vertical window could be 1.
848 intermediateSrc = static_cast<uint32_t *>(dst.getPixels())
849 + (shift > 0 ? shift * dst.rowBytesAsPixels() : 0);
850 intermediateRowBytesAsPixels = dst.rowBytesAsPixels();
851 intermediateWidth = dstW;
852 intermediateDst = static_cast<uint32_t *>(dst.getPixels());
853
854 const uint32_t* srcCursor = static_cast<uint32_t*>(src.getPixels());
855 uint32_t* dstCursor = intermediateSrc;
856 for (auto y = 0; y < srcH; y++) {
857 pass->blur(srcBounds.left(), srcBounds.right(), dstBounds.right(),
858 srcCursor, 1, dstCursor, 1);
859 srcCursor += src.rowBytesAsPixels();
860 dstCursor += intermediateRowBytesAsPixels;
861 }
862 }
863
864 if (makerY->window() > 1) {
865 Pass* pass = makerY->makePass(buffer, &alloc);
866 const uint32_t* srcCursor = intermediateSrc;
867 uint32_t* dstCursor = intermediateDst;
868 for (auto x = 0; x < intermediateWidth; x++) {
869 pass->blur(srcBounds.top(), srcBounds.bottom(), dstBounds.bottom(),
870 srcCursor, intermediateRowBytesAsPixels,
871 dstCursor, dst.rowBytesAsPixels());
872 srcCursor += 1;
873 dstCursor += 1;
874 }
875 }
876
877 return SkSpecialImage::MakeFromRaster(SkIRect::MakeWH(dstBounds.width(),
878 dstBounds.height()),
879 dst, ctx.surfaceProps());
880 }
881 } // namespace
882
// Upper limit for the blur sigma on each axis. map_sigma() below clamps the absolute value
// of the CTM-mapped sigma to this limit.
//
// This rather arbitrary-looking value results in a maximum box blur kernel size
// of 1000 pixels on the raster path, which matches the WebKit and Firefox
// implementations. Since the GPU path does not compute a box blur, putting
// the limit on sigma ensures consistent behaviour between the GPU and
// raster paths.
#define MAX_SIGMA SkIntToScalar(532)
889
map_sigma(const SkSize & localSigma,const SkMatrix & ctm)890 static SkVector map_sigma(const SkSize& localSigma, const SkMatrix& ctm) {
891 SkVector sigma = SkVector::Make(localSigma.width(), localSigma.height());
892 ctm.mapVectors(&sigma, 1);
893 sigma.fX = std::min(SkScalarAbs(sigma.fX), MAX_SIGMA);
894 sigma.fY = std::min(SkScalarAbs(sigma.fY), MAX_SIGMA);
895 return sigma;
896 }
897
onFilterImage(const Context & ctx,SkIPoint * offset) const898 sk_sp<SkSpecialImage> SkBlurImageFilter::onFilterImage(const Context& ctx,
899 SkIPoint* offset) const {
900 SkIPoint inputOffset = SkIPoint::Make(0, 0);
901
902 sk_sp<SkSpecialImage> input(this->filterInput(0, ctx, &inputOffset));
903 if (!input) {
904 return nullptr;
905 }
906
907 SkIRect inputBounds = SkIRect::MakeXYWH(inputOffset.fX, inputOffset.fY,
908 input->width(), input->height());
909
910 // Calculate the destination bounds.
911 SkIRect dstBounds;
912 if (!this->applyCropRect(this->mapContext(ctx), inputBounds, &dstBounds)) {
913 return nullptr;
914 }
915 if (!inputBounds.intersect(dstBounds)) {
916 return nullptr;
917 }
918
919 // Save the offset in preparation to make all rectangles relative to the inputOffset.
920 SkIPoint resultOffset = SkIPoint::Make(dstBounds.fLeft, dstBounds.fTop);
921
922 // Make all bounds relative to the inputOffset.
923 inputBounds.offset(-inputOffset);
924 dstBounds.offset(-inputOffset);
925
926 SkVector sigma = map_sigma(fSigma, ctx.ctm());
927 if (sigma.x() < 0 || sigma.y() < 0) {
928 return nullptr;
929 }
930
931 sk_sp<SkSpecialImage> result;
932 #if SK_SUPPORT_GPU
933 if (ctx.gpuBacked()) {
934 // Ensure the input is in the destination's gamut. This saves us from having to do the
935 // xform during the filter itself.
936 input = ImageToColorSpace(input.get(), ctx.colorType(), ctx.colorSpace(),
937 ctx.surfaceProps());
938 result = this->gpuFilter(ctx, sigma, input, inputBounds, dstBounds, inputOffset,
939 &resultOffset);
940 } else
941 #endif
942 {
943 // Please see the comment on TentPass::MakeMaker for how the limit of 2183 for sigma is
944 // calculated. The effective limit of blur is 532 which is set by the GPU above in
945 // map_sigma.
946 sigma.fX = SkTPin(sigma.fX, 0.0f, 2183.0f);
947 sigma.fY = SkTPin(sigma.fY, 0.0f, 2183.0f);
948
949 result = cpu_blur(ctx, sigma, input, inputBounds, dstBounds);
950 }
951
952 // Return the resultOffset if the blur succeeded.
953 if (result != nullptr) {
954 *offset = resultOffset;
955 }
956 return result;
957 }
958
959 #if SK_SUPPORT_GPU
// GPU implementation of the blur.
//
// When both sigmas are effectively zero the input subset covering 'inputBounds' is returned
// unblurred and '*offset' is set to that subset's origin (inputBounds origin + inputOffset).
// Otherwise the blur is delegated to SkGpuBlurUtils::GaussianBlur() and the result is wrapped
// in a special image sized to 'dstBounds'; '*offset' is left as the caller set it.
//
// Returns nullptr when the input has no proxy view for the context, when GaussianBlur()
// fails, or when the build does not define SK_GPU_V1.
sk_sp<SkSpecialImage> SkBlurImageFilter::gpuFilter(
        const Context& ctx, SkVector sigma, const sk_sp<SkSpecialImage> &input, SkIRect inputBounds,
        SkIRect dstBounds, SkIPoint inputOffset, SkIPoint* offset) const {
#if SK_GPU_V1
    const bool nothingToBlur = SkGpuBlurUtils::IsEffectivelyZeroSigma(sigma.x()) &&
                               SkGpuBlurUtils::IsEffectivelyZeroSigma(sigma.y());
    if (nothingToBlur) {
        offset->set(inputBounds.x() + inputOffset.fX, inputBounds.y() + inputOffset.fY);
        return input->makeSubset(inputBounds);
    }

    auto rContext = ctx.getContext();

    GrSurfaceProxyView srcView = input->view(rContext);
    if (!srcView.proxy()) {
        return nullptr;
    }
    SkASSERT(srcView.asTextureProxy());

    // Both rectangles are shifted by the origin of the input's subset before being handed to
    // the blur.
    const SkIPoint subsetOrigin = input->subset().topLeft();
    dstBounds.offset(subsetOrigin);
    inputBounds.offset(subsetOrigin);

    // TODO (michaelludwig) - The color space choice is odd, should it just be ctx.refColorSpace()?
    sk_sp<SkColorSpace> blurColorSpace =
            ctx.colorSpace() ? sk_ref_sp(input->getColorSpace()) : nullptr;

    auto sdc = SkGpuBlurUtils::GaussianBlur(rContext,
                                            std::move(srcView),
                                            SkColorTypeToGrColorType(input->colorType()),
                                            input->alphaType(),
                                            std::move(blurColorSpace),
                                            dstBounds,
                                            inputBounds,
                                            sigma.x(),
                                            sigma.y(),
                                            fTileMode);
    if (!sdc) {
        return nullptr;
    }

    const SkIRect resultSubset = SkIRect::MakeSize(dstBounds.size());
    return SkSpecialImage::MakeDeferredFromGpu(rContext,
                                               resultSubset,
                                               kNeedNewImageUniqueID_SpecialImage,
                                               sdc->readSurfaceView(),
                                               sdc->colorInfo().colorType(),
                                               sk_ref_sp(input->getColorSpace()),
                                               ctx.surfaceProps());
#else // SK_GPU_V1
    return nullptr;
#endif // SK_GPU_V1
}
1008 #endif
1009
// Returns conservative bounds for the blurred output: the fast bounds of input 0 (or 'src'
// itself when there is no input filter), grown by three times the local sigma on each axis.
SkRect SkBlurImageFilter::computeFastBounds(const SkRect& src) const {
    const auto* upstream = this->getInput(0);
    SkRect grown = src;
    if (upstream) {
        grown = upstream->computeFastBounds(src);
    }
    grown.outset(fSigma.width() * 3, fSigma.height() * 3);
    return grown;
}
1015
// Grows 'src' by three times the CTM-mapped sigma on each axis, rounded up to whole pixels.
// The same outset is used regardless of the map direction, and 'inputRect' is not consulted.
SkIRect SkBlurImageFilter::onFilterNodeBounds(const SkIRect& src, const SkMatrix& ctm,
                                              MapDirection, const SkIRect* inputRect) const {
    const SkVector mappedSigma = map_sigma(fSigma, ctm);
    const int padX = SkScalarCeilToInt(mappedSigma.x() * 3);
    const int padY = SkScalarCeilToInt(mappedSigma.y() * 3);
    return src.makeOutset(padX, padY);
}
1021