• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2011 The Android Open Source Project
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 
#include <algorithm>
#include <utility>

#include "include/core/SkBitmap.h"
#include "include/core/SkTileMode.h"
#include "include/effects/SkImageFilters.h"
#include "include/private/SkColorData.h"
#include "include/private/SkTFitsIn.h"
#include "include/private/SkTPin.h"
#include "include/private/SkVx.h"
#include "src/core/SkArenaAlloc.h"
#include "src/core/SkAutoPixmapStorage.h"
#include "src/core/SkGpuBlurUtils.h"
#include "src/core/SkImageFilter_Base.h"
#include "src/core/SkOpts.h"
#include "src/core/SkReadBuffer.h"
#include "src/core/SkSpecialImage.h"
#include "src/core/SkWriteBuffer.h"

#if SK_SUPPORT_GPU
#include "src/gpu/GrTextureProxy.h"
#include "src/gpu/SkGr.h"
#if SK_GPU_V1
#include "src/gpu/v1/SurfaceDrawContext_v1.h"
#endif // SK_GPU_V1
#endif // SK_SUPPORT_GPU
33 
34 namespace {
35 
36 class SkBlurImageFilter final : public SkImageFilter_Base {
37 public:
SkBlurImageFilter(SkScalar sigmaX,SkScalar sigmaY,SkTileMode tileMode,sk_sp<SkImageFilter> input,const SkRect * cropRect)38     SkBlurImageFilter(SkScalar sigmaX, SkScalar sigmaY,  SkTileMode tileMode,
39                       sk_sp<SkImageFilter> input, const SkRect* cropRect)
40             : INHERITED(&input, 1, cropRect)
41             , fSigma{sigmaX, sigmaY}
42             , fTileMode(tileMode) {}
43 
44     SkRect computeFastBounds(const SkRect&) const override;
45 
46 protected:
47     void flatten(SkWriteBuffer&) const override;
48     sk_sp<SkSpecialImage> onFilterImage(const Context&, SkIPoint* offset) const override;
49     SkIRect onFilterNodeBounds(const SkIRect& src, const SkMatrix& ctm,
50                                MapDirection, const SkIRect* inputRect) const override;
51 
52 private:
53     friend void ::SkRegisterBlurImageFilterFlattenable();
54     SK_FLATTENABLE_HOOKS(SkBlurImageFilter)
55 
56 #if SK_SUPPORT_GPU
57     sk_sp<SkSpecialImage> gpuFilter(
58             const Context& ctx, SkVector sigma,
59             const sk_sp<SkSpecialImage> &input,
60             SkIRect inputBounds, SkIRect dstBounds, SkIPoint inputOffset, SkIPoint* offset) const;
61 #endif
62 
63     SkSize     fSigma;
64     SkTileMode fTileMode;
65 
66     using INHERITED = SkImageFilter_Base;
67 };
68 
69 } // end namespace
70 
Blur(SkScalar sigmaX,SkScalar sigmaY,SkTileMode tileMode,sk_sp<SkImageFilter> input,const CropRect & cropRect)71 sk_sp<SkImageFilter> SkImageFilters::Blur(
72         SkScalar sigmaX, SkScalar sigmaY, SkTileMode tileMode, sk_sp<SkImageFilter> input,
73         const CropRect& cropRect) {
74     if (sigmaX < SK_ScalarNearlyZero && sigmaY < SK_ScalarNearlyZero && !cropRect) {
75         return input;
76     }
77     return sk_sp<SkImageFilter>(
78           new SkBlurImageFilter(sigmaX, sigmaY, tileMode, input, cropRect));
79 }
80 
SkRegisterBlurImageFilterFlattenable()81 void SkRegisterBlurImageFilterFlattenable() {
82     SK_REGISTER_FLATTENABLE(SkBlurImageFilter);
83     SkFlattenable::Register("SkBlurImageFilterImpl", SkBlurImageFilter::CreateProc);
84 }
85 
CreateProc(SkReadBuffer & buffer)86 sk_sp<SkFlattenable> SkBlurImageFilter::CreateProc(SkReadBuffer& buffer) {
87     SK_IMAGEFILTER_UNFLATTEN_COMMON(common, 1);
88     SkScalar sigmaX = buffer.readScalar();
89     SkScalar sigmaY = buffer.readScalar();
90     SkTileMode tileMode = buffer.read32LE(SkTileMode::kLastTileMode);
91     return SkImageFilters::Blur(
92           sigmaX, sigmaY, tileMode, common.getInput(0), common.cropRect());
93 }
94 
flatten(SkWriteBuffer & buffer) const95 void SkBlurImageFilter::flatten(SkWriteBuffer& buffer) const {
96     this->INHERITED::flatten(buffer);
97     buffer.writeScalar(fSigma.fWidth);
98     buffer.writeScalar(fSigma.fHeight);
99 
100     SkASSERT(fTileMode <= SkTileMode::kLastTileMode);
101     buffer.writeInt(static_cast<int>(fTileMode));
102 }
103 
104 ///////////////////////////////////////////////////////////////////////////////
105 
106 namespace {
107 // This is defined by the SVG spec:
108 // https://drafts.fxtf.org/filter-effects/#feGaussianBlurElement
calculate_window(double sigma)109 int calculate_window(double sigma) {
110     auto possibleWindow = static_cast<int>(floor(sigma * 3 * sqrt(2 * SK_DoublePI) / 4 + 0.5));
111     return std::max(1, possibleWindow);
112 }
113 
114 class Pass {
115 public:
Pass(int border)116     explicit Pass(int border) : fBorder(border) {}
117     virtual ~Pass() = default;
118 
blur(int srcLeft,int srcRight,int dstRight,const uint32_t * src,int srcStride,uint32_t * dst,int dstStride)119     void blur(int srcLeft, int srcRight, int dstRight,
120               const uint32_t* src, int srcStride,
121               uint32_t* dst, int dstStride) {
122         this->startBlur();
123 
124         auto srcStart = srcLeft - fBorder,
125                 srcEnd   = srcRight - fBorder,
126                 dstEnd   = dstRight,
127                 srcIdx   = srcStart,
128                 dstIdx   = 0;
129 
130         const uint32_t* srcCursor = src;
131         uint32_t* dstCursor = dst;
132 
133         if (dstIdx < srcIdx) {
134             // The destination pixels are not effected by the src pixels,
135             // change to zero as per the spec.
136             // https://drafts.fxtf.org/filter-effects/#FilterPrimitivesOverviewIntro
137             while (dstIdx < srcIdx) {
138                 *dstCursor = 0;
139                 dstCursor += dstStride;
140                 SK_PREFETCH(dstCursor);
141                 dstIdx++;
142             }
143         } else if (srcIdx < dstIdx) {
144             // The edge of the source is before the edge of the destination. Calculate the sums for
145             // the pixels before the start of the destination.
146             if (int commonEnd = std::min(dstIdx, srcEnd); srcIdx < commonEnd) {
147                 // Preload the blur with values from src before dst is entered.
148                 int n = commonEnd - srcIdx;
149                 this->blurSegment(n, srcCursor, srcStride, nullptr, 0);
150                 srcIdx += n;
151                 srcCursor += n * srcStride;
152             }
153             if (srcIdx < dstIdx) {
154                 // The weird case where src is out of pixels before dst is even started.
155                 int n = dstIdx - srcIdx;
156                 this->blurSegment(n, nullptr, 0, nullptr, 0);
157                 srcIdx += n;
158             }
159         }
160 
161         // Both srcIdx and dstIdx are in sync now, and can run in a 1:1 fashion. This is the
162         // normal mode of operation.
163         SkASSERT(srcIdx == dstIdx);
164         if (int commonEnd = std::min(dstEnd, srcEnd); dstIdx < commonEnd) {
165             int n = commonEnd - dstIdx;
166             this->blurSegment(n, srcCursor, srcStride, dstCursor, dstStride);
167             srcCursor += n * srcStride;
168             dstCursor += n * dstStride;
169             dstIdx += n;
170             srcIdx += n;
171         }
172 
173         // Drain the remaining blur values into dst assuming 0's for the leading edge.
174         if (dstIdx < dstEnd) {
175             int n = dstEnd - dstIdx;
176             this->blurSegment(n, nullptr, 0, dstCursor, dstStride);
177         }
178     }
179 
180 protected:
181     virtual void startBlur() = 0;
182     virtual void blurSegment(
183             int n, const uint32_t* src, int srcStride, uint32_t* dst, int dstStride) = 0;
184 
185 private:
186     const int fBorder;
187 };
188 
189 class PassMaker {
190 public:
PassMaker(int window)191     explicit PassMaker(int window) : fWindow{window} {}
192     virtual ~PassMaker() = default;
193     virtual Pass* makePass(void* buffer, SkArenaAlloc* alloc) const = 0;
194     virtual size_t bufferSizeBytes() const = 0;
window() const195     int window() const {return fWindow;}
196 
197 private:
198     const int fWindow;
199 };
200 
201 // Implement a scanline processor that uses a three-box filter to approximate a Gaussian blur.
202 // The GaussPass is limit to processing sigmas < 135.
203 class GaussPass final : public Pass {
204 public:
205     // NB 136 is the largest sigma that will not cause a buffer full of 255 mask values to overflow
206     // using the Gauss filter. It also limits the size of buffers used hold intermediate values.
207     // Explanation of maximums:
208     //   sum0 = window * 255
209     //   sum1 = window * sum0 -> window * window * 255
210     //   sum2 = window * sum1 -> window * window * window * 255 -> window^3 * 255
211     //
212     //   The value window^3 * 255 must fit in a uint32_t. So,
213     //      window^3 < 2^32. window = 255.
214     //
215     //   window = floor(sigma * 3 * sqrt(2 * kPi) / 4 + 0.5)
216     //   For window <= 255, the largest value for sigma is 136.
MakeMaker(double sigma,SkArenaAlloc * alloc)217     static PassMaker* MakeMaker(double sigma, SkArenaAlloc* alloc) {
218         SkASSERT(0 <= sigma);
219         int window = calculate_window(sigma);
220         if (255 <= window) {
221             return nullptr;
222         }
223 
224         class Maker : public PassMaker {
225         public:
226             explicit Maker(int window) : PassMaker{window} {}
227             Pass* makePass(void* buffer, SkArenaAlloc* alloc) const override {
228                 return GaussPass::Make(this->window(), buffer, alloc);
229             }
230 
231             size_t bufferSizeBytes() const override {
232                 int window = this->window();
233                 size_t onePassSize = window - 1;
234                 // If the window is odd, then there is an obvious middle element. For even sizes
235                 // 2 passes are shifted, and the last pass has an extra element. Like this:
236                 //       S
237                 //    aaaAaa
238                 //     bbBbbb
239                 //    cccCccc
240                 //       D
241                 size_t bufferCount = (window & 1) == 1 ? 3 * onePassSize : 3 * onePassSize + 1;
242                 return bufferCount * sizeof(skvx::Vec<4, uint32_t>);
243             }
244         };
245 
246         return alloc->make<Maker>(window);
247     }
248 
Make(int window,void * buffers,SkArenaAlloc * alloc)249     static GaussPass* Make(int window, void* buffers, SkArenaAlloc* alloc) {
250         // We don't need to store the trailing edge pixel in the buffer;
251         int passSize = window - 1;
252         skvx::Vec<4, uint32_t>* buffer0 = static_cast<skvx::Vec<4, uint32_t>*>(buffers);
253         skvx::Vec<4, uint32_t>* buffer1 = buffer0 + passSize;
254         skvx::Vec<4, uint32_t>* buffer2 = buffer1 + passSize;
255         // If the window is odd just one buffer is needed, but if it's even, then there is one
256         // more element on that pass.
257         skvx::Vec<4, uint32_t>* buffersEnd = buffer2 + ((window & 1) ? passSize : passSize + 1);
258 
259         // Calculating the border is tricky. The border is the distance in pixels between the first
260         // dst pixel and the first src pixel (or the last src pixel and the last dst pixel).
261         // I will go through the odd case which is simpler, and then through the even case. Given a
262         // stack of filters seven wide for the odd case of three passes.
263         //
264         //        S
265         //     aaaAaaa
266         //     bbbBbbb
267         //     cccCccc
268         //        D
269         //
270         // The furthest changed pixel is when the filters are in the following configuration.
271         //
272         //                 S
273         //           aaaAaaa
274         //        bbbBbbb
275         //     cccCccc
276         //        D
277         //
278         // The A pixel is calculated using the value S, the B uses A, and the C uses B, and
279         // finally D is C. So, with a window size of seven the border is nine. In the odd case, the
280         // border is 3*((window - 1)/2).
281         //
282         // For even cases the filter stack is more complicated. The spec specifies two passes
283         // of even filters and a final pass of odd filters. A stack for a width of six looks like
284         // this.
285         //
286         //       S
287         //    aaaAaa
288         //     bbBbbb
289         //    cccCccc
290         //       D
291         //
292         // The furthest pixel looks like this.
293         //
294         //               S
295         //          aaaAaa
296         //        bbBbbb
297         //    cccCccc
298         //       D
299         //
300         // For a window of six, the border value is eight. In the even case the border is 3 *
301         // (window/2) - 1.
302         int border = (window & 1) == 1 ? 3 * ((window - 1) / 2) : 3 * (window / 2) - 1;
303 
304         // If the window is odd then the divisor is just window ^ 3 otherwise,
305         // it is window * window * (window + 1) = window ^ 3 + window ^ 2;
306         int window2 = window * window;
307         int window3 = window2 * window;
308         int divisor = (window & 1) == 1 ? window3 : window3 + window2;
309         return alloc->make<GaussPass>(buffer0, buffer1, buffer2, buffersEnd, border, divisor);
310     }
311 
GaussPass(skvx::Vec<4,uint32_t> * buffer0,skvx::Vec<4,uint32_t> * buffer1,skvx::Vec<4,uint32_t> * buffer2,skvx::Vec<4,uint32_t> * buffersEnd,int border,int divisor)312     GaussPass(skvx::Vec<4, uint32_t>* buffer0,
313               skvx::Vec<4, uint32_t>* buffer1,
314               skvx::Vec<4, uint32_t>* buffer2,
315               skvx::Vec<4, uint32_t>* buffersEnd,
316               int border,
317               int divisor)
318         : Pass{border}
319         , fBuffer0{buffer0}
320         , fBuffer1{buffer1}
321         , fBuffer2{buffer2}
322         , fBuffersEnd{buffersEnd}
323         , fDivider(divisor) {}
324 
325 private:
startBlur()326     void startBlur() override {
327         skvx::Vec<4, uint32_t> zero = {0u, 0u, 0u, 0u};
328         zero.store(fSum0);
329         zero.store(fSum1);
330         auto half = fDivider.half();
331         skvx::Vec<4, uint32_t>{half, half, half, half}.store(fSum2);
332         sk_bzero(fBuffer0, (fBuffersEnd - fBuffer0) * sizeof(skvx::Vec<4, uint32_t>));
333 
334         fBuffer0Cursor = fBuffer0;
335         fBuffer1Cursor = fBuffer1;
336         fBuffer2Cursor = fBuffer2;
337     }
338 
339     // GaussPass implements the common three pass box filter approximation of Gaussian blur,
340     // but combines all three passes into a single pass. This approach is facilitated by three
341     // circular buffers the width of the window which track values for trailing edges of each of
342     // the three passes. This allows the algorithm to use more precision in the calculation
343     // because the values are not rounded each pass. And this implementation also avoids a trap
344     // that's easy to fall into resulting in blending in too many zeroes near the edge.
345     //
346     // In general, a window sum has the form:
347     //     sum_n+1 = sum_n + leading_edge - trailing_edge.
348     // If instead we do the subtraction at the end of the previous iteration, we can just
349     // calculate the sums instead of having to do the subtractions too.
350     //
351     //      In previous iteration:
352     //      sum_n+1 = sum_n - trailing_edge.
353     //
354     //      In this iteration:
355     //      sum_n+1 = sum_n + leading_edge.
356     //
357     // Now we can stack all three sums and do them at once. Sum0 gets its leading edge from the
358     // actual data. Sum1's leading edge is just Sum0, and Sum2's leading edge is Sum1. So, doing the
359     // three passes at the same time has the form:
360     //
361     //    sum0_n+1 = sum0_n + leading edge
362     //    sum1_n+1 = sum1_n + sum0_n+1
363     //    sum2_n+1 = sum2_n + sum1_n+1
364     //
365     //    sum2_n+1 / window^3 is the new value of the destination pixel.
366     //
367     // Reduce the sums by the trailing edges which were stored in the circular buffers for the
368     // next go around. This is the case for odd sized windows, even windows the the third
369     // circular buffer is one larger then the first two circular buffers.
370     //
371     //    sum2_n+2 = sum2_n+1 - buffer2[i];
372     //    buffer2[i] = sum1;
373     //    sum1_n+2 = sum1_n+1 - buffer1[i];
374     //    buffer1[i] = sum0;
375     //    sum0_n+2 = sum0_n+1 - buffer0[i];
376     //    buffer0[i] = leading edge
blurSegment(int n,const uint32_t * src,int srcStride,uint32_t * dst,int dstStride)377     void blurSegment(
378             int n, const uint32_t* src, int srcStride, uint32_t* dst, int dstStride) override {
379         skvx::Vec<4, uint32_t>* buffer0Cursor = fBuffer0Cursor;
380         skvx::Vec<4, uint32_t>* buffer1Cursor = fBuffer1Cursor;
381         skvx::Vec<4, uint32_t>* buffer2Cursor = fBuffer2Cursor;
382         skvx::Vec<4, uint32_t> sum0 = skvx::Vec<4, uint32_t>::Load(fSum0);
383         skvx::Vec<4, uint32_t> sum1 = skvx::Vec<4, uint32_t>::Load(fSum1);
384         skvx::Vec<4, uint32_t> sum2 = skvx::Vec<4, uint32_t>::Load(fSum2);
385 
386         // Given an expanded input pixel, move the window ahead using the leadingEdge value.
387         auto processValue = [&](const skvx::Vec<4, uint32_t>& leadingEdge) {
388             sum0 += leadingEdge;
389             sum1 += sum0;
390             sum2 += sum1;
391 
392             skvx::Vec<4, uint32_t> blurred = fDivider.divide(sum2);
393 
394             sum2 -= *buffer2Cursor;
395             *buffer2Cursor = sum1;
396             buffer2Cursor = (buffer2Cursor + 1) < fBuffersEnd ? buffer2Cursor + 1 : fBuffer2;
397             sum1 -= *buffer1Cursor;
398             *buffer1Cursor = sum0;
399             buffer1Cursor = (buffer1Cursor + 1) < fBuffer2 ? buffer1Cursor + 1 : fBuffer1;
400             sum0 -= *buffer0Cursor;
401             *buffer0Cursor = leadingEdge;
402             buffer0Cursor = (buffer0Cursor + 1) < fBuffer1 ? buffer0Cursor + 1 : fBuffer0;
403 
404             return skvx::cast<uint8_t>(blurred);
405         };
406 
407         auto loadEdge = [&](const uint32_t* srcCursor) {
408             return skvx::cast<uint32_t>(skvx::Vec<4, uint8_t>::Load(srcCursor));
409         };
410 
411         if (!src && !dst) {
412             while (n --> 0) {
413                 (void)processValue(0);
414             }
415         } else if (src && !dst) {
416             while (n --> 0) {
417                 (void)processValue(loadEdge(src));
418                 src += srcStride;
419             }
420         } else if (!src && dst) {
421             while (n --> 0) {
422                 processValue(0u).store(dst);
423                 dst += dstStride;
424             }
425         } else if (src && dst) {
426             while (n --> 0) {
427                 processValue(loadEdge(src)).store(dst);
428                 src += srcStride;
429                 dst += dstStride;
430             }
431         }
432 
433         // Store the state
434         fBuffer0Cursor = buffer0Cursor;
435         fBuffer1Cursor = buffer1Cursor;
436         fBuffer2Cursor = buffer2Cursor;
437 
438         sum0.store(fSum0);
439         sum1.store(fSum1);
440         sum2.store(fSum2);
441     }
442 
443     skvx::Vec<4, uint32_t>* const fBuffer0;
444     skvx::Vec<4, uint32_t>* const fBuffer1;
445     skvx::Vec<4, uint32_t>* const fBuffer2;
446     skvx::Vec<4, uint32_t>* const fBuffersEnd;
447     const skvx::ScaledDividerU32 fDivider;
448 
449     // blur state
450     char fSum0[sizeof(skvx::Vec<4, uint32_t>)];
451     char fSum1[sizeof(skvx::Vec<4, uint32_t>)];
452     char fSum2[sizeof(skvx::Vec<4, uint32_t>)];
453     skvx::Vec<4, uint32_t>* fBuffer0Cursor;
454     skvx::Vec<4, uint32_t>* fBuffer1Cursor;
455     skvx::Vec<4, uint32_t>* fBuffer2Cursor;
456 };
457 
458 // Implement a scanline processor that uses a two-box filter to approximate a Tent filter.
459 // The TentPass is limit to processing sigmas < 2183.
460 class TentPass final : public Pass {
461 public:
462     // NB 136 is the largest sigma that will not cause a buffer full of 255 mask values to overflow
463     // using the Gauss filter. It also limits the size of buffers used hold intermediate values.
464     // Explanation of maximums:
465     //   sum0 = window * 255
466     //   sum1 = window * sum0 -> window * window * 255
467     //
468     //   The value window^2 * 255 must fit in a uint32_t. So,
469     //      window^2 < 2^32. window = 4104.
470     //
471     //   window = floor(sigma * 3 * sqrt(2 * kPi) / 4 + 0.5)
472     //   For window <= 4104, the largest value for sigma is 2183.
MakeMaker(double sigma,SkArenaAlloc * alloc)473     static PassMaker* MakeMaker(double sigma, SkArenaAlloc* alloc) {
474         SkASSERT(0 <= sigma);
475         int gaussianWindow = calculate_window(sigma);
476         // This is a naive method of using the window size for the Gaussian blur to calculate the
477         // window size for the Tent blur. This seems to work well in practice.
478         //
479         // We can use a single pixel to generate the effective blur area given a window size. For
480         // the Gaussian blur this is 3 * window size. For the Tent filter this is 2 * window size.
481         int tentWindow = 3 * gaussianWindow / 2;
482         if (tentWindow >= 4104) {
483             return nullptr;
484         }
485 
486         class Maker : public PassMaker {
487         public:
488             explicit Maker(int window) : PassMaker{window} {}
489             Pass* makePass(void* buffer, SkArenaAlloc* alloc) const override {
490                 return TentPass::Make(this->window(), buffer, alloc);
491             }
492 
493             size_t bufferSizeBytes() const override {
494                 size_t onePassSize = this->window() - 1;
495                 // If the window is odd, then there is an obvious middle element. For even sizes 2
496                 // passes are shifted, and the last pass has an extra element. Like this:
497                 //       S
498                 //    aaaAaa
499                 //     bbBbbb
500                 //       D
501                 size_t bufferCount = 2 * onePassSize;
502                 return bufferCount * sizeof(skvx::Vec<4, uint32_t>);
503             }
504         };
505 
506         return alloc->make<Maker>(tentWindow);
507     }
508 
Make(int window,void * buffers,SkArenaAlloc * alloc)509     static TentPass* Make(int window, void* buffers, SkArenaAlloc* alloc) {
510         if (window > 4104) {
511             return nullptr;
512         }
513 
514         // We don't need to store the trailing edge pixel in the buffer;
515         int passSize = window - 1;
516         skvx::Vec<4, uint32_t>* buffer0 = static_cast<skvx::Vec<4, uint32_t>*>(buffers);
517         skvx::Vec<4, uint32_t>* buffer1 = buffer0 + passSize;
518         skvx::Vec<4, uint32_t>* buffersEnd = buffer1 + passSize;
519 
520         // Calculating the border is tricky. The border is the distance in pixels between the first
521         // dst pixel and the first src pixel (or the last src pixel and the last dst pixel).
522         // I will go through the odd case which is simpler, and then through the even case. Given a
523         // stack of filters seven wide for the odd case of three passes.
524         //
525         //        S
526         //     aaaAaaa
527         //     bbbBbbb
528         //        D
529         //
530         // The furthest changed pixel is when the filters are in the following configuration.
531         //
532         //              S
533         //        aaaAaaa
534         //     bbbBbbb
535         //        D
536         //
537         // The A pixel is calculated using the value S, the B uses A, and the D uses B.
538         // So, with a window size of seven the border is nine. In the odd case, the border is
539         // window - 1.
540         //
541         // For even cases the filter stack is more complicated. It uses two passes
542         // of even filters offset from each other. A stack for a width of six looks like
543         // this.
544         //
545         //       S
546         //    aaaAaa
547         //     bbBbbb
548         //       D
549         //
550         // The furthest pixel looks like this.
551         //
552         //            S
553         //       aaaAaa
554         //     bbBbbb
555         //       D
556         //
557         // For a window of six, the border value is 5. In the even case the border is
558         // window - 1.
559         int border = window - 1;
560 
561         int divisor = window * window;
562         return alloc->make<TentPass>(buffer0, buffer1, buffersEnd, border, divisor);
563     }
564 
TentPass(skvx::Vec<4,uint32_t> * buffer0,skvx::Vec<4,uint32_t> * buffer1,skvx::Vec<4,uint32_t> * buffersEnd,int border,int divisor)565     TentPass(skvx::Vec<4, uint32_t>* buffer0,
566              skvx::Vec<4, uint32_t>* buffer1,
567              skvx::Vec<4, uint32_t>* buffersEnd,
568              int border,
569              int divisor)
570          : Pass{border}
571          , fBuffer0{buffer0}
572          , fBuffer1{buffer1}
573          , fBuffersEnd{buffersEnd}
574          , fDivider(divisor) {}
575 
576 private:
startBlur()577     void startBlur() override {
578         skvx::Vec<4, uint32_t>{0u, 0u, 0u, 0u}.store(fSum0);
579         auto half = fDivider.half();
580         skvx::Vec<4, uint32_t>{half, half, half, half}.store(fSum1);
581         sk_bzero(fBuffer0, (fBuffersEnd - fBuffer0) * sizeof(skvx::Vec<4, uint32_t>));
582 
583         fBuffer0Cursor = fBuffer0;
584         fBuffer1Cursor = fBuffer1;
585     }
586 
587     // TentPass implements the common two pass box filter approximation of Tent filter,
588     // but combines all both passes into a single pass. This approach is facilitated by two
589     // circular buffers the width of the window which track values for trailing edges of each of
590     // both passes. This allows the algorithm to use more precision in the calculation
591     // because the values are not rounded each pass. And this implementation also avoids a trap
592     // that's easy to fall into resulting in blending in too many zeroes near the edge.
593     //
594     // In general, a window sum has the form:
595     //     sum_n+1 = sum_n + leading_edge - trailing_edge.
596     // If instead we do the subtraction at the end of the previous iteration, we can just
597     // calculate the sums instead of having to do the subtractions too.
598     //
599     //      In previous iteration:
600     //      sum_n+1 = sum_n - trailing_edge.
601     //
602     //      In this iteration:
603     //      sum_n+1 = sum_n + leading_edge.
604     //
605     // Now we can stack all three sums and do them at once. Sum0 gets its leading edge from the
606     // actual data. Sum1's leading edge is just Sum0, and Sum2's leading edge is Sum1. So, doing the
607     // three passes at the same time has the form:
608     //
609     //    sum0_n+1 = sum0_n + leading edge
610     //    sum1_n+1 = sum1_n + sum0_n+1
611     //
612     //    sum1_n+1 / window^2 is the new value of the destination pixel.
613     //
614     // Reduce the sums by the trailing edges which were stored in the circular buffers for the
615     // next go around.
616     //
617     //    sum1_n+2 = sum1_n+1 - buffer1[i];
618     //    buffer1[i] = sum0;
619     //    sum0_n+2 = sum0_n+1 - buffer0[i];
620     //    buffer0[i] = leading edge
blurSegment(int n,const uint32_t * src,int srcStride,uint32_t * dst,int dstStride)621     void blurSegment(
622             int n, const uint32_t* src, int srcStride, uint32_t* dst, int dstStride) override {
623         skvx::Vec<4, uint32_t>* buffer0Cursor = fBuffer0Cursor;
624         skvx::Vec<4, uint32_t>* buffer1Cursor = fBuffer1Cursor;
625         skvx::Vec<4, uint32_t> sum0 = skvx::Vec<4, uint32_t>::Load(fSum0);
626         skvx::Vec<4, uint32_t> sum1 = skvx::Vec<4, uint32_t>::Load(fSum1);
627 
628         // Given an expanded input pixel, move the window ahead using the leadingEdge value.
629         auto processValue = [&](const skvx::Vec<4, uint32_t>& leadingEdge) {
630             sum0 += leadingEdge;
631             sum1 += sum0;
632 
633             skvx::Vec<4, uint32_t> blurred = fDivider.divide(sum1);
634 
635             sum1 -= *buffer1Cursor;
636             *buffer1Cursor = sum0;
637             buffer1Cursor = (buffer1Cursor + 1) < fBuffersEnd ? buffer1Cursor + 1 : fBuffer1;
638             sum0 -= *buffer0Cursor;
639             *buffer0Cursor = leadingEdge;
640             buffer0Cursor = (buffer0Cursor + 1) < fBuffer1 ? buffer0Cursor + 1 : fBuffer0;
641 
642             return skvx::cast<uint8_t>(blurred);
643         };
644 
645         auto loadEdge = [&](const uint32_t* srcCursor) {
646             return skvx::cast<uint32_t>(skvx::Vec<4, uint8_t>::Load(srcCursor));
647         };
648 
649         if (!src && !dst) {
650             while (n --> 0) {
651                 (void)processValue(0);
652             }
653         } else if (src && !dst) {
654             while (n --> 0) {
655                 (void)processValue(loadEdge(src));
656                 src += srcStride;
657             }
658         } else if (!src && dst) {
659             while (n --> 0) {
660                 processValue(0u).store(dst);
661                 dst += dstStride;
662             }
663         } else if (src && dst) {
664             while (n --> 0) {
665                 processValue(loadEdge(src)).store(dst);
666                 src += srcStride;
667                 dst += dstStride;
668             }
669         }
670 
671         // Store the state
672         fBuffer0Cursor = buffer0Cursor;
673         fBuffer1Cursor = buffer1Cursor;
674         sum0.store(fSum0);
675         sum1.store(fSum1);
676     }
677 
678     skvx::Vec<4, uint32_t>* const fBuffer0;
679     skvx::Vec<4, uint32_t>* const fBuffer1;
680     skvx::Vec<4, uint32_t>* const fBuffersEnd;
681     const skvx::ScaledDividerU32 fDivider;
682 
683     // blur state
684     char fSum0[sizeof(skvx::Vec<4, uint32_t>)];
685     char fSum1[sizeof(skvx::Vec<4, uint32_t>)];
686     skvx::Vec<4, uint32_t>* fBuffer0Cursor;
687     skvx::Vec<4, uint32_t>* fBuffer1Cursor;
688 };
689 
copy_image_with_bounds(const SkImageFilter_Base::Context & ctx,const sk_sp<SkSpecialImage> & input,SkIRect srcBounds,SkIRect dstBounds)690 sk_sp<SkSpecialImage> copy_image_with_bounds(
691         const SkImageFilter_Base::Context& ctx, const sk_sp<SkSpecialImage> &input,
692         SkIRect srcBounds, SkIRect dstBounds) {
693     SkBitmap inputBM;
694     if (!input->getROPixels(&inputBM)) {
695         return nullptr;
696     }
697 
698     if (inputBM.colorType() != kN32_SkColorType) {
699         return nullptr;
700     }
701 
702     SkBitmap src;
703     inputBM.extractSubset(&src, srcBounds);
704 
705     // Make everything relative to the destination bounds.
706     srcBounds.offset(-dstBounds.x(), -dstBounds.y());
707     dstBounds.offset(-dstBounds.x(), -dstBounds.y());
708 
709     auto srcW = srcBounds.width(),
710          dstW = dstBounds.width(),
711          dstH = dstBounds.height();
712 
713     SkImageInfo dstInfo = SkImageInfo::Make(dstW, dstH, inputBM.colorType(), inputBM.alphaType());
714 
715     SkBitmap dst;
716     if (!dst.tryAllocPixels(dstInfo)) {
717         return nullptr;
718     }
719 
720     // There is no blurring to do, but we still need to copy the source while accounting for the
721     // dstBounds. Remember that the src was intersected with the dst.
722     int y = 0;
723     size_t dstWBytes = dstW * sizeof(uint32_t);
724     for (;y < srcBounds.top(); y++) {
725         sk_bzero(dst.getAddr32(0, y), dstWBytes);
726     }
727 
728     for (;y < srcBounds.bottom(); y++) {
729         int x = 0;
730         uint32_t* dstPtr = dst.getAddr32(0, y);
731         for (;x < srcBounds.left(); x++) {
732             *dstPtr++ = 0;
733         }
734 
735         memcpy(dstPtr, src.getAddr32(x - srcBounds.left(), y - srcBounds.top()),
736                srcW * sizeof(uint32_t));
737 
738         dstPtr += srcW;
739         x += srcW;
740 
741         for (;x < dstBounds.right(); x++) {
742             *dstPtr++ = 0;
743         }
744     }
745 
746     for (;y < dstBounds.bottom(); y++) {
747         sk_bzero(dst.getAddr32(0, y), dstWBytes);
748     }
749 
750     return SkSpecialImage::MakeFromRaster(SkIRect::MakeWH(dstBounds.width(),
751                                                           dstBounds.height()),
752                                           dst, ctx.surfaceProps());
753 }
754 
755 // TODO: Implement CPU backend for different fTileMode.
cpu_blur(const SkImageFilter_Base::Context & ctx,SkVector sigma,const sk_sp<SkSpecialImage> & input,SkIRect srcBounds,SkIRect dstBounds)756 sk_sp<SkSpecialImage> cpu_blur(
757         const SkImageFilter_Base::Context& ctx,
758         SkVector sigma, const sk_sp<SkSpecialImage> &input,
759         SkIRect srcBounds, SkIRect dstBounds) {
760     SkVector limitedSigma = {SkTPin(sigma.x(), 0.0f, 2183.0f), SkTPin(sigma.y(), 0.0f, 2183.0f)};
761 
762     SkSTArenaAlloc<1024> alloc;
763     auto makeMaker = [&](double sigma) -> PassMaker* {
764         SkASSERT(0 <= sigma && sigma <= 2183);
765         if (PassMaker* maker = GaussPass::MakeMaker(sigma, &alloc)) {
766             return maker;
767         }
768         if (PassMaker* maker = TentPass::MakeMaker(sigma, &alloc)) {
769             return maker;
770         }
771         SK_ABORT("Sigma is out of range.");
772     };
773 
774     PassMaker* makerX = makeMaker(limitedSigma.x());
775     PassMaker* makerY = makeMaker(limitedSigma.y());
776 
777     if (makerX->window() <= 1 && makerY->window() <= 1) {
778         return copy_image_with_bounds(ctx, input, srcBounds, dstBounds);
779     }
780 
781     SkBitmap inputBM;
782 
783     if (!input->getROPixels(&inputBM)) {
784         return nullptr;
785     }
786 
787     if (inputBM.colorType() != kN32_SkColorType) {
788         return nullptr;
789     }
790 
791     SkBitmap src;
792     inputBM.extractSubset(&src, srcBounds);
793 
794     // Make everything relative to the destination bounds.
795     srcBounds.offset(-dstBounds.x(), -dstBounds.y());
796     dstBounds.offset(-dstBounds.x(), -dstBounds.y());
797 
798     auto srcW = srcBounds.width(),
799          srcH = srcBounds.height(),
800          dstW = dstBounds.width(),
801          dstH = dstBounds.height();
802 
803     SkImageInfo dstInfo = inputBM.info().makeWH(dstW, dstH);
804 
805     SkBitmap dst;
806     if (!dst.tryAllocPixels(dstInfo)) {
807         return nullptr;
808     }
809 
810     size_t bufferSizeBytes = std::max(makerX->bufferSizeBytes(), makerY->bufferSizeBytes());
811     auto buffer = alloc.makeBytesAlignedTo(bufferSizeBytes, alignof(skvx::Vec<4, uint32_t>));
812 
813     // Basic Plan: The three cases to handle
814     // * Horizontal and Vertical - blur horizontally while copying values from the source to
815     //     the destination. Then, do an in-place vertical blur.
816     // * Horizontal only - blur horizontally copying values from the source to the destination.
817     // * Vertical only - blur vertically copying values from the source to the destination.
818 
819     // Default to vertical only blur case. If a horizontal blur is needed, then these values
820     // will be adjusted while doing the horizontal blur.
821     auto intermediateSrc = static_cast<uint32_t *>(src.getPixels());
822     auto intermediateRowBytesAsPixels = src.rowBytesAsPixels();
823     auto intermediateWidth = srcW;
824 
825     // Because the border is calculated before the fork of the GPU/CPU path. The border is
826     // the maximum of the two rendering methods. In the case where sigma is zero, then the
827     // src and dst left values are the same. If sigma is small resulting in a window size of
828     // 1, then border calculations add some pixels which will always be zero. Inset the
829     // destination by those zero pixels. This case is very rare.
830     auto intermediateDst = dst.getAddr32(srcBounds.left(), 0);
831 
832     // The following code is executed very rarely, I have never seen it in a real web
833     // page. If sigma is small but not zero then shared GPU/CPU border calculation
834     // code adds extra pixels for the border. Just clear everything to clear those pixels.
835     // This solution is overkill, but very simple.
836     if (makerX->window() == 1 || makerY->window() == 1) {
837         dst.eraseColor(0);
838     }
839 
840     if (makerX->window() > 1) {
841         Pass* pass = makerX->makePass(buffer, &alloc);
842         // Make int64 to avoid overflow in multiplication below.
843         int64_t shift = srcBounds.top() - dstBounds.top();
844 
845         // For the horizontal blur, starts part way down in anticipation of the vertical blur.
846         // For a vertical sigma of zero shift should be zero. But, for small sigma,
847         // shift may be > 0 but the vertical window could be 1.
848         intermediateSrc = static_cast<uint32_t *>(dst.getPixels())
849                           + (shift > 0 ? shift * dst.rowBytesAsPixels() : 0);
850         intermediateRowBytesAsPixels = dst.rowBytesAsPixels();
851         intermediateWidth = dstW;
852         intermediateDst = static_cast<uint32_t *>(dst.getPixels());
853 
854         const uint32_t* srcCursor = static_cast<uint32_t*>(src.getPixels());
855         uint32_t* dstCursor = intermediateSrc;
856         for (auto y = 0; y < srcH; y++) {
857             pass->blur(srcBounds.left(), srcBounds.right(), dstBounds.right(),
858                       srcCursor, 1, dstCursor, 1);
859             srcCursor += src.rowBytesAsPixels();
860             dstCursor += intermediateRowBytesAsPixels;
861         }
862     }
863 
864     if (makerY->window() > 1) {
865         Pass* pass = makerY->makePass(buffer, &alloc);
866         const uint32_t* srcCursor = intermediateSrc;
867         uint32_t* dstCursor = intermediateDst;
868         for (auto x = 0; x < intermediateWidth; x++) {
869             pass->blur(srcBounds.top(), srcBounds.bottom(), dstBounds.bottom(),
870                        srcCursor, intermediateRowBytesAsPixels,
871                        dstCursor, dst.rowBytesAsPixels());
872             srcCursor += 1;
873             dstCursor += 1;
874         }
875     }
876 
877     return SkSpecialImage::MakeFromRaster(SkIRect::MakeWH(dstBounds.width(),
878                                                           dstBounds.height()),
879                                           dst, ctx.surfaceProps());
880 }
881 }  // namespace
882 
883 // This rather arbitrary-looking value results in a maximum box blur kernel size
884 // of 1000 pixels on the raster path, which matches the WebKit and Firefox
885 // implementations. Since the GPU path does not compute a box blur, putting
886 // the limit on sigma ensures consistent behaviour between the GPU and
887 // raster paths.
888 #define MAX_SIGMA SkIntToScalar(532)
889 
map_sigma(const SkSize & localSigma,const SkMatrix & ctm)890 static SkVector map_sigma(const SkSize& localSigma, const SkMatrix& ctm) {
891     SkVector sigma = SkVector::Make(localSigma.width(), localSigma.height());
892     ctm.mapVectors(&sigma, 1);
893     sigma.fX = std::min(SkScalarAbs(sigma.fX), MAX_SIGMA);
894     sigma.fY = std::min(SkScalarAbs(sigma.fY), MAX_SIGMA);
895     return sigma;
896 }
897 
onFilterImage(const Context & ctx,SkIPoint * offset) const898 sk_sp<SkSpecialImage> SkBlurImageFilter::onFilterImage(const Context& ctx,
899                                                        SkIPoint* offset) const {
900     SkIPoint inputOffset = SkIPoint::Make(0, 0);
901 
902     sk_sp<SkSpecialImage> input(this->filterInput(0, ctx, &inputOffset));
903     if (!input) {
904         return nullptr;
905     }
906 
907     SkIRect inputBounds = SkIRect::MakeXYWH(inputOffset.fX, inputOffset.fY,
908                                             input->width(), input->height());
909 
910     // Calculate the destination bounds.
911     SkIRect dstBounds;
912     if (!this->applyCropRect(this->mapContext(ctx), inputBounds, &dstBounds)) {
913         return nullptr;
914     }
915     if (!inputBounds.intersect(dstBounds)) {
916         return nullptr;
917     }
918 
919     // Save the offset in preparation to make all rectangles relative to the inputOffset.
920     SkIPoint resultOffset = SkIPoint::Make(dstBounds.fLeft, dstBounds.fTop);
921 
922     // Make all bounds relative to the inputOffset.
923     inputBounds.offset(-inputOffset);
924     dstBounds.offset(-inputOffset);
925 
926     SkVector sigma = map_sigma(fSigma, ctx.ctm());
927     if (sigma.x() < 0 || sigma.y() < 0) {
928         return nullptr;
929     }
930 
931     sk_sp<SkSpecialImage> result;
932 #if SK_SUPPORT_GPU
933     if (ctx.gpuBacked()) {
934         // Ensure the input is in the destination's gamut. This saves us from having to do the
935         // xform during the filter itself.
936         input = ImageToColorSpace(input.get(), ctx.colorType(), ctx.colorSpace(),
937                                   ctx.surfaceProps());
938         result = this->gpuFilter(ctx, sigma, input, inputBounds, dstBounds, inputOffset,
939                                  &resultOffset);
940     } else
941 #endif
942     {
943         // Please see the comment on TentPass::MakeMaker for how the limit of 2183 for sigma is
944         // calculated. The effective limit of blur is 532 which is set by the GPU above in
945         // map_sigma.
946         sigma.fX = SkTPin(sigma.fX, 0.0f, 2183.0f);
947         sigma.fY = SkTPin(sigma.fY, 0.0f, 2183.0f);
948 
949         result = cpu_blur(ctx, sigma, input, inputBounds, dstBounds);
950     }
951 
952     // Return the resultOffset if the blur succeeded.
953     if (result != nullptr) {
954         *offset = resultOffset;
955     }
956     return result;
957 }
958 
#if SK_SUPPORT_GPU
// GPU implementation of the blur. When both sigmas are effectively zero the input subset is
// returned directly and *offset is set to its position; otherwise the work is handed to
// SkGpuBlurUtils::GaussianBlur and *offset is not modified here. Returns nullptr when the
// input has no proxy, when the blur fails, or when SK_GPU_V1 is not enabled.
sk_sp<SkSpecialImage> SkBlurImageFilter::gpuFilter(
        const Context& ctx, SkVector sigma, const sk_sp<SkSpecialImage> &input, SkIRect inputBounds,
        SkIRect dstBounds, SkIPoint inputOffset, SkIPoint* offset) const {
#if SK_GPU_V1
    const bool nothingToBlur = SkGpuBlurUtils::IsEffectivelyZeroSigma(sigma.x()) &&
                               SkGpuBlurUtils::IsEffectivelyZeroSigma(sigma.y());
    if (nothingToBlur) {
        *offset = SkIPoint::Make(inputBounds.x() + inputOffset.fX,
                                 inputBounds.y() + inputOffset.fY);
        return input->makeSubset(inputBounds);
    }

    auto rContext = ctx.getContext();

    GrSurfaceProxyView srcView = input->view(rContext);
    if (!srcView.proxy()) {
        return nullptr;
    }
    SkASSERT(srcView.asTextureProxy());

    // Shift both rectangles by the top-left of the input's subset before blurring.
    const SkIPoint subsetOrigin = input->subset().topLeft();
    dstBounds.offset(subsetOrigin);
    inputBounds.offset(subsetOrigin);

    // TODO (michaelludwig) - The color space choice is odd, should it just be ctx.refColorSpace()?
    sk_sp<SkColorSpace> blurColorSpace =
            ctx.colorSpace() ? sk_ref_sp(input->getColorSpace()) : nullptr;

    auto blurredSDC = SkGpuBlurUtils::GaussianBlur(
            rContext,
            std::move(srcView),
            SkColorTypeToGrColorType(input->colorType()),
            input->alphaType(),
            std::move(blurColorSpace),
            dstBounds,
            inputBounds,
            sigma.x(),
            sigma.y(),
            fTileMode);
    if (!blurredSDC) {
        return nullptr;
    }

    return SkSpecialImage::MakeDeferredFromGpu(rContext,
                                               SkIRect::MakeSize(dstBounds.size()),
                                               kNeedNewImageUniqueID_SpecialImage,
                                               blurredSDC->readSurfaceView(),
                                               blurredSDC->colorInfo().colorType(),
                                               sk_ref_sp(input->getColorSpace()),
                                               ctx.surfaceProps());
#else // SK_GPU_V1
    return nullptr;
#endif // SK_GPU_V1
}
#endif
1009 
// Returns a conservative bounding box for the blurred output: the fast bounds of input 0 (or
// `src` itself when there is no input) grown by three sigma on each axis.
//
// The magnitude of each sigma is used, matching map_sigma(), so that a negative stored sigma
// grows the bounds instead of shrinking them.
SkRect SkBlurImageFilter::computeFastBounds(const SkRect& src) const {
    SkRect bounds = this->getInput(0) ? this->getInput(0)->computeFastBounds(src) : src;
    bounds.outset(SkScalarAbs(fSigma.width()) * 3, SkScalarAbs(fSigma.height()) * 3);
    return bounds;
}
1015 
// Grows `src` by three mapped sigmas (rounded up to whole pixels) on each axis. The map
// direction and inputRect arguments are not consulted.
SkIRect SkBlurImageFilter::onFilterNodeBounds(const SkIRect& src, const SkMatrix& ctm,
                                              MapDirection, const SkIRect* inputRect) const {
    const SkVector deviceSigma = map_sigma(fSigma, ctm);
    const int padX = SkScalarCeilToInt(deviceSigma.x() * 3);
    const int padY = SkScalarCeilToInt(deviceSigma.y() * 3);
    return src.makeOutset(padX, padY);
}
1021