• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2011 The Android Open Source Project
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 
8 #include "include/core/SkBitmap.h"
9 #include "include/core/SkColorType.h"
10 #include "include/core/SkFlattenable.h"
11 #include "include/core/SkImageFilter.h"
12 #include "include/core/SkImageInfo.h"
13 #include "include/core/SkMatrix.h"
14 #include "include/core/SkPoint.h"
15 #include "include/core/SkRect.h"
16 #include "include/core/SkRefCnt.h"
17 #include "include/core/SkScalar.h"
18 #include "include/core/SkSize.h"
19 #include "include/core/SkTileMode.h"
20 #include "include/core/SkTypes.h"
21 #include "include/effects/SkImageFilters.h"
22 #include "include/private/base/SkFloatingPoint.h"
23 #include "src/base/SkVx.h"
24 #include "include/private/base/SkMalloc.h"
25 #include "src/base/SkArenaAlloc.h"
26 #include "src/core/SkImageFilter_Base.h"
27 #include "src/core/SkReadBuffer.h"
28 #include "src/core/SkSpecialImage.h"
29 #include "src/core/SkWriteBuffer.h"
30 
31 #include <algorithm>
32 #include <cmath>
33 #include <cstdint>
34 #include <cstring>
35 #include <memory>
36 #include <utility>
37 
38 #if defined(SK_GANESH)
39 #include "include/private/gpu/ganesh/GrTypesPriv.h"
40 #include "src/core/SkGpuBlurUtils.h"
41 #include "src/gpu/ganesh/GrSurfaceProxyView.h"
42 #include "src/gpu/ganesh/SurfaceDrawContext.h"
43 #endif // defined(SK_GANESH)
44 
45 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE1
46     #include <xmmintrin.h>
47     #define SK_PREFETCH(ptr) _mm_prefetch(reinterpret_cast<const char*>(ptr), _MM_HINT_T0)
48 #elif defined(__GNUC__)
49     #define SK_PREFETCH(ptr) __builtin_prefetch(ptr)
50 #else
51     #define SK_PREFETCH(ptr)
52 #endif
53 
54 namespace {
55 
56 class SkBlurImageFilter final : public SkImageFilter_Base {
57 public:
SkBlurImageFilter(SkScalar sigmaX,SkScalar sigmaY,SkTileMode tileMode,sk_sp<SkImageFilter> input,const SkRect * cropRect)58     SkBlurImageFilter(SkScalar sigmaX, SkScalar sigmaY,  SkTileMode tileMode,
59                       sk_sp<SkImageFilter> input, const SkRect* cropRect)
60             : INHERITED(&input, 1, cropRect)
61             , fSigma{sigmaX, sigmaY}
62             , fTileMode(tileMode) {}
63 
64     SkRect computeFastBounds(const SkRect&) const override;
65 
66 protected:
67     void flatten(SkWriteBuffer&) const override;
68     sk_sp<SkSpecialImage> onFilterImage(const Context&, SkIPoint* offset) const override;
69     SkIRect onFilterNodeBounds(const SkIRect& src, const SkMatrix& ctm,
70                                MapDirection, const SkIRect* inputRect) const override;
71 
72 private:
73     friend void ::SkRegisterBlurImageFilterFlattenable();
74     SK_FLATTENABLE_HOOKS(SkBlurImageFilter)
75 
76 #if defined(SK_GANESH)
77     sk_sp<SkSpecialImage> gpuFilter(
78             const Context& ctx, SkVector sigma,
79             const sk_sp<SkSpecialImage> &input,
80             SkIRect inputBounds, SkIRect dstBounds, SkIPoint inputOffset, SkIPoint* offset) const;
81 #endif
82 
83     SkSize     fSigma;
84     SkTileMode fTileMode;
85 
86     using INHERITED = SkImageFilter_Base;
87 };
88 
89 } // end namespace
90 
Blur(SkScalar sigmaX,SkScalar sigmaY,SkTileMode tileMode,sk_sp<SkImageFilter> input,const CropRect & cropRect)91 sk_sp<SkImageFilter> SkImageFilters::Blur(
92         SkScalar sigmaX, SkScalar sigmaY, SkTileMode tileMode, sk_sp<SkImageFilter> input,
93         const CropRect& cropRect) {
94     if (sigmaX < SK_ScalarNearlyZero && sigmaY < SK_ScalarNearlyZero && !cropRect) {
95         return input;
96     }
97     return sk_sp<SkImageFilter>(
98           new SkBlurImageFilter(sigmaX, sigmaY, tileMode, input, cropRect));
99 }
100 
SkRegisterBlurImageFilterFlattenable()101 void SkRegisterBlurImageFilterFlattenable() {
102     SK_REGISTER_FLATTENABLE(SkBlurImageFilter);
103     SkFlattenable::Register("SkBlurImageFilterImpl", SkBlurImageFilter::CreateProc);
104 }
105 
CreateProc(SkReadBuffer & buffer)106 sk_sp<SkFlattenable> SkBlurImageFilter::CreateProc(SkReadBuffer& buffer) {
107     SK_IMAGEFILTER_UNFLATTEN_COMMON(common, 1);
108     SkScalar sigmaX = buffer.readScalar();
109     SkScalar sigmaY = buffer.readScalar();
110     SkTileMode tileMode = buffer.read32LE(SkTileMode::kLastTileMode);
111     return SkImageFilters::Blur(
112           sigmaX, sigmaY, tileMode, common.getInput(0), common.cropRect());
113 }
114 
flatten(SkWriteBuffer & buffer) const115 void SkBlurImageFilter::flatten(SkWriteBuffer& buffer) const {
116     this->INHERITED::flatten(buffer);
117     buffer.writeScalar(fSigma.fWidth);
118     buffer.writeScalar(fSigma.fHeight);
119 
120     SkASSERT(fTileMode <= SkTileMode::kLastTileMode);
121     buffer.writeInt(static_cast<int>(fTileMode));
122 }
123 
124 ///////////////////////////////////////////////////////////////////////////////
125 
126 namespace {
127 // This is defined by the SVG spec:
128 // https://drafts.fxtf.org/filter-effects/#feGaussianBlurElement
calculate_window(double sigma)129 int calculate_window(double sigma) {
130     auto possibleWindow = static_cast<int>(floor(sigma * 3 * sqrt(2 * SK_DoublePI) / 4 + 0.5));
131     return std::max(1, possibleWindow);
132 }
133 
134 // This rather arbitrary-looking value results in a maximum box blur kernel size
135 // of 1000 pixels on the raster path, which matches the WebKit and Firefox
136 // implementations. Since the GPU path does not compute a box blur, putting
137 // the limit on sigma ensures consistent behaviour between the GPU and
138 // raster paths.
139 static constexpr SkScalar kMaxSigma = 532.f;
140 
map_sigma(const SkSize & localSigma,const SkMatrix & ctm)141 static SkVector map_sigma(const SkSize& localSigma, const SkMatrix& ctm) {
142     SkVector sigma = SkVector::Make(localSigma.width(), localSigma.height());
143     ctm.mapVectors(&sigma, 1);
144     sigma.fX = std::min(SkScalarAbs(sigma.fX), kMaxSigma);
145     sigma.fY = std::min(SkScalarAbs(sigma.fY), kMaxSigma);
146     // Disable blurring on axes that were never finite, or became non-finite after mapping by ctm.
147     if (!SkScalarIsFinite(sigma.fX)) {
148         sigma.fX = 0.f;
149     }
150     if (!SkScalarIsFinite(sigma.fY)) {
151         sigma.fY = 0.f;
152     }
153     return sigma;
154 }
155 
156 
157 class Pass {
158 public:
Pass(int border)159     explicit Pass(int border) : fBorder(border) {}
160     virtual ~Pass() = default;
161 
blur(int srcLeft,int srcRight,int dstRight,const uint32_t * src,int srcStride,uint32_t * dst,int dstStride)162     void blur(int srcLeft, int srcRight, int dstRight,
163               const uint32_t* src, int srcStride,
164               uint32_t* dst, int dstStride) {
165         this->startBlur();
166 
167         auto srcStart = srcLeft - fBorder,
168                 srcEnd   = srcRight - fBorder,
169                 dstEnd   = dstRight,
170                 srcIdx   = srcStart,
171                 dstIdx   = 0;
172 
173         const uint32_t* srcCursor = src;
174         uint32_t* dstCursor = dst;
175 
176         if (dstIdx < srcIdx) {
177             // The destination pixels are not effected by the src pixels,
178             // change to zero as per the spec.
179             // https://drafts.fxtf.org/filter-effects/#FilterPrimitivesOverviewIntro
180             while (dstIdx < srcIdx) {
181                 *dstCursor = 0;
182                 dstCursor += dstStride;
183                 SK_PREFETCH(dstCursor);
184                 dstIdx++;
185             }
186         } else if (srcIdx < dstIdx) {
187             // The edge of the source is before the edge of the destination. Calculate the sums for
188             // the pixels before the start of the destination.
189             if (int commonEnd = std::min(dstIdx, srcEnd); srcIdx < commonEnd) {
190                 // Preload the blur with values from src before dst is entered.
191                 int n = commonEnd - srcIdx;
192                 this->blurSegment(n, srcCursor, srcStride, nullptr, 0);
193                 srcIdx += n;
194                 srcCursor += n * srcStride;
195             }
196             if (srcIdx < dstIdx) {
197                 // The weird case where src is out of pixels before dst is even started.
198                 int n = dstIdx - srcIdx;
199                 this->blurSegment(n, nullptr, 0, nullptr, 0);
200                 srcIdx += n;
201             }
202         }
203 
204         // Both srcIdx and dstIdx are in sync now, and can run in a 1:1 fashion. This is the
205         // normal mode of operation.
206         SkASSERT(srcIdx == dstIdx);
207         if (int commonEnd = std::min(dstEnd, srcEnd); dstIdx < commonEnd) {
208             int n = commonEnd - dstIdx;
209             this->blurSegment(n, srcCursor, srcStride, dstCursor, dstStride);
210             srcCursor += n * srcStride;
211             dstCursor += n * dstStride;
212             dstIdx += n;
213             srcIdx += n;
214         }
215 
216         // Drain the remaining blur values into dst assuming 0's for the leading edge.
217         if (dstIdx < dstEnd) {
218             int n = dstEnd - dstIdx;
219             this->blurSegment(n, nullptr, 0, dstCursor, dstStride);
220         }
221     }
222 
223 protected:
224     virtual void startBlur() = 0;
225     virtual void blurSegment(
226             int n, const uint32_t* src, int srcStride, uint32_t* dst, int dstStride) = 0;
227 
228 private:
229     const int fBorder;
230 };
231 
232 class PassMaker {
233 public:
PassMaker(int window)234     explicit PassMaker(int window) : fWindow{window} {}
235     virtual ~PassMaker() = default;
236     virtual Pass* makePass(void* buffer, SkArenaAlloc* alloc) const = 0;
237     virtual size_t bufferSizeBytes() const = 0;
window() const238     int window() const {return fWindow;}
239 
240 private:
241     const int fWindow;
242 };
243 
244 // Implement a scanline processor that uses a three-box filter to approximate a Gaussian blur.
245 // The GaussPass is limit to processing sigmas < 135.
246 class GaussPass final : public Pass {
247 public:
248     // NB 136 is the largest sigma that will not cause a buffer full of 255 mask values to overflow
249     // using the Gauss filter. It also limits the size of buffers used hold intermediate values.
250     // Explanation of maximums:
251     //   sum0 = window * 255
252     //   sum1 = window * sum0 -> window * window * 255
253     //   sum2 = window * sum1 -> window * window * window * 255 -> window^3 * 255
254     //
255     //   The value window^3 * 255 must fit in a uint32_t. So,
256     //      window^3 < 2^32. window = 255.
257     //
258     //   window = floor(sigma * 3 * sqrt(2 * kPi) / 4 + 0.5)
259     //   For window <= 255, the largest value for sigma is 136.
MakeMaker(double sigma,SkArenaAlloc * alloc)260     static PassMaker* MakeMaker(double sigma, SkArenaAlloc* alloc) {
261         SkASSERT(0 <= sigma);
262         int window = calculate_window(sigma);
263         if (255 <= window) {
264             return nullptr;
265         }
266 
267         class Maker : public PassMaker {
268         public:
269             explicit Maker(int window) : PassMaker{window} {}
270             Pass* makePass(void* buffer, SkArenaAlloc* alloc) const override {
271                 return GaussPass::Make(this->window(), buffer, alloc);
272             }
273 
274             size_t bufferSizeBytes() const override {
275                 int window = this->window();
276                 size_t onePassSize = window - 1;
277                 // If the window is odd, then there is an obvious middle element. For even sizes
278                 // 2 passes are shifted, and the last pass has an extra element. Like this:
279                 //       S
280                 //    aaaAaa
281                 //     bbBbbb
282                 //    cccCccc
283                 //       D
284                 size_t bufferCount = (window & 1) == 1 ? 3 * onePassSize : 3 * onePassSize + 1;
285                 return bufferCount * sizeof(skvx::Vec<4, uint32_t>);
286             }
287         };
288 
289         return alloc->make<Maker>(window);
290     }
291 
Make(int window,void * buffers,SkArenaAlloc * alloc)292     static GaussPass* Make(int window, void* buffers, SkArenaAlloc* alloc) {
293         // We don't need to store the trailing edge pixel in the buffer;
294         int passSize = window - 1;
295         skvx::Vec<4, uint32_t>* buffer0 = static_cast<skvx::Vec<4, uint32_t>*>(buffers);
296         skvx::Vec<4, uint32_t>* buffer1 = buffer0 + passSize;
297         skvx::Vec<4, uint32_t>* buffer2 = buffer1 + passSize;
298         // If the window is odd just one buffer is needed, but if it's even, then there is one
299         // more element on that pass.
300         skvx::Vec<4, uint32_t>* buffersEnd = buffer2 + ((window & 1) ? passSize : passSize + 1);
301 
302         // Calculating the border is tricky. The border is the distance in pixels between the first
303         // dst pixel and the first src pixel (or the last src pixel and the last dst pixel).
304         // I will go through the odd case which is simpler, and then through the even case. Given a
305         // stack of filters seven wide for the odd case of three passes.
306         //
307         //        S
308         //     aaaAaaa
309         //     bbbBbbb
310         //     cccCccc
311         //        D
312         //
313         // The furthest changed pixel is when the filters are in the following configuration.
314         //
315         //                 S
316         //           aaaAaaa
317         //        bbbBbbb
318         //     cccCccc
319         //        D
320         //
321         // The A pixel is calculated using the value S, the B uses A, and the C uses B, and
322         // finally D is C. So, with a window size of seven the border is nine. In the odd case, the
323         // border is 3*((window - 1)/2).
324         //
325         // For even cases the filter stack is more complicated. The spec specifies two passes
326         // of even filters and a final pass of odd filters. A stack for a width of six looks like
327         // this.
328         //
329         //       S
330         //    aaaAaa
331         //     bbBbbb
332         //    cccCccc
333         //       D
334         //
335         // The furthest pixel looks like this.
336         //
337         //               S
338         //          aaaAaa
339         //        bbBbbb
340         //    cccCccc
341         //       D
342         //
343         // For a window of six, the border value is eight. In the even case the border is 3 *
344         // (window/2) - 1.
345         int border = (window & 1) == 1 ? 3 * ((window - 1) / 2) : 3 * (window / 2) - 1;
346 
347         // If the window is odd then the divisor is just window ^ 3 otherwise,
348         // it is window * window * (window + 1) = window ^ 3 + window ^ 2;
349         int window2 = window * window;
350         int window3 = window2 * window;
351         int divisor = (window & 1) == 1 ? window3 : window3 + window2;
352         return alloc->make<GaussPass>(buffer0, buffer1, buffer2, buffersEnd, border, divisor);
353     }
354 
GaussPass(skvx::Vec<4,uint32_t> * buffer0,skvx::Vec<4,uint32_t> * buffer1,skvx::Vec<4,uint32_t> * buffer2,skvx::Vec<4,uint32_t> * buffersEnd,int border,int divisor)355     GaussPass(skvx::Vec<4, uint32_t>* buffer0,
356               skvx::Vec<4, uint32_t>* buffer1,
357               skvx::Vec<4, uint32_t>* buffer2,
358               skvx::Vec<4, uint32_t>* buffersEnd,
359               int border,
360               int divisor)
361         : Pass{border}
362         , fBuffer0{buffer0}
363         , fBuffer1{buffer1}
364         , fBuffer2{buffer2}
365         , fBuffersEnd{buffersEnd}
366         , fDivider(divisor) {}
367 
368 private:
startBlur()369     void startBlur() override {
370         skvx::Vec<4, uint32_t> zero = {0u, 0u, 0u, 0u};
371         zero.store(fSum0);
372         zero.store(fSum1);
373         auto half = fDivider.half();
374         skvx::Vec<4, uint32_t>{half, half, half, half}.store(fSum2);
375         sk_bzero(fBuffer0, (fBuffersEnd - fBuffer0) * sizeof(skvx::Vec<4, uint32_t>));
376 
377         fBuffer0Cursor = fBuffer0;
378         fBuffer1Cursor = fBuffer1;
379         fBuffer2Cursor = fBuffer2;
380     }
381 
382     // GaussPass implements the common three pass box filter approximation of Gaussian blur,
383     // but combines all three passes into a single pass. This approach is facilitated by three
384     // circular buffers the width of the window which track values for trailing edges of each of
385     // the three passes. This allows the algorithm to use more precision in the calculation
386     // because the values are not rounded each pass. And this implementation also avoids a trap
387     // that's easy to fall into resulting in blending in too many zeroes near the edge.
388     //
389     // In general, a window sum has the form:
390     //     sum_n+1 = sum_n + leading_edge - trailing_edge.
391     // If instead we do the subtraction at the end of the previous iteration, we can just
392     // calculate the sums instead of having to do the subtractions too.
393     //
394     //      In previous iteration:
395     //      sum_n+1 = sum_n - trailing_edge.
396     //
397     //      In this iteration:
398     //      sum_n+1 = sum_n + leading_edge.
399     //
400     // Now we can stack all three sums and do them at once. Sum0 gets its leading edge from the
401     // actual data. Sum1's leading edge is just Sum0, and Sum2's leading edge is Sum1. So, doing the
402     // three passes at the same time has the form:
403     //
404     //    sum0_n+1 = sum0_n + leading edge
405     //    sum1_n+1 = sum1_n + sum0_n+1
406     //    sum2_n+1 = sum2_n + sum1_n+1
407     //
408     //    sum2_n+1 / window^3 is the new value of the destination pixel.
409     //
410     // Reduce the sums by the trailing edges which were stored in the circular buffers for the
411     // next go around. This is the case for odd sized windows, even windows the the third
412     // circular buffer is one larger then the first two circular buffers.
413     //
414     //    sum2_n+2 = sum2_n+1 - buffer2[i];
415     //    buffer2[i] = sum1;
416     //    sum1_n+2 = sum1_n+1 - buffer1[i];
417     //    buffer1[i] = sum0;
418     //    sum0_n+2 = sum0_n+1 - buffer0[i];
419     //    buffer0[i] = leading edge
blurSegment(int n,const uint32_t * src,int srcStride,uint32_t * dst,int dstStride)420     void blurSegment(
421             int n, const uint32_t* src, int srcStride, uint32_t* dst, int dstStride) override {
422         skvx::Vec<4, uint32_t>* buffer0Cursor = fBuffer0Cursor;
423         skvx::Vec<4, uint32_t>* buffer1Cursor = fBuffer1Cursor;
424         skvx::Vec<4, uint32_t>* buffer2Cursor = fBuffer2Cursor;
425         skvx::Vec<4, uint32_t> sum0 = skvx::Vec<4, uint32_t>::Load(fSum0);
426         skvx::Vec<4, uint32_t> sum1 = skvx::Vec<4, uint32_t>::Load(fSum1);
427         skvx::Vec<4, uint32_t> sum2 = skvx::Vec<4, uint32_t>::Load(fSum2);
428 
429         // Given an expanded input pixel, move the window ahead using the leadingEdge value.
430         auto processValue = [&](const skvx::Vec<4, uint32_t>& leadingEdge) {
431             sum0 += leadingEdge;
432             sum1 += sum0;
433             sum2 += sum1;
434 
435             skvx::Vec<4, uint32_t> blurred = fDivider.divide(sum2);
436 
437             sum2 -= *buffer2Cursor;
438             *buffer2Cursor = sum1;
439             buffer2Cursor = (buffer2Cursor + 1) < fBuffersEnd ? buffer2Cursor + 1 : fBuffer2;
440             sum1 -= *buffer1Cursor;
441             *buffer1Cursor = sum0;
442             buffer1Cursor = (buffer1Cursor + 1) < fBuffer2 ? buffer1Cursor + 1 : fBuffer1;
443             sum0 -= *buffer0Cursor;
444             *buffer0Cursor = leadingEdge;
445             buffer0Cursor = (buffer0Cursor + 1) < fBuffer1 ? buffer0Cursor + 1 : fBuffer0;
446 
447             return skvx::cast<uint8_t>(blurred);
448         };
449 
450         auto loadEdge = [&](const uint32_t* srcCursor) {
451             return skvx::cast<uint32_t>(skvx::Vec<4, uint8_t>::Load(srcCursor));
452         };
453 
454         if (!src && !dst) {
455             while (n --> 0) {
456                 (void)processValue(0);
457             }
458         } else if (src && !dst) {
459             while (n --> 0) {
460                 (void)processValue(loadEdge(src));
461                 src += srcStride;
462             }
463         } else if (!src && dst) {
464             while (n --> 0) {
465                 processValue(0u).store(dst);
466                 dst += dstStride;
467             }
468         } else if (src && dst) {
469             while (n --> 0) {
470                 processValue(loadEdge(src)).store(dst);
471                 src += srcStride;
472                 dst += dstStride;
473             }
474         }
475 
476         // Store the state
477         fBuffer0Cursor = buffer0Cursor;
478         fBuffer1Cursor = buffer1Cursor;
479         fBuffer2Cursor = buffer2Cursor;
480 
481         sum0.store(fSum0);
482         sum1.store(fSum1);
483         sum2.store(fSum2);
484     }
485 
486     skvx::Vec<4, uint32_t>* const fBuffer0;
487     skvx::Vec<4, uint32_t>* const fBuffer1;
488     skvx::Vec<4, uint32_t>* const fBuffer2;
489     skvx::Vec<4, uint32_t>* const fBuffersEnd;
490     const skvx::ScaledDividerU32 fDivider;
491 
492     // blur state
493     char fSum0[sizeof(skvx::Vec<4, uint32_t>)];
494     char fSum1[sizeof(skvx::Vec<4, uint32_t>)];
495     char fSum2[sizeof(skvx::Vec<4, uint32_t>)];
496     skvx::Vec<4, uint32_t>* fBuffer0Cursor;
497     skvx::Vec<4, uint32_t>* fBuffer1Cursor;
498     skvx::Vec<4, uint32_t>* fBuffer2Cursor;
499 };
500 
501 // Implement a scanline processor that uses a two-box filter to approximate a Tent filter.
502 // The TentPass is limit to processing sigmas < 2183.
503 class TentPass final : public Pass {
504 public:
505     // NB 2183 is the largest sigma that will not cause a buffer full of 255 mask values to overflow
506     // using the Tent filter. It also limits the size of buffers used hold intermediate values.
507     // Explanation of maximums:
508     //   sum0 = window * 255
509     //   sum1 = window * sum0 -> window * window * 255
510     //
511     //   The value window^2 * 255 must fit in a uint32_t. So,
512     //      window^2 < 2^32. window = 4104.
513     //
514     //   window = floor(sigma * 3 * sqrt(2 * kPi) / 4 + 0.5)
515     //   For window <= 4104, the largest value for sigma is 2183.
MakeMaker(double sigma,SkArenaAlloc * alloc)516     static PassMaker* MakeMaker(double sigma, SkArenaAlloc* alloc) {
517         SkASSERT(0 <= sigma);
518         int gaussianWindow = calculate_window(sigma);
519         // This is a naive method of using the window size for the Gaussian blur to calculate the
520         // window size for the Tent blur. This seems to work well in practice.
521         //
522         // We can use a single pixel to generate the effective blur area given a window size. For
523         // the Gaussian blur this is 3 * window size. For the Tent filter this is 2 * window size.
524         int tentWindow = 3 * gaussianWindow / 2;
525         if (tentWindow >= 4104) {
526             return nullptr;
527         }
528 
529         class Maker : public PassMaker {
530         public:
531             explicit Maker(int window) : PassMaker{window} {}
532             Pass* makePass(void* buffer, SkArenaAlloc* alloc) const override {
533                 return TentPass::Make(this->window(), buffer, alloc);
534             }
535 
536             size_t bufferSizeBytes() const override {
537                 size_t onePassSize = this->window() - 1;
538                 // If the window is odd, then there is an obvious middle element. For even sizes 2
539                 // passes are shifted, and the last pass has an extra element. Like this:
540                 //       S
541                 //    aaaAaa
542                 //     bbBbbb
543                 //       D
544                 size_t bufferCount = 2 * onePassSize;
545                 return bufferCount * sizeof(skvx::Vec<4, uint32_t>);
546             }
547         };
548 
549         return alloc->make<Maker>(tentWindow);
550     }
551 
Make(int window,void * buffers,SkArenaAlloc * alloc)552     static TentPass* Make(int window, void* buffers, SkArenaAlloc* alloc) {
553         if (window > 4104) {
554             return nullptr;
555         }
556 
557         // We don't need to store the trailing edge pixel in the buffer;
558         int passSize = window - 1;
559         skvx::Vec<4, uint32_t>* buffer0 = static_cast<skvx::Vec<4, uint32_t>*>(buffers);
560         skvx::Vec<4, uint32_t>* buffer1 = buffer0 + passSize;
561         skvx::Vec<4, uint32_t>* buffersEnd = buffer1 + passSize;
562 
563         // Calculating the border is tricky. The border is the distance in pixels between the first
564         // dst pixel and the first src pixel (or the last src pixel and the last dst pixel).
565         // I will go through the odd case which is simpler, and then through the even case. Given a
566         // stack of filters seven wide for the odd case of three passes.
567         //
568         //        S
569         //     aaaAaaa
570         //     bbbBbbb
571         //        D
572         //
573         // The furthest changed pixel is when the filters are in the following configuration.
574         //
575         //              S
576         //        aaaAaaa
577         //     bbbBbbb
578         //        D
579         //
580         // The A pixel is calculated using the value S, the B uses A, and the D uses B.
581         // So, with a window size of seven the border is nine. In the odd case, the border is
582         // window - 1.
583         //
584         // For even cases the filter stack is more complicated. It uses two passes
585         // of even filters offset from each other. A stack for a width of six looks like
586         // this.
587         //
588         //       S
589         //    aaaAaa
590         //     bbBbbb
591         //       D
592         //
593         // The furthest pixel looks like this.
594         //
595         //            S
596         //       aaaAaa
597         //     bbBbbb
598         //       D
599         //
600         // For a window of six, the border value is 5. In the even case the border is
601         // window - 1.
602         int border = window - 1;
603 
604         int divisor = window * window;
605         return alloc->make<TentPass>(buffer0, buffer1, buffersEnd, border, divisor);
606     }
607 
TentPass(skvx::Vec<4,uint32_t> * buffer0,skvx::Vec<4,uint32_t> * buffer1,skvx::Vec<4,uint32_t> * buffersEnd,int border,int divisor)608     TentPass(skvx::Vec<4, uint32_t>* buffer0,
609              skvx::Vec<4, uint32_t>* buffer1,
610              skvx::Vec<4, uint32_t>* buffersEnd,
611              int border,
612              int divisor)
613          : Pass{border}
614          , fBuffer0{buffer0}
615          , fBuffer1{buffer1}
616          , fBuffersEnd{buffersEnd}
617          , fDivider(divisor) {}
618 
619 private:
startBlur()620     void startBlur() override {
621         skvx::Vec<4, uint32_t>{0u, 0u, 0u, 0u}.store(fSum0);
622         auto half = fDivider.half();
623         skvx::Vec<4, uint32_t>{half, half, half, half}.store(fSum1);
624         sk_bzero(fBuffer0, (fBuffersEnd - fBuffer0) * sizeof(skvx::Vec<4, uint32_t>));
625 
626         fBuffer0Cursor = fBuffer0;
627         fBuffer1Cursor = fBuffer1;
628     }
629 
630     // TentPass implements the common two pass box filter approximation of Tent filter,
631     // but combines all both passes into a single pass. This approach is facilitated by two
632     // circular buffers the width of the window which track values for trailing edges of each of
633     // both passes. This allows the algorithm to use more precision in the calculation
634     // because the values are not rounded each pass. And this implementation also avoids a trap
635     // that's easy to fall into resulting in blending in too many zeroes near the edge.
636     //
637     // In general, a window sum has the form:
638     //     sum_n+1 = sum_n + leading_edge - trailing_edge.
639     // If instead we do the subtraction at the end of the previous iteration, we can just
640     // calculate the sums instead of having to do the subtractions too.
641     //
642     //      In previous iteration:
643     //      sum_n+1 = sum_n - trailing_edge.
644     //
645     //      In this iteration:
646     //      sum_n+1 = sum_n + leading_edge.
647     //
648     // Now we can stack all three sums and do them at once. Sum0 gets its leading edge from the
649     // actual data. Sum1's leading edge is just Sum0, and Sum2's leading edge is Sum1. So, doing the
650     // three passes at the same time has the form:
651     //
652     //    sum0_n+1 = sum0_n + leading edge
653     //    sum1_n+1 = sum1_n + sum0_n+1
654     //
655     //    sum1_n+1 / window^2 is the new value of the destination pixel.
656     //
657     // Reduce the sums by the trailing edges which were stored in the circular buffers for the
658     // next go around.
659     //
660     //    sum1_n+2 = sum1_n+1 - buffer1[i];
661     //    buffer1[i] = sum0;
662     //    sum0_n+2 = sum0_n+1 - buffer0[i];
663     //    buffer0[i] = leading edge
blurSegment(int n,const uint32_t * src,int srcStride,uint32_t * dst,int dstStride)664     void blurSegment(
665             int n, const uint32_t* src, int srcStride, uint32_t* dst, int dstStride) override {
666         skvx::Vec<4, uint32_t>* buffer0Cursor = fBuffer0Cursor;
667         skvx::Vec<4, uint32_t>* buffer1Cursor = fBuffer1Cursor;
668         skvx::Vec<4, uint32_t> sum0 = skvx::Vec<4, uint32_t>::Load(fSum0);
669         skvx::Vec<4, uint32_t> sum1 = skvx::Vec<4, uint32_t>::Load(fSum1);
670 
671         // Given an expanded input pixel, move the window ahead using the leadingEdge value.
672         auto processValue = [&](const skvx::Vec<4, uint32_t>& leadingEdge) {
673             sum0 += leadingEdge;
674             sum1 += sum0;
675 
676             skvx::Vec<4, uint32_t> blurred = fDivider.divide(sum1);
677 
678             sum1 -= *buffer1Cursor;
679             *buffer1Cursor = sum0;
680             buffer1Cursor = (buffer1Cursor + 1) < fBuffersEnd ? buffer1Cursor + 1 : fBuffer1;
681             sum0 -= *buffer0Cursor;
682             *buffer0Cursor = leadingEdge;
683             buffer0Cursor = (buffer0Cursor + 1) < fBuffer1 ? buffer0Cursor + 1 : fBuffer0;
684 
685             return skvx::cast<uint8_t>(blurred);
686         };
687 
688         auto loadEdge = [&](const uint32_t* srcCursor) {
689             return skvx::cast<uint32_t>(skvx::Vec<4, uint8_t>::Load(srcCursor));
690         };
691 
692         if (!src && !dst) {
693             while (n --> 0) {
694                 (void)processValue(0);
695             }
696         } else if (src && !dst) {
697             while (n --> 0) {
698                 (void)processValue(loadEdge(src));
699                 src += srcStride;
700             }
701         } else if (!src && dst) {
702             while (n --> 0) {
703                 processValue(0u).store(dst);
704                 dst += dstStride;
705             }
706         } else if (src && dst) {
707             while (n --> 0) {
708                 processValue(loadEdge(src)).store(dst);
709                 src += srcStride;
710                 dst += dstStride;
711             }
712         }
713 
714         // Store the state
715         fBuffer0Cursor = buffer0Cursor;
716         fBuffer1Cursor = buffer1Cursor;
717         sum0.store(fSum0);
718         sum1.store(fSum1);
719     }
720 
721     skvx::Vec<4, uint32_t>* const fBuffer0;
722     skvx::Vec<4, uint32_t>* const fBuffer1;
723     skvx::Vec<4, uint32_t>* const fBuffersEnd;
724     const skvx::ScaledDividerU32 fDivider;
725 
726     // blur state
727     char fSum0[sizeof(skvx::Vec<4, uint32_t>)];
728     char fSum1[sizeof(skvx::Vec<4, uint32_t>)];
729     skvx::Vec<4, uint32_t>* fBuffer0Cursor;
730     skvx::Vec<4, uint32_t>* fBuffer1Cursor;
731 };
732 
copy_image_with_bounds(const SkImageFilter_Base::Context & ctx,const sk_sp<SkSpecialImage> & input,SkIRect srcBounds,SkIRect dstBounds)733 sk_sp<SkSpecialImage> copy_image_with_bounds(
734         const SkImageFilter_Base::Context& ctx, const sk_sp<SkSpecialImage> &input,
735         SkIRect srcBounds, SkIRect dstBounds) {
736     SkBitmap inputBM;
737     if (!input->getROPixels(&inputBM)) {
738         return nullptr;
739     }
740 
741     if (inputBM.colorType() != kN32_SkColorType) {
742         return nullptr;
743     }
744 
745     SkBitmap src;
746     inputBM.extractSubset(&src, srcBounds);
747 
748     // Make everything relative to the destination bounds.
749     srcBounds.offset(-dstBounds.x(), -dstBounds.y());
750     dstBounds.offset(-dstBounds.x(), -dstBounds.y());
751 
752     auto srcW = srcBounds.width(),
753          dstW = dstBounds.width(),
754          dstH = dstBounds.height();
755 
756     SkImageInfo dstInfo = SkImageInfo::Make(dstW, dstH, inputBM.colorType(), inputBM.alphaType());
757 
758     SkBitmap dst;
759     if (!dst.tryAllocPixels(dstInfo)) {
760         return nullptr;
761     }
762 
763     // There is no blurring to do, but we still need to copy the source while accounting for the
764     // dstBounds. Remember that the src was intersected with the dst.
765     int y = 0;
766     size_t dstWBytes = dstW * sizeof(uint32_t);
767     for (;y < srcBounds.top(); y++) {
768         sk_bzero(dst.getAddr32(0, y), dstWBytes);
769     }
770 
771     for (;y < srcBounds.bottom(); y++) {
772         int x = 0;
773         uint32_t* dstPtr = dst.getAddr32(0, y);
774         for (;x < srcBounds.left(); x++) {
775             *dstPtr++ = 0;
776         }
777 
778         memcpy(dstPtr, src.getAddr32(x - srcBounds.left(), y - srcBounds.top()),
779                srcW * sizeof(uint32_t));
780 
781         dstPtr += srcW;
782         x += srcW;
783 
784         for (;x < dstBounds.right(); x++) {
785             *dstPtr++ = 0;
786         }
787     }
788 
789     for (;y < dstBounds.bottom(); y++) {
790         sk_bzero(dst.getAddr32(0, y), dstWBytes);
791     }
792 
793     return SkSpecialImage::MakeFromRaster(SkIRect::MakeWH(dstBounds.width(),
794                                                           dstBounds.height()),
795                                           dst, ctx.surfaceProps());
796 }
797 
798 // TODO: Implement CPU backend for different fTileMode.
cpu_blur(const SkImageFilter_Base::Context & ctx,SkVector sigma,const sk_sp<SkSpecialImage> & input,SkIRect srcBounds,SkIRect dstBounds)799 sk_sp<SkSpecialImage> cpu_blur(
800         const SkImageFilter_Base::Context& ctx,
801         SkVector sigma, const sk_sp<SkSpecialImage> &input,
802         SkIRect srcBounds, SkIRect dstBounds) {
803     // map_sigma limits sigma to 532 to match 1000px box filter limit of WebKit and Firefox.
804     // Since this does not exceed the limits of the TentPass (2183), there won't be overflow when
805     // computing a kernel over a pixel window filled with 255.
806     static_assert(kMaxSigma <= 2183.0f);
807 
808     SkSTArenaAlloc<1024> alloc;
809     auto makeMaker = [&](double sigma) -> PassMaker* {
810         SkASSERT(0 <= sigma && sigma <= 2183); // should be guaranteed after map_sigma
811         if (PassMaker* maker = GaussPass::MakeMaker(sigma, &alloc)) {
812             return maker;
813         }
814         if (PassMaker* maker = TentPass::MakeMaker(sigma, &alloc)) {
815             return maker;
816         }
817         SK_ABORT("Sigma is out of range.");
818     };
819 
820     PassMaker* makerX = makeMaker(sigma.x());
821     PassMaker* makerY = makeMaker(sigma.y());
822 
823     if (makerX->window() <= 1 && makerY->window() <= 1) {
824         return copy_image_with_bounds(ctx, input, srcBounds, dstBounds);
825     }
826 
827     SkBitmap inputBM;
828 
829     if (!input->getROPixels(&inputBM)) {
830         return nullptr;
831     }
832 
833     if (inputBM.colorType() != kN32_SkColorType) {
834         return nullptr;
835     }
836 
837     SkBitmap src;
838     inputBM.extractSubset(&src, srcBounds);
839 
840     // Make everything relative to the destination bounds.
841     srcBounds.offset(-dstBounds.x(), -dstBounds.y());
842     dstBounds.offset(-dstBounds.x(), -dstBounds.y());
843 
844     auto srcW = srcBounds.width(),
845          srcH = srcBounds.height(),
846          dstW = dstBounds.width(),
847          dstH = dstBounds.height();
848 
849     SkImageInfo dstInfo = inputBM.info().makeWH(dstW, dstH);
850 
851     SkBitmap dst;
852     if (!dst.tryAllocPixels(dstInfo)) {
853         return nullptr;
854     }
855 
856     size_t bufferSizeBytes = std::max(makerX->bufferSizeBytes(), makerY->bufferSizeBytes());
857     auto buffer = alloc.makeBytesAlignedTo(bufferSizeBytes, alignof(skvx::Vec<4, uint32_t>));
858 
859     // Basic Plan: The three cases to handle
860     // * Horizontal and Vertical - blur horizontally while copying values from the source to
861     //     the destination. Then, do an in-place vertical blur.
862     // * Horizontal only - blur horizontally copying values from the source to the destination.
863     // * Vertical only - blur vertically copying values from the source to the destination.
864 
865     // Default to vertical only blur case. If a horizontal blur is needed, then these values
866     // will be adjusted while doing the horizontal blur.
867     auto intermediateSrc = static_cast<uint32_t *>(src.getPixels());
868     auto intermediateRowBytesAsPixels = src.rowBytesAsPixels();
869     auto intermediateWidth = srcW;
870 
871     // Because the border is calculated before the fork of the GPU/CPU path. The border is
872     // the maximum of the two rendering methods. In the case where sigma is zero, then the
873     // src and dst left values are the same. If sigma is small resulting in a window size of
874     // 1, then border calculations add some pixels which will always be zero. Inset the
875     // destination by those zero pixels. This case is very rare.
876     auto intermediateDst = dst.getAddr32(srcBounds.left(), 0);
877 
878     // The following code is executed very rarely, I have never seen it in a real web
879     // page. If sigma is small but not zero then shared GPU/CPU border calculation
880     // code adds extra pixels for the border. Just clear everything to clear those pixels.
881     // This solution is overkill, but very simple.
882     if (makerX->window() == 1 || makerY->window() == 1) {
883         dst.eraseColor(0);
884     }
885 
886     if (makerX->window() > 1) {
887         Pass* pass = makerX->makePass(buffer, &alloc);
888         // Make int64 to avoid overflow in multiplication below.
889         int64_t shift = srcBounds.top() - dstBounds.top();
890 
891         // For the horizontal blur, starts part way down in anticipation of the vertical blur.
892         // For a vertical sigma of zero shift should be zero. But, for small sigma,
893         // shift may be > 0 but the vertical window could be 1.
894         intermediateSrc = static_cast<uint32_t *>(dst.getPixels())
895                           + (shift > 0 ? shift * dst.rowBytesAsPixels() : 0);
896         intermediateRowBytesAsPixels = dst.rowBytesAsPixels();
897         intermediateWidth = dstW;
898         intermediateDst = static_cast<uint32_t *>(dst.getPixels());
899 
900         const uint32_t* srcCursor = static_cast<uint32_t*>(src.getPixels());
901         uint32_t* dstCursor = intermediateSrc;
902         for (auto y = 0; y < srcH; y++) {
903             pass->blur(srcBounds.left(), srcBounds.right(), dstBounds.right(),
904                       srcCursor, 1, dstCursor, 1);
905             srcCursor += src.rowBytesAsPixels();
906             dstCursor += intermediateRowBytesAsPixels;
907         }
908     }
909 
910     if (makerY->window() > 1) {
911         Pass* pass = makerY->makePass(buffer, &alloc);
912         const uint32_t* srcCursor = intermediateSrc;
913         uint32_t* dstCursor = intermediateDst;
914         for (auto x = 0; x < intermediateWidth; x++) {
915             pass->blur(srcBounds.top(), srcBounds.bottom(), dstBounds.bottom(),
916                        srcCursor, intermediateRowBytesAsPixels,
917                        dstCursor, dst.rowBytesAsPixels());
918             srcCursor += 1;
919             dstCursor += 1;
920         }
921     }
922 
923     return SkSpecialImage::MakeFromRaster(SkIRect::MakeWH(dstBounds.width(),
924                                                           dstBounds.height()),
925                                           dst, ctx.surfaceProps());
926 }
927 }  // namespace
928 
onFilterImage(const Context & ctx,SkIPoint * offset) const929 sk_sp<SkSpecialImage> SkBlurImageFilter::onFilterImage(const Context& ctx,
930                                                        SkIPoint* offset) const {
931     SkIPoint inputOffset = SkIPoint::Make(0, 0);
932 
933     sk_sp<SkSpecialImage> input(this->filterInput(0, ctx, &inputOffset));
934     if (!input) {
935         return nullptr;
936     }
937 
938     SkIRect inputBounds = SkIRect::MakeXYWH(inputOffset.fX, inputOffset.fY,
939                                             input->width(), input->height());
940 
941     // Calculate the destination bounds.
942     SkIRect dstBounds;
943     if (!this->applyCropRect(this->mapContext(ctx), inputBounds, &dstBounds)) {
944         return nullptr;
945     }
946     if (!inputBounds.intersect(dstBounds)) {
947         return nullptr;
948     }
949 
950     // Save the offset in preparation to make all rectangles relative to the inputOffset.
951     SkIPoint resultOffset = SkIPoint::Make(dstBounds.fLeft, dstBounds.fTop);
952 
953     // Make all bounds relative to the inputOffset.
954     inputBounds.offset(-inputOffset);
955     dstBounds.offset(-inputOffset);
956 
957     SkVector sigma = map_sigma(fSigma, ctx.ctm());
958     SkASSERT(SkScalarIsFinite(sigma.x()) && sigma.x() >= 0.f && sigma.x() <= kMaxSigma &&
959              SkScalarIsFinite(sigma.y()) && sigma.y() >= 0.f && sigma.y() <= kMaxSigma);
960 
961     sk_sp<SkSpecialImage> result;
962 #if defined(SK_GANESH)
963     if (ctx.gpuBacked()) {
964         // Ensure the input is in the destination's gamut. This saves us from having to do the
965         // xform during the filter itself.
966         input = ImageToColorSpace(input.get(), ctx.colorType(), ctx.colorSpace(),
967                                   ctx.surfaceProps());
968         result = this->gpuFilter(ctx, sigma, input, inputBounds, dstBounds, inputOffset,
969                                  &resultOffset);
970     } else
971 #endif
972     {
973         result = cpu_blur(ctx, sigma, input, inputBounds, dstBounds);
974     }
975 
976     // Return the resultOffset if the blur succeeded.
977     if (result != nullptr) {
978         *offset = resultOffset;
979     }
980     return result;
981 }
982 
983 #if defined(SK_GANESH)
// GPU blur path. When both sigmas are effectively zero, returns the inputBounds subset of
// `input` and rewrites *offset to that subset's position; otherwise runs
// SkGpuBlurUtils::GaussianBlur and wraps its output in a deferred GPU special image sized to
// dstBounds, leaving *offset as the caller set it. Returns nullptr if the input has no backing
// proxy or the blur fails.
sk_sp<SkSpecialImage> SkBlurImageFilter::gpuFilter(
        const Context& ctx, SkVector sigma, const sk_sp<SkSpecialImage> &input, SkIRect inputBounds,
        SkIRect dstBounds, SkIPoint inputOffset, SkIPoint* offset) const {
    const bool nothingToBlur = SkGpuBlurUtils::IsEffectivelyZeroSigma(sigma.x()) &&
                               SkGpuBlurUtils::IsEffectivelyZeroSigma(sigma.y());
    if (nothingToBlur) {
        *offset = SkIPoint::Make(inputBounds.x() + inputOffset.fX,
                                 inputBounds.y() + inputOffset.fY);
        return input->makeSubset(inputBounds);
    }

    auto context = ctx.getContext();

    GrSurfaceProxyView inputView = input->view(context);
    if (!inputView.proxy()) {
        return nullptr;
    }
    SkASSERT(inputView.asTextureProxy());

    // Shift both rectangles by the origin of the input's subset.
    const SkIPoint subsetOrigin = input->subset().topLeft();
    dstBounds.offset(subsetOrigin);
    inputBounds.offset(subsetOrigin);

    auto sdc = SkGpuBlurUtils::GaussianBlur(context,
                                            std::move(inputView),
                                            SkColorTypeToGrColorType(input->colorType()),
                                            input->alphaType(),
                                            ctx.refColorSpace(),
                                            dstBounds,
                                            inputBounds,
                                            sigma.x(),
                                            sigma.y(),
                                            fTileMode);
    if (!sdc) {
        return nullptr;
    }

    const SkIRect resultSubset = SkIRect::MakeSize(dstBounds.size());
    return SkSpecialImage::MakeDeferredFromGpu(context,
                                               resultSubset,
                                               kNeedNewImageUniqueID_SpecialImage,
                                               sdc->readSurfaceView(),
                                               sdc->colorInfo(),
                                               ctx.surfaceProps());
}
1026 #endif
1027 
// Returns the fast bounds of input 0 (or `src` itself when there is no input), grown by three
// times the stored sigma on each axis.
SkRect SkBlurImageFilter::computeFastBounds(const SkRect& src) const {
    SkRect bounds = src;
    if (const auto* upstream = this->getInput(0)) {
        bounds = upstream->computeFastBounds(src);
    }
    bounds.outset(fSigma.width() * 3, fSigma.height() * 3);
    return bounds;
}
1033 
// Returns `src` outset by ceil(3 * sigma) on each axis, where sigma is the stored sigma mapped
// through `ctm`. The map direction and inputRect are not consulted.
SkIRect SkBlurImageFilter::onFilterNodeBounds(const SkIRect& src, const SkMatrix& ctm,
                                              MapDirection, const SkIRect* inputRect) const {
    const SkVector mappedSigma = map_sigma(fSigma, ctm);
    const int outsetX = SkScalarCeilToInt(mappedSigma.x() * 3);
    const int outsetY = SkScalarCeilToInt(mappedSigma.y() * 3);
    return src.makeOutset(outsetX, outsetY);
}
1039