1 /*
2 * Copyright 2011 The Android Open Source Project
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8 #include "include/core/SkBitmap.h"
9 #include "include/core/SkColorType.h"
10 #include "include/core/SkFlattenable.h"
11 #include "include/core/SkImageFilter.h"
12 #include "include/core/SkImageInfo.h"
13 #include "include/core/SkMatrix.h"
14 #include "include/core/SkPoint.h"
15 #include "include/core/SkRect.h"
16 #include "include/core/SkRefCnt.h"
17 #include "include/core/SkScalar.h"
18 #include "include/core/SkSize.h"
19 #include "include/core/SkTileMode.h"
20 #include "include/core/SkTypes.h"
21 #include "include/effects/SkImageFilters.h"
22 #include "include/private/base/SkFloatingPoint.h"
23 #include "src/base/SkVx.h"
24 #include "include/private/base/SkMalloc.h"
25 #include "src/base/SkArenaAlloc.h"
26 #include "src/core/SkImageFilter_Base.h"
27 #include "src/core/SkReadBuffer.h"
28 #include "src/core/SkSpecialImage.h"
29 #include "src/core/SkWriteBuffer.h"
30
31 #include <algorithm>
32 #include <cmath>
33 #include <cstdint>
34 #include <cstring>
35 #include <memory>
36 #include <utility>
37
38 #if defined(SK_GANESH)
39 #include "include/private/gpu/ganesh/GrTypesPriv.h"
40 #include "src/core/SkGpuBlurUtils.h"
41 #include "src/gpu/ganesh/GrSurfaceProxyView.h"
42 #include "src/gpu/ganesh/SurfaceDrawContext.h"
43 #endif // defined(SK_GANESH)
44
45 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE1
46 #include <xmmintrin.h>
47 #define SK_PREFETCH(ptr) _mm_prefetch(reinterpret_cast<const char*>(ptr), _MM_HINT_T0)
48 #elif defined(__GNUC__)
49 #define SK_PREFETCH(ptr) __builtin_prefetch(ptr)
50 #else
51 #define SK_PREFETCH(ptr)
52 #endif
53
54 namespace {
55
56 class SkBlurImageFilter final : public SkImageFilter_Base {
57 public:
SkBlurImageFilter(SkScalar sigmaX,SkScalar sigmaY,SkTileMode tileMode,sk_sp<SkImageFilter> input,const SkRect * cropRect)58 SkBlurImageFilter(SkScalar sigmaX, SkScalar sigmaY, SkTileMode tileMode,
59 sk_sp<SkImageFilter> input, const SkRect* cropRect)
60 : INHERITED(&input, 1, cropRect)
61 , fSigma{sigmaX, sigmaY}
62 , fTileMode(tileMode) {}
63
64 SkRect computeFastBounds(const SkRect&) const override;
65
66 protected:
67 void flatten(SkWriteBuffer&) const override;
68 sk_sp<SkSpecialImage> onFilterImage(const Context&, SkIPoint* offset) const override;
69 SkIRect onFilterNodeBounds(const SkIRect& src, const SkMatrix& ctm,
70 MapDirection, const SkIRect* inputRect) const override;
71
72 private:
73 friend void ::SkRegisterBlurImageFilterFlattenable();
74 SK_FLATTENABLE_HOOKS(SkBlurImageFilter)
75
76 #if defined(SK_GANESH)
77 sk_sp<SkSpecialImage> gpuFilter(
78 const Context& ctx, SkVector sigma,
79 const sk_sp<SkSpecialImage> &input,
80 SkIRect inputBounds, SkIRect dstBounds, SkIPoint inputOffset, SkIPoint* offset) const;
81 #endif
82
83 SkSize fSigma;
84 SkTileMode fTileMode;
85
86 using INHERITED = SkImageFilter_Base;
87 };
88
89 } // end namespace
90
Blur(SkScalar sigmaX,SkScalar sigmaY,SkTileMode tileMode,sk_sp<SkImageFilter> input,const CropRect & cropRect)91 sk_sp<SkImageFilter> SkImageFilters::Blur(
92 SkScalar sigmaX, SkScalar sigmaY, SkTileMode tileMode, sk_sp<SkImageFilter> input,
93 const CropRect& cropRect) {
94 if (sigmaX < SK_ScalarNearlyZero && sigmaY < SK_ScalarNearlyZero && !cropRect) {
95 return input;
96 }
97 return sk_sp<SkImageFilter>(
98 new SkBlurImageFilter(sigmaX, sigmaY, tileMode, input, cropRect));
99 }
100
SkRegisterBlurImageFilterFlattenable()101 void SkRegisterBlurImageFilterFlattenable() {
102 SK_REGISTER_FLATTENABLE(SkBlurImageFilter);
103 SkFlattenable::Register("SkBlurImageFilterImpl", SkBlurImageFilter::CreateProc);
104 }
105
CreateProc(SkReadBuffer & buffer)106 sk_sp<SkFlattenable> SkBlurImageFilter::CreateProc(SkReadBuffer& buffer) {
107 SK_IMAGEFILTER_UNFLATTEN_COMMON(common, 1);
108 SkScalar sigmaX = buffer.readScalar();
109 SkScalar sigmaY = buffer.readScalar();
110 SkTileMode tileMode = buffer.read32LE(SkTileMode::kLastTileMode);
111 return SkImageFilters::Blur(
112 sigmaX, sigmaY, tileMode, common.getInput(0), common.cropRect());
113 }
114
flatten(SkWriteBuffer & buffer) const115 void SkBlurImageFilter::flatten(SkWriteBuffer& buffer) const {
116 this->INHERITED::flatten(buffer);
117 buffer.writeScalar(fSigma.fWidth);
118 buffer.writeScalar(fSigma.fHeight);
119
120 SkASSERT(fTileMode <= SkTileMode::kLastTileMode);
121 buffer.writeInt(static_cast<int>(fTileMode));
122 }
123
124 ///////////////////////////////////////////////////////////////////////////////
125
126 namespace {
127 // This is defined by the SVG spec:
128 // https://drafts.fxtf.org/filter-effects/#feGaussianBlurElement
calculate_window(double sigma)129 int calculate_window(double sigma) {
130 auto possibleWindow = static_cast<int>(floor(sigma * 3 * sqrt(2 * SK_DoublePI) / 4 + 0.5));
131 return std::max(1, possibleWindow);
132 }
133
134 // This rather arbitrary-looking value results in a maximum box blur kernel size
135 // of 1000 pixels on the raster path, which matches the WebKit and Firefox
136 // implementations. Since the GPU path does not compute a box blur, putting
137 // the limit on sigma ensures consistent behaviour between the GPU and
138 // raster paths.
139 static constexpr SkScalar kMaxSigma = 532.f;
140
map_sigma(const SkSize & localSigma,const SkMatrix & ctm)141 static SkVector map_sigma(const SkSize& localSigma, const SkMatrix& ctm) {
142 SkVector sigma = SkVector::Make(localSigma.width(), localSigma.height());
143 ctm.mapVectors(&sigma, 1);
144 sigma.fX = std::min(SkScalarAbs(sigma.fX), kMaxSigma);
145 sigma.fY = std::min(SkScalarAbs(sigma.fY), kMaxSigma);
146 // Disable blurring on axes that were never finite, or became non-finite after mapping by ctm.
147 if (!SkScalarIsFinite(sigma.fX)) {
148 sigma.fX = 0.f;
149 }
150 if (!SkScalarIsFinite(sigma.fY)) {
151 sigma.fY = 0.f;
152 }
153 return sigma;
154 }
155
156
157 class Pass {
158 public:
Pass(int border)159 explicit Pass(int border) : fBorder(border) {}
160 virtual ~Pass() = default;
161
blur(int srcLeft,int srcRight,int dstRight,const uint32_t * src,int srcStride,uint32_t * dst,int dstStride)162 void blur(int srcLeft, int srcRight, int dstRight,
163 const uint32_t* src, int srcStride,
164 uint32_t* dst, int dstStride) {
165 this->startBlur();
166
167 auto srcStart = srcLeft - fBorder,
168 srcEnd = srcRight - fBorder,
169 dstEnd = dstRight,
170 srcIdx = srcStart,
171 dstIdx = 0;
172
173 const uint32_t* srcCursor = src;
174 uint32_t* dstCursor = dst;
175
176 if (dstIdx < srcIdx) {
177 // The destination pixels are not effected by the src pixels,
178 // change to zero as per the spec.
179 // https://drafts.fxtf.org/filter-effects/#FilterPrimitivesOverviewIntro
180 while (dstIdx < srcIdx) {
181 *dstCursor = 0;
182 dstCursor += dstStride;
183 SK_PREFETCH(dstCursor);
184 dstIdx++;
185 }
186 } else if (srcIdx < dstIdx) {
187 // The edge of the source is before the edge of the destination. Calculate the sums for
188 // the pixels before the start of the destination.
189 if (int commonEnd = std::min(dstIdx, srcEnd); srcIdx < commonEnd) {
190 // Preload the blur with values from src before dst is entered.
191 int n = commonEnd - srcIdx;
192 this->blurSegment(n, srcCursor, srcStride, nullptr, 0);
193 srcIdx += n;
194 srcCursor += n * srcStride;
195 }
196 if (srcIdx < dstIdx) {
197 // The weird case where src is out of pixels before dst is even started.
198 int n = dstIdx - srcIdx;
199 this->blurSegment(n, nullptr, 0, nullptr, 0);
200 srcIdx += n;
201 }
202 }
203
204 // Both srcIdx and dstIdx are in sync now, and can run in a 1:1 fashion. This is the
205 // normal mode of operation.
206 SkASSERT(srcIdx == dstIdx);
207 if (int commonEnd = std::min(dstEnd, srcEnd); dstIdx < commonEnd) {
208 int n = commonEnd - dstIdx;
209 this->blurSegment(n, srcCursor, srcStride, dstCursor, dstStride);
210 srcCursor += n * srcStride;
211 dstCursor += n * dstStride;
212 dstIdx += n;
213 srcIdx += n;
214 }
215
216 // Drain the remaining blur values into dst assuming 0's for the leading edge.
217 if (dstIdx < dstEnd) {
218 int n = dstEnd - dstIdx;
219 this->blurSegment(n, nullptr, 0, dstCursor, dstStride);
220 }
221 }
222
223 protected:
224 virtual void startBlur() = 0;
225 virtual void blurSegment(
226 int n, const uint32_t* src, int srcStride, uint32_t* dst, int dstStride) = 0;
227
228 private:
229 const int fBorder;
230 };
231
232 class PassMaker {
233 public:
PassMaker(int window)234 explicit PassMaker(int window) : fWindow{window} {}
235 virtual ~PassMaker() = default;
236 virtual Pass* makePass(void* buffer, SkArenaAlloc* alloc) const = 0;
237 virtual size_t bufferSizeBytes() const = 0;
window() const238 int window() const {return fWindow;}
239
240 private:
241 const int fWindow;
242 };
243
244 // Implement a scanline processor that uses a three-box filter to approximate a Gaussian blur.
245 // The GaussPass is limit to processing sigmas < 135.
246 class GaussPass final : public Pass {
247 public:
248 // NB 136 is the largest sigma that will not cause a buffer full of 255 mask values to overflow
249 // using the Gauss filter. It also limits the size of buffers used hold intermediate values.
250 // Explanation of maximums:
251 // sum0 = window * 255
252 // sum1 = window * sum0 -> window * window * 255
253 // sum2 = window * sum1 -> window * window * window * 255 -> window^3 * 255
254 //
255 // The value window^3 * 255 must fit in a uint32_t. So,
256 // window^3 < 2^32. window = 255.
257 //
258 // window = floor(sigma * 3 * sqrt(2 * kPi) / 4 + 0.5)
259 // For window <= 255, the largest value for sigma is 136.
MakeMaker(double sigma,SkArenaAlloc * alloc)260 static PassMaker* MakeMaker(double sigma, SkArenaAlloc* alloc) {
261 SkASSERT(0 <= sigma);
262 int window = calculate_window(sigma);
263 if (255 <= window) {
264 return nullptr;
265 }
266
267 class Maker : public PassMaker {
268 public:
269 explicit Maker(int window) : PassMaker{window} {}
270 Pass* makePass(void* buffer, SkArenaAlloc* alloc) const override {
271 return GaussPass::Make(this->window(), buffer, alloc);
272 }
273
274 size_t bufferSizeBytes() const override {
275 int window = this->window();
276 size_t onePassSize = window - 1;
277 // If the window is odd, then there is an obvious middle element. For even sizes
278 // 2 passes are shifted, and the last pass has an extra element. Like this:
279 // S
280 // aaaAaa
281 // bbBbbb
282 // cccCccc
283 // D
284 size_t bufferCount = (window & 1) == 1 ? 3 * onePassSize : 3 * onePassSize + 1;
285 return bufferCount * sizeof(skvx::Vec<4, uint32_t>);
286 }
287 };
288
289 return alloc->make<Maker>(window);
290 }
291
Make(int window,void * buffers,SkArenaAlloc * alloc)292 static GaussPass* Make(int window, void* buffers, SkArenaAlloc* alloc) {
293 // We don't need to store the trailing edge pixel in the buffer;
294 int passSize = window - 1;
295 skvx::Vec<4, uint32_t>* buffer0 = static_cast<skvx::Vec<4, uint32_t>*>(buffers);
296 skvx::Vec<4, uint32_t>* buffer1 = buffer0 + passSize;
297 skvx::Vec<4, uint32_t>* buffer2 = buffer1 + passSize;
298 // If the window is odd just one buffer is needed, but if it's even, then there is one
299 // more element on that pass.
300 skvx::Vec<4, uint32_t>* buffersEnd = buffer2 + ((window & 1) ? passSize : passSize + 1);
301
302 // Calculating the border is tricky. The border is the distance in pixels between the first
303 // dst pixel and the first src pixel (or the last src pixel and the last dst pixel).
304 // I will go through the odd case which is simpler, and then through the even case. Given a
305 // stack of filters seven wide for the odd case of three passes.
306 //
307 // S
308 // aaaAaaa
309 // bbbBbbb
310 // cccCccc
311 // D
312 //
313 // The furthest changed pixel is when the filters are in the following configuration.
314 //
315 // S
316 // aaaAaaa
317 // bbbBbbb
318 // cccCccc
319 // D
320 //
321 // The A pixel is calculated using the value S, the B uses A, and the C uses B, and
322 // finally D is C. So, with a window size of seven the border is nine. In the odd case, the
323 // border is 3*((window - 1)/2).
324 //
325 // For even cases the filter stack is more complicated. The spec specifies two passes
326 // of even filters and a final pass of odd filters. A stack for a width of six looks like
327 // this.
328 //
329 // S
330 // aaaAaa
331 // bbBbbb
332 // cccCccc
333 // D
334 //
335 // The furthest pixel looks like this.
336 //
337 // S
338 // aaaAaa
339 // bbBbbb
340 // cccCccc
341 // D
342 //
343 // For a window of six, the border value is eight. In the even case the border is 3 *
344 // (window/2) - 1.
345 int border = (window & 1) == 1 ? 3 * ((window - 1) / 2) : 3 * (window / 2) - 1;
346
347 // If the window is odd then the divisor is just window ^ 3 otherwise,
348 // it is window * window * (window + 1) = window ^ 3 + window ^ 2;
349 int window2 = window * window;
350 int window3 = window2 * window;
351 int divisor = (window & 1) == 1 ? window3 : window3 + window2;
352 return alloc->make<GaussPass>(buffer0, buffer1, buffer2, buffersEnd, border, divisor);
353 }
354
GaussPass(skvx::Vec<4,uint32_t> * buffer0,skvx::Vec<4,uint32_t> * buffer1,skvx::Vec<4,uint32_t> * buffer2,skvx::Vec<4,uint32_t> * buffersEnd,int border,int divisor)355 GaussPass(skvx::Vec<4, uint32_t>* buffer0,
356 skvx::Vec<4, uint32_t>* buffer1,
357 skvx::Vec<4, uint32_t>* buffer2,
358 skvx::Vec<4, uint32_t>* buffersEnd,
359 int border,
360 int divisor)
361 : Pass{border}
362 , fBuffer0{buffer0}
363 , fBuffer1{buffer1}
364 , fBuffer2{buffer2}
365 , fBuffersEnd{buffersEnd}
366 , fDivider(divisor) {}
367
368 private:
startBlur()369 void startBlur() override {
370 skvx::Vec<4, uint32_t> zero = {0u, 0u, 0u, 0u};
371 zero.store(fSum0);
372 zero.store(fSum1);
373 auto half = fDivider.half();
374 skvx::Vec<4, uint32_t>{half, half, half, half}.store(fSum2);
375 sk_bzero(fBuffer0, (fBuffersEnd - fBuffer0) * sizeof(skvx::Vec<4, uint32_t>));
376
377 fBuffer0Cursor = fBuffer0;
378 fBuffer1Cursor = fBuffer1;
379 fBuffer2Cursor = fBuffer2;
380 }
381
382 // GaussPass implements the common three pass box filter approximation of Gaussian blur,
383 // but combines all three passes into a single pass. This approach is facilitated by three
384 // circular buffers the width of the window which track values for trailing edges of each of
385 // the three passes. This allows the algorithm to use more precision in the calculation
386 // because the values are not rounded each pass. And this implementation also avoids a trap
387 // that's easy to fall into resulting in blending in too many zeroes near the edge.
388 //
389 // In general, a window sum has the form:
390 // sum_n+1 = sum_n + leading_edge - trailing_edge.
391 // If instead we do the subtraction at the end of the previous iteration, we can just
392 // calculate the sums instead of having to do the subtractions too.
393 //
394 // In previous iteration:
395 // sum_n+1 = sum_n - trailing_edge.
396 //
397 // In this iteration:
398 // sum_n+1 = sum_n + leading_edge.
399 //
400 // Now we can stack all three sums and do them at once. Sum0 gets its leading edge from the
401 // actual data. Sum1's leading edge is just Sum0, and Sum2's leading edge is Sum1. So, doing the
402 // three passes at the same time has the form:
403 //
404 // sum0_n+1 = sum0_n + leading edge
405 // sum1_n+1 = sum1_n + sum0_n+1
406 // sum2_n+1 = sum2_n + sum1_n+1
407 //
408 // sum2_n+1 / window^3 is the new value of the destination pixel.
409 //
410 // Reduce the sums by the trailing edges which were stored in the circular buffers for the
411 // next go around. This is the case for odd sized windows, even windows the the third
412 // circular buffer is one larger then the first two circular buffers.
413 //
414 // sum2_n+2 = sum2_n+1 - buffer2[i];
415 // buffer2[i] = sum1;
416 // sum1_n+2 = sum1_n+1 - buffer1[i];
417 // buffer1[i] = sum0;
418 // sum0_n+2 = sum0_n+1 - buffer0[i];
419 // buffer0[i] = leading edge
blurSegment(int n,const uint32_t * src,int srcStride,uint32_t * dst,int dstStride)420 void blurSegment(
421 int n, const uint32_t* src, int srcStride, uint32_t* dst, int dstStride) override {
422 skvx::Vec<4, uint32_t>* buffer0Cursor = fBuffer0Cursor;
423 skvx::Vec<4, uint32_t>* buffer1Cursor = fBuffer1Cursor;
424 skvx::Vec<4, uint32_t>* buffer2Cursor = fBuffer2Cursor;
425 skvx::Vec<4, uint32_t> sum0 = skvx::Vec<4, uint32_t>::Load(fSum0);
426 skvx::Vec<4, uint32_t> sum1 = skvx::Vec<4, uint32_t>::Load(fSum1);
427 skvx::Vec<4, uint32_t> sum2 = skvx::Vec<4, uint32_t>::Load(fSum2);
428
429 // Given an expanded input pixel, move the window ahead using the leadingEdge value.
430 auto processValue = [&](const skvx::Vec<4, uint32_t>& leadingEdge) {
431 sum0 += leadingEdge;
432 sum1 += sum0;
433 sum2 += sum1;
434
435 skvx::Vec<4, uint32_t> blurred = fDivider.divide(sum2);
436
437 sum2 -= *buffer2Cursor;
438 *buffer2Cursor = sum1;
439 buffer2Cursor = (buffer2Cursor + 1) < fBuffersEnd ? buffer2Cursor + 1 : fBuffer2;
440 sum1 -= *buffer1Cursor;
441 *buffer1Cursor = sum0;
442 buffer1Cursor = (buffer1Cursor + 1) < fBuffer2 ? buffer1Cursor + 1 : fBuffer1;
443 sum0 -= *buffer0Cursor;
444 *buffer0Cursor = leadingEdge;
445 buffer0Cursor = (buffer0Cursor + 1) < fBuffer1 ? buffer0Cursor + 1 : fBuffer0;
446
447 return skvx::cast<uint8_t>(blurred);
448 };
449
450 auto loadEdge = [&](const uint32_t* srcCursor) {
451 return skvx::cast<uint32_t>(skvx::Vec<4, uint8_t>::Load(srcCursor));
452 };
453
454 if (!src && !dst) {
455 while (n --> 0) {
456 (void)processValue(0);
457 }
458 } else if (src && !dst) {
459 while (n --> 0) {
460 (void)processValue(loadEdge(src));
461 src += srcStride;
462 }
463 } else if (!src && dst) {
464 while (n --> 0) {
465 processValue(0u).store(dst);
466 dst += dstStride;
467 }
468 } else if (src && dst) {
469 while (n --> 0) {
470 processValue(loadEdge(src)).store(dst);
471 src += srcStride;
472 dst += dstStride;
473 }
474 }
475
476 // Store the state
477 fBuffer0Cursor = buffer0Cursor;
478 fBuffer1Cursor = buffer1Cursor;
479 fBuffer2Cursor = buffer2Cursor;
480
481 sum0.store(fSum0);
482 sum1.store(fSum1);
483 sum2.store(fSum2);
484 }
485
486 skvx::Vec<4, uint32_t>* const fBuffer0;
487 skvx::Vec<4, uint32_t>* const fBuffer1;
488 skvx::Vec<4, uint32_t>* const fBuffer2;
489 skvx::Vec<4, uint32_t>* const fBuffersEnd;
490 const skvx::ScaledDividerU32 fDivider;
491
492 // blur state
493 char fSum0[sizeof(skvx::Vec<4, uint32_t>)];
494 char fSum1[sizeof(skvx::Vec<4, uint32_t>)];
495 char fSum2[sizeof(skvx::Vec<4, uint32_t>)];
496 skvx::Vec<4, uint32_t>* fBuffer0Cursor;
497 skvx::Vec<4, uint32_t>* fBuffer1Cursor;
498 skvx::Vec<4, uint32_t>* fBuffer2Cursor;
499 };
500
501 // Implement a scanline processor that uses a two-box filter to approximate a Tent filter.
502 // The TentPass is limit to processing sigmas < 2183.
503 class TentPass final : public Pass {
504 public:
505 // NB 2183 is the largest sigma that will not cause a buffer full of 255 mask values to overflow
506 // using the Tent filter. It also limits the size of buffers used hold intermediate values.
507 // Explanation of maximums:
508 // sum0 = window * 255
509 // sum1 = window * sum0 -> window * window * 255
510 //
511 // The value window^2 * 255 must fit in a uint32_t. So,
512 // window^2 < 2^32. window = 4104.
513 //
514 // window = floor(sigma * 3 * sqrt(2 * kPi) / 4 + 0.5)
515 // For window <= 4104, the largest value for sigma is 2183.
MakeMaker(double sigma,SkArenaAlloc * alloc)516 static PassMaker* MakeMaker(double sigma, SkArenaAlloc* alloc) {
517 SkASSERT(0 <= sigma);
518 int gaussianWindow = calculate_window(sigma);
519 // This is a naive method of using the window size for the Gaussian blur to calculate the
520 // window size for the Tent blur. This seems to work well in practice.
521 //
522 // We can use a single pixel to generate the effective blur area given a window size. For
523 // the Gaussian blur this is 3 * window size. For the Tent filter this is 2 * window size.
524 int tentWindow = 3 * gaussianWindow / 2;
525 if (tentWindow >= 4104) {
526 return nullptr;
527 }
528
529 class Maker : public PassMaker {
530 public:
531 explicit Maker(int window) : PassMaker{window} {}
532 Pass* makePass(void* buffer, SkArenaAlloc* alloc) const override {
533 return TentPass::Make(this->window(), buffer, alloc);
534 }
535
536 size_t bufferSizeBytes() const override {
537 size_t onePassSize = this->window() - 1;
538 // If the window is odd, then there is an obvious middle element. For even sizes 2
539 // passes are shifted, and the last pass has an extra element. Like this:
540 // S
541 // aaaAaa
542 // bbBbbb
543 // D
544 size_t bufferCount = 2 * onePassSize;
545 return bufferCount * sizeof(skvx::Vec<4, uint32_t>);
546 }
547 };
548
549 return alloc->make<Maker>(tentWindow);
550 }
551
Make(int window,void * buffers,SkArenaAlloc * alloc)552 static TentPass* Make(int window, void* buffers, SkArenaAlloc* alloc) {
553 if (window > 4104) {
554 return nullptr;
555 }
556
557 // We don't need to store the trailing edge pixel in the buffer;
558 int passSize = window - 1;
559 skvx::Vec<4, uint32_t>* buffer0 = static_cast<skvx::Vec<4, uint32_t>*>(buffers);
560 skvx::Vec<4, uint32_t>* buffer1 = buffer0 + passSize;
561 skvx::Vec<4, uint32_t>* buffersEnd = buffer1 + passSize;
562
563 // Calculating the border is tricky. The border is the distance in pixels between the first
564 // dst pixel and the first src pixel (or the last src pixel and the last dst pixel).
565 // I will go through the odd case which is simpler, and then through the even case. Given a
566 // stack of filters seven wide for the odd case of three passes.
567 //
568 // S
569 // aaaAaaa
570 // bbbBbbb
571 // D
572 //
573 // The furthest changed pixel is when the filters are in the following configuration.
574 //
575 // S
576 // aaaAaaa
577 // bbbBbbb
578 // D
579 //
580 // The A pixel is calculated using the value S, the B uses A, and the D uses B.
581 // So, with a window size of seven the border is nine. In the odd case, the border is
582 // window - 1.
583 //
584 // For even cases the filter stack is more complicated. It uses two passes
585 // of even filters offset from each other. A stack for a width of six looks like
586 // this.
587 //
588 // S
589 // aaaAaa
590 // bbBbbb
591 // D
592 //
593 // The furthest pixel looks like this.
594 //
595 // S
596 // aaaAaa
597 // bbBbbb
598 // D
599 //
600 // For a window of six, the border value is 5. In the even case the border is
601 // window - 1.
602 int border = window - 1;
603
604 int divisor = window * window;
605 return alloc->make<TentPass>(buffer0, buffer1, buffersEnd, border, divisor);
606 }
607
TentPass(skvx::Vec<4,uint32_t> * buffer0,skvx::Vec<4,uint32_t> * buffer1,skvx::Vec<4,uint32_t> * buffersEnd,int border,int divisor)608 TentPass(skvx::Vec<4, uint32_t>* buffer0,
609 skvx::Vec<4, uint32_t>* buffer1,
610 skvx::Vec<4, uint32_t>* buffersEnd,
611 int border,
612 int divisor)
613 : Pass{border}
614 , fBuffer0{buffer0}
615 , fBuffer1{buffer1}
616 , fBuffersEnd{buffersEnd}
617 , fDivider(divisor) {}
618
619 private:
startBlur()620 void startBlur() override {
621 skvx::Vec<4, uint32_t>{0u, 0u, 0u, 0u}.store(fSum0);
622 auto half = fDivider.half();
623 skvx::Vec<4, uint32_t>{half, half, half, half}.store(fSum1);
624 sk_bzero(fBuffer0, (fBuffersEnd - fBuffer0) * sizeof(skvx::Vec<4, uint32_t>));
625
626 fBuffer0Cursor = fBuffer0;
627 fBuffer1Cursor = fBuffer1;
628 }
629
630 // TentPass implements the common two pass box filter approximation of Tent filter,
631 // but combines all both passes into a single pass. This approach is facilitated by two
632 // circular buffers the width of the window which track values for trailing edges of each of
633 // both passes. This allows the algorithm to use more precision in the calculation
634 // because the values are not rounded each pass. And this implementation also avoids a trap
635 // that's easy to fall into resulting in blending in too many zeroes near the edge.
636 //
637 // In general, a window sum has the form:
638 // sum_n+1 = sum_n + leading_edge - trailing_edge.
639 // If instead we do the subtraction at the end of the previous iteration, we can just
640 // calculate the sums instead of having to do the subtractions too.
641 //
642 // In previous iteration:
643 // sum_n+1 = sum_n - trailing_edge.
644 //
645 // In this iteration:
646 // sum_n+1 = sum_n + leading_edge.
647 //
648 // Now we can stack all three sums and do them at once. Sum0 gets its leading edge from the
649 // actual data. Sum1's leading edge is just Sum0, and Sum2's leading edge is Sum1. So, doing the
650 // three passes at the same time has the form:
651 //
652 // sum0_n+1 = sum0_n + leading edge
653 // sum1_n+1 = sum1_n + sum0_n+1
654 //
655 // sum1_n+1 / window^2 is the new value of the destination pixel.
656 //
657 // Reduce the sums by the trailing edges which were stored in the circular buffers for the
658 // next go around.
659 //
660 // sum1_n+2 = sum1_n+1 - buffer1[i];
661 // buffer1[i] = sum0;
662 // sum0_n+2 = sum0_n+1 - buffer0[i];
663 // buffer0[i] = leading edge
blurSegment(int n,const uint32_t * src,int srcStride,uint32_t * dst,int dstStride)664 void blurSegment(
665 int n, const uint32_t* src, int srcStride, uint32_t* dst, int dstStride) override {
666 skvx::Vec<4, uint32_t>* buffer0Cursor = fBuffer0Cursor;
667 skvx::Vec<4, uint32_t>* buffer1Cursor = fBuffer1Cursor;
668 skvx::Vec<4, uint32_t> sum0 = skvx::Vec<4, uint32_t>::Load(fSum0);
669 skvx::Vec<4, uint32_t> sum1 = skvx::Vec<4, uint32_t>::Load(fSum1);
670
671 // Given an expanded input pixel, move the window ahead using the leadingEdge value.
672 auto processValue = [&](const skvx::Vec<4, uint32_t>& leadingEdge) {
673 sum0 += leadingEdge;
674 sum1 += sum0;
675
676 skvx::Vec<4, uint32_t> blurred = fDivider.divide(sum1);
677
678 sum1 -= *buffer1Cursor;
679 *buffer1Cursor = sum0;
680 buffer1Cursor = (buffer1Cursor + 1) < fBuffersEnd ? buffer1Cursor + 1 : fBuffer1;
681 sum0 -= *buffer0Cursor;
682 *buffer0Cursor = leadingEdge;
683 buffer0Cursor = (buffer0Cursor + 1) < fBuffer1 ? buffer0Cursor + 1 : fBuffer0;
684
685 return skvx::cast<uint8_t>(blurred);
686 };
687
688 auto loadEdge = [&](const uint32_t* srcCursor) {
689 return skvx::cast<uint32_t>(skvx::Vec<4, uint8_t>::Load(srcCursor));
690 };
691
692 if (!src && !dst) {
693 while (n --> 0) {
694 (void)processValue(0);
695 }
696 } else if (src && !dst) {
697 while (n --> 0) {
698 (void)processValue(loadEdge(src));
699 src += srcStride;
700 }
701 } else if (!src && dst) {
702 while (n --> 0) {
703 processValue(0u).store(dst);
704 dst += dstStride;
705 }
706 } else if (src && dst) {
707 while (n --> 0) {
708 processValue(loadEdge(src)).store(dst);
709 src += srcStride;
710 dst += dstStride;
711 }
712 }
713
714 // Store the state
715 fBuffer0Cursor = buffer0Cursor;
716 fBuffer1Cursor = buffer1Cursor;
717 sum0.store(fSum0);
718 sum1.store(fSum1);
719 }
720
721 skvx::Vec<4, uint32_t>* const fBuffer0;
722 skvx::Vec<4, uint32_t>* const fBuffer1;
723 skvx::Vec<4, uint32_t>* const fBuffersEnd;
724 const skvx::ScaledDividerU32 fDivider;
725
726 // blur state
727 char fSum0[sizeof(skvx::Vec<4, uint32_t>)];
728 char fSum1[sizeof(skvx::Vec<4, uint32_t>)];
729 skvx::Vec<4, uint32_t>* fBuffer0Cursor;
730 skvx::Vec<4, uint32_t>* fBuffer1Cursor;
731 };
732
copy_image_with_bounds(const SkImageFilter_Base::Context & ctx,const sk_sp<SkSpecialImage> & input,SkIRect srcBounds,SkIRect dstBounds)733 sk_sp<SkSpecialImage> copy_image_with_bounds(
734 const SkImageFilter_Base::Context& ctx, const sk_sp<SkSpecialImage> &input,
735 SkIRect srcBounds, SkIRect dstBounds) {
736 SkBitmap inputBM;
737 if (!input->getROPixels(&inputBM)) {
738 return nullptr;
739 }
740
741 if (inputBM.colorType() != kN32_SkColorType) {
742 return nullptr;
743 }
744
745 SkBitmap src;
746 inputBM.extractSubset(&src, srcBounds);
747
748 // Make everything relative to the destination bounds.
749 srcBounds.offset(-dstBounds.x(), -dstBounds.y());
750 dstBounds.offset(-dstBounds.x(), -dstBounds.y());
751
752 auto srcW = srcBounds.width(),
753 dstW = dstBounds.width(),
754 dstH = dstBounds.height();
755
756 SkImageInfo dstInfo = SkImageInfo::Make(dstW, dstH, inputBM.colorType(), inputBM.alphaType());
757
758 SkBitmap dst;
759 if (!dst.tryAllocPixels(dstInfo)) {
760 return nullptr;
761 }
762
763 // There is no blurring to do, but we still need to copy the source while accounting for the
764 // dstBounds. Remember that the src was intersected with the dst.
765 int y = 0;
766 size_t dstWBytes = dstW * sizeof(uint32_t);
767 for (;y < srcBounds.top(); y++) {
768 sk_bzero(dst.getAddr32(0, y), dstWBytes);
769 }
770
771 for (;y < srcBounds.bottom(); y++) {
772 int x = 0;
773 uint32_t* dstPtr = dst.getAddr32(0, y);
774 for (;x < srcBounds.left(); x++) {
775 *dstPtr++ = 0;
776 }
777
778 memcpy(dstPtr, src.getAddr32(x - srcBounds.left(), y - srcBounds.top()),
779 srcW * sizeof(uint32_t));
780
781 dstPtr += srcW;
782 x += srcW;
783
784 for (;x < dstBounds.right(); x++) {
785 *dstPtr++ = 0;
786 }
787 }
788
789 for (;y < dstBounds.bottom(); y++) {
790 sk_bzero(dst.getAddr32(0, y), dstWBytes);
791 }
792
793 return SkSpecialImage::MakeFromRaster(SkIRect::MakeWH(dstBounds.width(),
794 dstBounds.height()),
795 dst, ctx.surfaceProps());
796 }
797
798 // TODO: Implement CPU backend for different fTileMode.
cpu_blur(const SkImageFilter_Base::Context & ctx,SkVector sigma,const sk_sp<SkSpecialImage> & input,SkIRect srcBounds,SkIRect dstBounds)799 sk_sp<SkSpecialImage> cpu_blur(
800 const SkImageFilter_Base::Context& ctx,
801 SkVector sigma, const sk_sp<SkSpecialImage> &input,
802 SkIRect srcBounds, SkIRect dstBounds) {
803 // map_sigma limits sigma to 532 to match 1000px box filter limit of WebKit and Firefox.
804 // Since this does not exceed the limits of the TentPass (2183), there won't be overflow when
805 // computing a kernel over a pixel window filled with 255.
806 static_assert(kMaxSigma <= 2183.0f);
807
808 SkSTArenaAlloc<1024> alloc;
809 auto makeMaker = [&](double sigma) -> PassMaker* {
810 SkASSERT(0 <= sigma && sigma <= 2183); // should be guaranteed after map_sigma
811 if (PassMaker* maker = GaussPass::MakeMaker(sigma, &alloc)) {
812 return maker;
813 }
814 if (PassMaker* maker = TentPass::MakeMaker(sigma, &alloc)) {
815 return maker;
816 }
817 SK_ABORT("Sigma is out of range.");
818 };
819
820 PassMaker* makerX = makeMaker(sigma.x());
821 PassMaker* makerY = makeMaker(sigma.y());
822
823 if (makerX->window() <= 1 && makerY->window() <= 1) {
824 return copy_image_with_bounds(ctx, input, srcBounds, dstBounds);
825 }
826
827 SkBitmap inputBM;
828
829 if (!input->getROPixels(&inputBM)) {
830 return nullptr;
831 }
832
833 if (inputBM.colorType() != kN32_SkColorType) {
834 return nullptr;
835 }
836
837 SkBitmap src;
838 inputBM.extractSubset(&src, srcBounds);
839
840 // Make everything relative to the destination bounds.
841 srcBounds.offset(-dstBounds.x(), -dstBounds.y());
842 dstBounds.offset(-dstBounds.x(), -dstBounds.y());
843
844 auto srcW = srcBounds.width(),
845 srcH = srcBounds.height(),
846 dstW = dstBounds.width(),
847 dstH = dstBounds.height();
848
849 SkImageInfo dstInfo = inputBM.info().makeWH(dstW, dstH);
850
851 SkBitmap dst;
852 if (!dst.tryAllocPixels(dstInfo)) {
853 return nullptr;
854 }
855
856 size_t bufferSizeBytes = std::max(makerX->bufferSizeBytes(), makerY->bufferSizeBytes());
857 auto buffer = alloc.makeBytesAlignedTo(bufferSizeBytes, alignof(skvx::Vec<4, uint32_t>));
858
859 // Basic Plan: The three cases to handle
860 // * Horizontal and Vertical - blur horizontally while copying values from the source to
861 // the destination. Then, do an in-place vertical blur.
862 // * Horizontal only - blur horizontally copying values from the source to the destination.
863 // * Vertical only - blur vertically copying values from the source to the destination.
864
865 // Default to vertical only blur case. If a horizontal blur is needed, then these values
866 // will be adjusted while doing the horizontal blur.
867 auto intermediateSrc = static_cast<uint32_t *>(src.getPixels());
868 auto intermediateRowBytesAsPixels = src.rowBytesAsPixels();
869 auto intermediateWidth = srcW;
870
871 // Because the border is calculated before the fork of the GPU/CPU path. The border is
872 // the maximum of the two rendering methods. In the case where sigma is zero, then the
873 // src and dst left values are the same. If sigma is small resulting in a window size of
874 // 1, then border calculations add some pixels which will always be zero. Inset the
875 // destination by those zero pixels. This case is very rare.
876 auto intermediateDst = dst.getAddr32(srcBounds.left(), 0);
877
878 // The following code is executed very rarely, I have never seen it in a real web
879 // page. If sigma is small but not zero then shared GPU/CPU border calculation
880 // code adds extra pixels for the border. Just clear everything to clear those pixels.
881 // This solution is overkill, but very simple.
882 if (makerX->window() == 1 || makerY->window() == 1) {
883 dst.eraseColor(0);
884 }
885
886 if (makerX->window() > 1) {
887 Pass* pass = makerX->makePass(buffer, &alloc);
888 // Make int64 to avoid overflow in multiplication below.
889 int64_t shift = srcBounds.top() - dstBounds.top();
890
891 // For the horizontal blur, starts part way down in anticipation of the vertical blur.
892 // For a vertical sigma of zero shift should be zero. But, for small sigma,
893 // shift may be > 0 but the vertical window could be 1.
894 intermediateSrc = static_cast<uint32_t *>(dst.getPixels())
895 + (shift > 0 ? shift * dst.rowBytesAsPixels() : 0);
896 intermediateRowBytesAsPixels = dst.rowBytesAsPixels();
897 intermediateWidth = dstW;
898 intermediateDst = static_cast<uint32_t *>(dst.getPixels());
899
900 const uint32_t* srcCursor = static_cast<uint32_t*>(src.getPixels());
901 uint32_t* dstCursor = intermediateSrc;
902 for (auto y = 0; y < srcH; y++) {
903 pass->blur(srcBounds.left(), srcBounds.right(), dstBounds.right(),
904 srcCursor, 1, dstCursor, 1);
905 srcCursor += src.rowBytesAsPixels();
906 dstCursor += intermediateRowBytesAsPixels;
907 }
908 }
909
910 if (makerY->window() > 1) {
911 Pass* pass = makerY->makePass(buffer, &alloc);
912 const uint32_t* srcCursor = intermediateSrc;
913 uint32_t* dstCursor = intermediateDst;
914 for (auto x = 0; x < intermediateWidth; x++) {
915 pass->blur(srcBounds.top(), srcBounds.bottom(), dstBounds.bottom(),
916 srcCursor, intermediateRowBytesAsPixels,
917 dstCursor, dst.rowBytesAsPixels());
918 srcCursor += 1;
919 dstCursor += 1;
920 }
921 }
922
923 return SkSpecialImage::MakeFromRaster(SkIRect::MakeWH(dstBounds.width(),
924 dstBounds.height()),
925 dst, ctx.surfaceProps());
926 }
927 } // namespace
928
onFilterImage(const Context & ctx,SkIPoint * offset) const929 sk_sp<SkSpecialImage> SkBlurImageFilter::onFilterImage(const Context& ctx,
930 SkIPoint* offset) const {
931 SkIPoint inputOffset = SkIPoint::Make(0, 0);
932
933 sk_sp<SkSpecialImage> input(this->filterInput(0, ctx, &inputOffset));
934 if (!input) {
935 return nullptr;
936 }
937
938 SkIRect inputBounds = SkIRect::MakeXYWH(inputOffset.fX, inputOffset.fY,
939 input->width(), input->height());
940
941 // Calculate the destination bounds.
942 SkIRect dstBounds;
943 if (!this->applyCropRect(this->mapContext(ctx), inputBounds, &dstBounds)) {
944 return nullptr;
945 }
946 if (!inputBounds.intersect(dstBounds)) {
947 return nullptr;
948 }
949
950 // Save the offset in preparation to make all rectangles relative to the inputOffset.
951 SkIPoint resultOffset = SkIPoint::Make(dstBounds.fLeft, dstBounds.fTop);
952
953 // Make all bounds relative to the inputOffset.
954 inputBounds.offset(-inputOffset);
955 dstBounds.offset(-inputOffset);
956
957 SkVector sigma = map_sigma(fSigma, ctx.ctm());
958 SkASSERT(SkScalarIsFinite(sigma.x()) && sigma.x() >= 0.f && sigma.x() <= kMaxSigma &&
959 SkScalarIsFinite(sigma.y()) && sigma.y() >= 0.f && sigma.y() <= kMaxSigma);
960
961 sk_sp<SkSpecialImage> result;
962 #if defined(SK_GANESH)
963 if (ctx.gpuBacked()) {
964 // Ensure the input is in the destination's gamut. This saves us from having to do the
965 // xform during the filter itself.
966 input = ImageToColorSpace(input.get(), ctx.colorType(), ctx.colorSpace(),
967 ctx.surfaceProps());
968 result = this->gpuFilter(ctx, sigma, input, inputBounds, dstBounds, inputOffset,
969 &resultOffset);
970 } else
971 #endif
972 {
973 result = cpu_blur(ctx, sigma, input, inputBounds, dstBounds);
974 }
975
976 // Return the resultOffset if the blur succeeded.
977 if (result != nullptr) {
978 *offset = resultOffset;
979 }
980 return result;
981 }
982
983 #if defined(SK_GANESH)
// GPU implementation of the blur.
//
// When both sigmas are effectively zero no blur is run: the input is subsetted to
// 'inputBounds' and '*offset' is set to that subset's origin translated by 'inputOffset'.
// Otherwise the blur is delegated to SkGpuBlurUtils::GaussianBlur() and the resulting surface
// is wrapped in a special image sized to 'dstBounds'; '*offset' is left untouched in that case.
//
// Returns nullptr if the input has no backing proxy or if GaussianBlur() fails.
sk_sp<SkSpecialImage> SkBlurImageFilter::gpuFilter(
        const Context& ctx, SkVector sigma, const sk_sp<SkSpecialImage> &input, SkIRect inputBounds,
        SkIRect dstBounds, SkIPoint inputOffset, SkIPoint* offset) const {
    const bool nothingToBlur = SkGpuBlurUtils::IsEffectivelyZeroSigma(sigma.x()) &&
                               SkGpuBlurUtils::IsEffectivelyZeroSigma(sigma.y());
    if (nothingToBlur) {
        *offset = SkIPoint::Make(inputBounds.x() + inputOffset.fX,
                                 inputBounds.y() + inputOffset.fY);
        return input->makeSubset(inputBounds);
    }

    auto context = ctx.getContext();

    GrSurfaceProxyView srcView = input->view(context);
    if (!srcView.proxy()) {
        return nullptr;
    }
    SkASSERT(srcView.asTextureProxy());

    // Shift both rectangles by the origin of the special image's subset before handing them
    // to the blur utility.
    const SkIPoint subsetOrigin = input->subset().topLeft();
    dstBounds.offset(subsetOrigin);
    inputBounds.offset(subsetOrigin);

    const auto grColorType = SkColorTypeToGrColorType(input->colorType());
    auto sdc = SkGpuBlurUtils::GaussianBlur(context,
                                            std::move(srcView),
                                            grColorType,
                                            input->alphaType(),
                                            ctx.refColorSpace(),
                                            dstBounds,
                                            inputBounds,
                                            sigma.x(),
                                            sigma.y(),
                                            fTileMode);
    if (!sdc) {
        return nullptr;
    }

    // The returned image covers a rectangle of dstBounds' size anchored at (0, 0).
    const SkIRect resultSubset = SkIRect::MakeSize(dstBounds.size());
    return SkSpecialImage::MakeDeferredFromGpu(context,
                                               resultSubset,
                                               kNeedNewImageUniqueID_SpecialImage,
                                               sdc->readSurfaceView(),
                                               sdc->colorInfo(),
                                               ctx.surfaceProps());
}
1026 #endif
1027
// Returns the fast bounds of input 0 (or 'src' itself when there is no input), grown by three
// times the blur sigma along each axis.
SkRect SkBlurImageFilter::computeFastBounds(const SkRect& src) const {
    SkRect bounds = src;
    if (const SkImageFilter* const child = this->getInput(0)) {
        bounds = child->computeFastBounds(src);
    }
    bounds.outset(fSigma.width() * 3, fSigma.height() * 3);
    return bounds;
}
1033
// Outsets 'src' by three times the CTM-mapped sigma per axis, rounded up to whole pixels.
// The same outset is applied for every MapDirection; 'inputRect' is not consulted.
SkIRect SkBlurImageFilter::onFilterNodeBounds(const SkIRect& src, const SkMatrix& ctm,
                                              MapDirection, const SkIRect* inputRect) const {
    const SkVector mappedSigma = map_sigma(fSigma, ctm);
    const int outsetX = SkScalarCeilToInt(mappedSigma.x() * 3);
    const int outsetY = SkScalarCeilToInt(mappedSigma.y() * 3);
    return src.makeOutset(outsetX, outsetY);
}
1039