• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2024 Google LLC
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 
8 #include "src/core/SkBlurEngine.h"
9 
10 #include "include/core/SkAlphaType.h"
11 #include "include/core/SkBlendMode.h"
12 #include "include/core/SkClipOp.h"
13 #include "include/core/SkColorSpace.h" // IWYU pragma: keep
14 #include "include/core/SkImageInfo.h"
15 #include "include/core/SkM44.h"
16 #include "include/core/SkMatrix.h"
17 #include "include/core/SkPaint.h"
18 #include "include/core/SkRect.h"
19 #include "include/core/SkSamplingOptions.h"
20 #include "include/core/SkScalar.h"
21 #include "include/core/SkTileMode.h"
22 #include "include/effects/SkRuntimeEffect.h"
23 #include "include/private/base/SkAssert.h"
24 #include "include/private/base/SkMath.h"
25 #include "include/private/base/SkTo.h"
26 #include "src/core/SkDevice.h"
27 #include "src/core/SkKnownRuntimeEffects.h"
28 #include "src/core/SkSpecialImage.h"
29 
30 #include <algorithm>
31 #include <array>
32 #include <cmath>
33 #include <cstdint>
34 #include <cstring>
35 #include <utility>
36 
Compute2DBlurKernel(SkSize sigma,SkISize radius,SkSpan<float> kernel)37 void SkShaderBlurAlgorithm::Compute2DBlurKernel(SkSize sigma,
38                                                 SkISize radius,
39                                                 SkSpan<float> kernel) {
40     // Callers likely had to calculate the radius prior to filling out the kernel value, which is
41     // why it's provided; but make sure it's consistent with expectations.
42     SkASSERT(SkBlurEngine::SigmaToRadius(sigma.width()) == radius.width() &&
43              SkBlurEngine::SigmaToRadius(sigma.height()) == radius.height());
44 
45     // Callers are responsible for downscaling large sigmas to values that can be processed by the
46     // effects, so ensure the radius won't overflow 'kernel'
47     const int width = KernelWidth(radius.width());
48     const int height = KernelWidth(radius.height());
49     const size_t kernelSize = SkTo<size_t>(sk_64_mul(width, height));
50     SkASSERT(kernelSize <= kernel.size());
51 
52     // And the definition of an identity blur should be sufficient that 2sigma^2 isn't near zero
53     // when there's a non-trivial radius.
54     const float twoSigmaSqrdX = 2.0f * sigma.width() * sigma.width();
55     const float twoSigmaSqrdY = 2.0f * sigma.height() * sigma.height();
56     SkASSERT((radius.width() == 0 || !SkScalarNearlyZero(twoSigmaSqrdX)) &&
57              (radius.height() == 0 || !SkScalarNearlyZero(twoSigmaSqrdY)));
58 
59     // Setting the denominator to 1 when the radius is 0 automatically converts the remaining math
60     // to the 1D Gaussian distribution. When both radii are 0, it correctly computes a weight of 1.0
61     const float sigmaXDenom = radius.width() > 0 ? 1.0f / twoSigmaSqrdX : 1.f;
62     const float sigmaYDenom = radius.height() > 0 ? 1.0f / twoSigmaSqrdY : 1.f;
63 
64     float sum = 0.0f;
65     for (int x = 0; x < width; x++) {
66         float xTerm = static_cast<float>(x - radius.width());
67         xTerm = xTerm * xTerm * sigmaXDenom;
68         for (int y = 0; y < height; y++) {
69             float yTerm = static_cast<float>(y - radius.height());
70             float xyTerm = std::exp(-(xTerm + yTerm * yTerm * sigmaYDenom));
71             // Note that the constant term (1/(sqrt(2*pi*sigma^2)) of the Gaussian
72             // is dropped here, since we renormalize the kernel below.
73             kernel[y * width + x] = xyTerm;
74             sum += xyTerm;
75         }
76     }
77     // Normalize the kernel
78     float scale = 1.0f / sum;
79     for (size_t i = 0; i < kernelSize; ++i) {
80         kernel[i] *= scale;
81     }
82     // Zero remainder of the array
83     memset(kernel.data() + kernelSize, 0, sizeof(float)*(kernel.size() - kernelSize));
84 }
85 
Compute2DBlurKernel(SkSize sigma,SkISize radii,std::array<SkV4,kMaxSamples/4> & kernel)86 void SkShaderBlurAlgorithm::Compute2DBlurKernel(SkSize sigma,
87                                                 SkISize radii,
88                                                 std::array<SkV4, kMaxSamples/4>& kernel) {
89     static_assert(sizeof(kernel) == sizeof(std::array<float, kMaxSamples>));
90     static_assert(alignof(float) == alignof(SkV4));
91     float* data = kernel[0].ptr();
92     Compute2DBlurKernel(sigma, radii, SkSpan<float>(data, kMaxSamples));
93 }
94 
Compute2DBlurOffsets(SkISize radius,std::array<SkV4,kMaxSamples/2> & offsets)95 void SkShaderBlurAlgorithm::Compute2DBlurOffsets(SkISize radius,
96                                                  std::array<SkV4, kMaxSamples/2>& offsets) {
97     const int kernelArea = KernelWidth(radius.width()) * KernelWidth(radius.height());
98     SkASSERT(kernelArea <= kMaxSamples);
99 
100     SkSpan<float> offsetView{offsets[0].ptr(), kMaxSamples*2};
101 
102     int i = 0;
103     for (int y = -radius.height(); y <= radius.height(); ++y) {
104         for (int x = -radius.width(); x <= radius.width(); ++x) {
105             offsetView[2*i]   = x;
106             offsetView[2*i+1] = y;
107             ++i;
108         }
109     }
110     SkASSERT(i == kernelArea);
111     const int lastValidOffset = 2*(kernelArea - 1);
112     for (; i < kMaxSamples; ++i) {
113         offsetView[2*i]   = offsetView[lastValidOffset];
114         offsetView[2*i+1] = offsetView[lastValidOffset+1];
115     }
116 }
117 
Compute1DBlurLinearKernel(float sigma,int radius,std::array<SkV4,kMaxSamples/2> & offsetsAndKernel)118 void SkShaderBlurAlgorithm::Compute1DBlurLinearKernel(
119         float sigma,
120         int radius,
121         std::array<SkV4, kMaxSamples/2>& offsetsAndKernel) {
122     SkASSERT(sigma <= kMaxLinearSigma);
123     SkASSERT(radius == SkBlurEngine::SigmaToRadius(sigma));
124     SkASSERT(LinearKernelWidth(radius) <= kMaxSamples);
125 
126     // Given 2 adjacent gaussian points, they are blended as: Wi * Ci + Wj * Cj.
127     // The GPU will mix Ci and Cj as Ci * (1 - x) + Cj * x during sampling.
128     // Compute W', x such that W' * (Ci * (1 - x) + Cj * x) = Wi * Ci + Wj * Cj.
129     // Solving W' * x = Wj, W' * (1 - x) = Wi:
130     // W' = Wi + Wj
131     // x = Wj / (Wi + Wj)
132     auto get_new_weight = [](float* new_w, float* offset, float wi, float wj) {
133         *new_w = wi + wj;
134         *offset = wj / (wi + wj);
135     };
136 
137     // Create a temporary standard kernel. The maximum blur radius that can be passed to this
138     // function is (kMaxBlurSamples-1), so make an array large enough to hold the full kernel width.
139     static constexpr int kMaxKernelWidth = KernelWidth(kMaxSamples - 1);
140     SkASSERT(KernelWidth(radius) <= kMaxKernelWidth);
141     std::array<float, kMaxKernelWidth> fullKernel;
142     Compute1DBlurKernel(sigma, radius, SkSpan<float>{fullKernel.data(), KernelWidth(radius)});
143 
144     std::array<float, kMaxSamples> kernel;
145     std::array<float, kMaxSamples> offsets;
146     // Note that halfsize isn't just size / 2, but radius + 1. This is the size of the output array.
147     int halfSize = LinearKernelWidth(radius);
148     int halfRadius = halfSize / 2;
149     int lowIndex = halfRadius - 1;
150 
151     // Compute1DGaussianKernel produces a full 2N + 1 kernel. Since the kernel can be mirrored,
152     // compute only the upper half and mirror to the lower half.
153 
154     int index = radius;
155     if (radius & 1) {
156         // If N is odd, then use two samples.
157         // The centre texel gets sampled twice, so halve its influence for each sample.
158         // We essentially sample like this:
159         // Texel edges
160         // v    v    v    v
161         // |    |    |    |
162         // \-----^---/ Lower sample
163         //      \---^-----/ Upper sample
164         get_new_weight(&kernel[halfRadius],
165                        &offsets[halfRadius],
166                        fullKernel[index] * 0.5f,
167                        fullKernel[index + 1]);
168         kernel[lowIndex] = kernel[halfRadius];
169         offsets[lowIndex] = -offsets[halfRadius];
170         index++;
171         lowIndex--;
172     } else {
173         // If N is even, then there are an even number of texels on either side of the centre texel.
174         // Sample the centre texel directly.
175         kernel[halfRadius] = fullKernel[index];
176         offsets[halfRadius] = 0.0f;
177     }
178     index++;
179 
180     // Every other pair gets one sample.
181     for (int i = halfRadius + 1; i < halfSize; index += 2, i++, lowIndex--) {
182         get_new_weight(&kernel[i], &offsets[i], fullKernel[index], fullKernel[index + 1]);
183         offsets[i] += static_cast<float>(index - radius);
184 
185         // Mirror to lower half.
186         kernel[lowIndex] = kernel[i];
187         offsets[lowIndex] = -offsets[i];
188     }
189 
190     // Zero out remaining values in the kernel
191     memset(kernel.data() + halfSize, 0, sizeof(float)*(kMaxSamples - halfSize));
192     // But copy the last valid offset into the remaining offsets, to increase the chance that
193     // over-iteration in a fragment shader will have a cache hit.
194     for (int i = halfSize; i < kMaxSamples; ++i) {
195         offsets[i] = offsets[halfSize - 1];
196     }
197 
198     // Interleave into the output array to match the 1D SkSL effect
199     for (int i = 0; i < kMaxSamples / 2; ++i) {
200         offsetsAndKernel[i] = SkV4{offsets[2*i], kernel[2*i], offsets[2*i+1], kernel[2*i+1]};
201     }
202 }
203 
to_stablekey(int kernelWidth,uint32_t baseKey)204 static SkKnownRuntimeEffects::StableKey to_stablekey(int kernelWidth, uint32_t baseKey) {
205     SkASSERT(kernelWidth >= 2 && kernelWidth <= SkShaderBlurAlgorithm::kMaxSamples);
206     switch(kernelWidth) {
207         // Batch on multiples of 4 (skipping width=1, since that can't happen)
208         case 2:  [[fallthrough]];
209         case 3:  [[fallthrough]];
210         case 4:  return static_cast<SkKnownRuntimeEffects::StableKey>(baseKey);
211         case 5:  [[fallthrough]];
212         case 6:  [[fallthrough]];
213         case 7:  [[fallthrough]];
214         case 8:  return static_cast<SkKnownRuntimeEffects::StableKey>(baseKey+1);
215         case 9:  [[fallthrough]];
216         case 10: [[fallthrough]];
217         case 11: [[fallthrough]];
218         case 12: return static_cast<SkKnownRuntimeEffects::StableKey>(baseKey+2);
219         case 13: [[fallthrough]];
220         case 14: [[fallthrough]];
221         case 15: [[fallthrough]];
222         case 16: return static_cast<SkKnownRuntimeEffects::StableKey>(baseKey+3);
223         case 17: [[fallthrough]];
224         case 18: [[fallthrough]];
225         case 19: [[fallthrough]];
226         // With larger kernels, batch on multiples of eight so up to 7 wasted samples.
227         case 20: return static_cast<SkKnownRuntimeEffects::StableKey>(baseKey+4);
228         case 21: [[fallthrough]];
229         case 22: [[fallthrough]];
230         case 23: [[fallthrough]];
231         case 24: [[fallthrough]];
232         case 25: [[fallthrough]];
233         case 26: [[fallthrough]];
234         case 27: [[fallthrough]];
235         case 28: return static_cast<SkKnownRuntimeEffects::StableKey>(baseKey+5);
236         default:
237             SkUNREACHABLE;
238     }
239 }
240 
GetLinearBlur1DEffect(int radius)241 const SkRuntimeEffect* SkShaderBlurAlgorithm::GetLinearBlur1DEffect(int radius) {
242     return GetKnownRuntimeEffect(
243             to_stablekey(LinearKernelWidth(radius),
244                          static_cast<uint32_t>(SkKnownRuntimeEffects::StableKey::k1DBlurBase)));
245 }
246 
GetBlur2DEffect(const SkISize & radii)247 const SkRuntimeEffect* SkShaderBlurAlgorithm::GetBlur2DEffect(const SkISize& radii) {
248     int kernelArea = KernelWidth(radii.width()) * KernelWidth(radii.height());
249     return GetKnownRuntimeEffect(
250             to_stablekey(kernelArea,
251                          static_cast<uint32_t>(SkKnownRuntimeEffects::StableKey::k2DBlurBase)));
252 }
253 
renderBlur(SkRuntimeShaderBuilder * blurEffectBuilder,SkFilterMode filter,SkISize radii,sk_sp<SkSpecialImage> input,const SkIRect & srcRect,SkTileMode tileMode,const SkIRect & dstRect) const254 sk_sp<SkSpecialImage> SkShaderBlurAlgorithm::renderBlur(SkRuntimeShaderBuilder* blurEffectBuilder,
255                                                         SkFilterMode filter,
256                                                         SkISize radii,
257                                                         sk_sp<SkSpecialImage> input,
258                                                         const SkIRect& srcRect,
259                                                         SkTileMode tileMode,
260                                                         const SkIRect& dstRect) const {
261     SkImageInfo outII = SkImageInfo::Make({dstRect.width(), dstRect.height()},
262                                           input->colorType(),
263                                           kPremul_SkAlphaType,
264                                           input->colorInfo().refColorSpace());
265     sk_sp<SkDevice> device = this->makeDevice(outII);
266     if (!device) {
267         return nullptr;
268     }
269 
270     SkIRect subset = SkIRect::MakeSize(dstRect.size());
271     device->clipRect(SkRect::Make(subset), SkClipOp::kIntersect, /*aa=*/false);
272     device->setLocalToDevice(SkM44::Translate(-dstRect.left(), -dstRect.top()));
273 
274     // renderBlur() will either mix multiple fast and strict draws to cover dstRect, or will issue
275     // a single strict draw. While the SkShader object changes (really just strict mode), the rest
276     // of the SkPaint remains the same.
277     SkPaint paint;
278     paint.setBlendMode(SkBlendMode::kSrc);
279 
280     SkIRect safeSrcRect = srcRect.makeInset(radii.width(), radii.height());
281     SkIRect fastDstRect = dstRect;
282 
283     // Only consider the safeSrcRect for shader-based tiling if the original srcRect is different
284     // from the backing store dimensions; when they match the full image we can use HW tiling.
285     if (srcRect != SkIRect::MakeSize(input->backingStoreDimensions())) {
286         if (fastDstRect.intersect(safeSrcRect)) {
287             // If the area of the non-clamping shader is small, it's better to just issue a single
288             // draw that performs shader tiling over the whole dst.
289             if (fastDstRect.width() * fastDstRect.height() < 128 * 128) {
290                 fastDstRect.setEmpty();
291             }
292         } else {
293             fastDstRect.setEmpty();
294         }
295     }
296 
297     if (!fastDstRect.isEmpty()) {
298         // Fill as much as possible without adding shader tiling logic to each blur sample,
299         // switching to clamp tiling if we aren't in this block due to HW tiling.
300         SkIRect untiledSrcRect = srcRect.makeInset(1, 1);
301         SkTileMode fastTileMode = untiledSrcRect.contains(fastDstRect) ? SkTileMode::kClamp
302                                                                        : tileMode;
303         blurEffectBuilder->child("child") = input->asShader(
304                 fastTileMode, filter, SkMatrix::I(), /*strict=*/false);
305         paint.setShader(blurEffectBuilder->makeShader());
306         device->drawRect(SkRect::Make(fastDstRect), paint);
307     }
308 
309     // Switch to a strict shader if there are remaining pixels to fill
310     if (fastDstRect != dstRect) {
311         blurEffectBuilder->child("child") = input->makeSubset(srcRect)->asShader(
312                 tileMode, filter, SkMatrix::Translate(srcRect.left(), srcRect.top()));
313         paint.setShader(blurEffectBuilder->makeShader());
314     }
315 
316     if (fastDstRect.isEmpty()) {
317         // Fill the entire dst with the strict shader
318         device->drawRect(SkRect::Make(dstRect), paint);
319     } else if (fastDstRect != dstRect) {
320         // There will be up to four additional strict draws to fill in the border. The left and
321         // right sides will span the full height of the dst rect. The top and bottom will span
322         // the just the width of the fast interior. Strict border draws with zero width/height
323         // are skipped.
324         auto drawBorder = [&](const SkIRect& r) {
325             if (!r.isEmpty()) {
326                 device->drawRect(SkRect::Make(r), paint);
327             }
328         };
329 
330         drawBorder({dstRect.left(),      dstRect.top(),
331                     fastDstRect.left(),  dstRect.bottom()});   // Left, spanning full height
332         drawBorder({fastDstRect.right(), dstRect.top(),
333                     dstRect.right(),     dstRect.bottom()});   // Right, spanning full height
334         drawBorder({fastDstRect.left(),  dstRect.top(),
335                     fastDstRect.right(), fastDstRect.top()});  // Top, spanning inner width
336         drawBorder({fastDstRect.left(),  fastDstRect.bottom(),
337                     fastDstRect.right(), dstRect.bottom()});   // Bottom, spanning inner width
338     }
339 
340     return device->snapSpecial(subset);
341 }
342 
evalBlur2D(SkSize sigma,SkISize radii,sk_sp<SkSpecialImage> input,const SkIRect & srcRect,SkTileMode tileMode,const SkIRect & dstRect) const343 sk_sp<SkSpecialImage> SkShaderBlurAlgorithm::evalBlur2D(SkSize sigma,
344                                                         SkISize radii,
345                                                         sk_sp<SkSpecialImage> input,
346                                                         const SkIRect& srcRect,
347                                                         SkTileMode tileMode,
348                                                         const SkIRect& dstRect) const {
349     std::array<SkV4, kMaxSamples/4> kernel;
350     std::array<SkV4, kMaxSamples/2> offsets;
351     Compute2DBlurKernel(sigma, radii, kernel);
352     Compute2DBlurOffsets(radii, offsets);
353 
354     SkRuntimeShaderBuilder builder{sk_ref_sp(GetBlur2DEffect(radii))};
355     builder.uniform("kernel") = kernel;
356     builder.uniform("offsets") = offsets;
357     // NOTE: renderBlur() will configure the "child" shader as needed. The 2D blur effect only
358     // requires nearest-neighbor filtering.
359     return this->renderBlur(&builder, SkFilterMode::kNearest, radii,
360                             std::move(input), srcRect, tileMode, dstRect);
361 }
362 
evalBlur1D(float sigma,int radius,SkV2 dir,sk_sp<SkSpecialImage> input,SkIRect srcRect,SkTileMode tileMode,SkIRect dstRect) const363 sk_sp<SkSpecialImage> SkShaderBlurAlgorithm::evalBlur1D(float sigma,
364                                                         int radius,
365                                                         SkV2 dir,
366                                                         sk_sp<SkSpecialImage> input,
367                                                         SkIRect srcRect,
368                                                         SkTileMode tileMode,
369                                                         SkIRect dstRect) const {
370     std::array<SkV4, kMaxSamples/2> offsetsAndKernel;
371     Compute1DBlurLinearKernel(sigma, radius, offsetsAndKernel);
372 
373     SkRuntimeShaderBuilder builder{sk_ref_sp(GetLinearBlur1DEffect(radius))};
374     builder.uniform("offsetsAndKernel") = offsetsAndKernel;
375     builder.uniform("dir") = dir;
376     // NOTE: renderBlur() will configure the "child" shader as needed. The 1D blur effect requires
377     // linear filtering. Reconstruct the appropriate "2D" radii inset value from 'dir'.
378     SkISize radii{dir.x ? radius : 0, dir.y ? radius : 0};
379     return this->renderBlur(&builder, SkFilterMode::kLinear, radii,
380                             std::move(input), srcRect, tileMode, dstRect);
381 }
382 
blur(SkSize sigma,sk_sp<SkSpecialImage> src,const SkIRect & srcRect,SkTileMode tileMode,const SkIRect & dstRect) const383 sk_sp<SkSpecialImage> SkShaderBlurAlgorithm::blur(SkSize sigma,
384                                                   sk_sp<SkSpecialImage> src,
385                                                   const SkIRect& srcRect,
386                                                   SkTileMode tileMode,
387                                                   const SkIRect& dstRect) const {
388     SkASSERT(sigma.width() <= kMaxLinearSigma &&  sigma.height() <= kMaxLinearSigma);
389 
390     int radiusX = SkBlurEngine::SigmaToRadius(sigma.width());
391     int radiusY = SkBlurEngine::SigmaToRadius(sigma.height());
392     const int kernelArea = KernelWidth(radiusX) * KernelWidth(radiusY);
393     if (kernelArea <= kMaxSamples && radiusX > 0 && radiusY > 0) {
394         // Use a single-pass 2D kernel if it fits and isn't just 1D already
395         return this->evalBlur2D(sigma,
396                                 {radiusX, radiusY},
397                                 std::move(src),
398                                 srcRect,
399                                 tileMode,
400                                 dstRect);
401     } else {
402         // Use two passes of a 1D kernel (one per axis).
403         SkIRect intermediateSrcRect = srcRect;
404         SkIRect intermediateDstRect = dstRect;
405         if (radiusX > 0) {
406             if (radiusY > 0) {
407                 // May need to maintain extra rows above and below 'dstRect' for the follow-up pass.
408                 if (tileMode == SkTileMode::kRepeat || tileMode == SkTileMode::kMirror) {
409                     // If the srcRect and dstRect are aligned, then we don't need extra rows since
410                     // the periodic tiling on srcRect is the same for the intermediate. If they
411                     // are not aligned, then outset by the Y radius.
412                     const int period = srcRect.height() * (tileMode == SkTileMode::kMirror ? 2 : 1);
413                     if (std::abs(dstRect.fTop - srcRect.fTop) % period != 0 ||
414                         dstRect.height() != srcRect.height()) {
415                         intermediateDstRect.outset(0, radiusY);
416                     }
417                 } else {
418                     // For clamp and decal tiling, we outset by the Y radius up to what's available
419                     // from the srcRect. Anything beyond that is identical to tiling the
420                     // intermediate dst image directly.
421                     intermediateDstRect.outset(0, radiusY);
422                     intermediateDstRect.fTop = std::max(intermediateDstRect.fTop, srcRect.fTop);
423                     intermediateDstRect.fBottom =
424                             std::min(intermediateDstRect.fBottom, srcRect.fBottom);
425                     if (intermediateDstRect.fTop >= intermediateDstRect.fBottom) {
426                         return nullptr;
427                     }
428                 }
429             }
430 
431             src = this->evalBlur1D(sigma.width(),
432                                    radiusX,
433                                    /*dir=*/{1.f, 0.f},
434                                    std::move(src),
435                                    srcRect,
436                                    tileMode,
437                                    intermediateDstRect);
438             if (!src) {
439                 return nullptr;
440             }
441             intermediateSrcRect = SkIRect::MakeWH(src->width(), src->height());
442             intermediateDstRect = dstRect.makeOffset(-intermediateDstRect.left(),
443                                                      -intermediateDstRect.top());
444         }
445 
446         if (radiusY > 0) {
447             src = this->evalBlur1D(sigma.height(),
448                                    radiusY,
449                                    /*dir=*/{0.f, 1.f},
450                                    std::move(src),
451                                    intermediateSrcRect,
452                                    tileMode,
453                                    intermediateDstRect);
454         }
455 
456         return src;
457     }
458 }
459