1 /*
2 * Copyright 2014 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
#include "src/gpu/ganesh/effects/GrMatrixConvolutionEffect.h"

#include "include/core/SkBitmap.h"
#include "src/base/SkHalf.h"
#include "src/gpu/KeyBuilder.h"
#include "src/gpu/ganesh/GrDirectContextPriv.h"
#include "src/gpu/ganesh/GrProxyProvider.h"
#include "src/gpu/ganesh/GrRecordingContextPriv.h"
#include "src/gpu/ganesh/GrTexture.h"
#include "src/gpu/ganesh/GrTextureProxy.h"
#include "src/gpu/ganesh/GrThreadSafeCache.h"
#include "src/gpu/ganesh/SkGr.h"
#include "src/gpu/ganesh/effects/GrTextureEffect.h"
#include "src/gpu/ganesh/glsl/GrGLSLFragmentShaderBuilder.h"
#include "src/gpu/ganesh/glsl/GrGLSLProgramDataManager.h"
#include "src/gpu/ganesh/glsl/GrGLSLUniformHandler.h"

#include <cstring>
23
24 class GrMatrixConvolutionEffect::Impl : public ProgramImpl {
25 public:
26 void emitCode(EmitArgs&) override;
27
28 private:
29 void onSetData(const GrGLSLProgramDataManager&, const GrFragmentProcessor&) override;
30
31 typedef GrGLSLProgramDataManager::UniformHandle UniformHandle;
32
33 void emitKernelBlock(EmitArgs&, SkIPoint);
34
35 UniformHandle fKernelUni;
36 UniformHandle fKernelOffsetUni;
37 UniformHandle fGainUni;
38 UniformHandle fBiasUni;
39 UniformHandle fKernelBiasUni;
40
41 using INHERITED = ProgramImpl;
42 };
43
44 GrMatrixConvolutionEffect::KernelWrapper::MakeResult
Make(GrRecordingContext * rContext,SkISize size,const GrCaps & caps,const SkScalar * values)45 GrMatrixConvolutionEffect::KernelWrapper::Make(GrRecordingContext* rContext,
46 SkISize size,
47 const GrCaps& caps,
48 const SkScalar* values) {
49 if (!rContext || !values || size.isEmpty()) {
50 return {};
51 }
52
53 const int length = size.area();
54 // Small kernel -> just fill the array.
55 KernelWrapper result(size);
56 if (length <= kMaxUniformSize) {
57 for (int i = 0; i < length; i++) {
58 result.fArray[i] = SkScalarToFloat(values[i]);
59 }
60 return {result, nullptr};
61 }
62
63 BiasAndGain& scalableSampler = result.fBiasAndGain;
64 bool useA16 =
65 rContext->defaultBackendFormat(kA16_float_SkColorType, GrRenderable::kNo).isValid();
66 SkScalar min = values[0];
67 if (!useA16) {
68 // Determine min and max values to figure out inner gain & bias.
69 SkScalar max = values[0];
70 for (int i = 1; i < length; i++) {
71 if (values[i] < min) {
72 min = values[i];
73 }
74 if (values[i] > max) {
75 max = values[i];
76 }
77 }
78 // Treat near-0 gain (i.e. box blur) as 1, and let the kernelBias
79 // move everything up to the final value.
80 const SkScalar computedGain = max - min;
81 scalableSampler.fGain =
82 SkScalarNearlyZero(computedGain) ? 1.0f : SkScalarToFloat(computedGain);
83 // Inner bias is pre-inner-gain so we divide that out.
84 scalableSampler.fBias = SkScalarToFloat(min) / scalableSampler.fGain;
85 }
86
87 // TODO: Pick cache or dont-cache based on observed perf.
88 static constexpr bool kCacheKernelTexture = true;
89
90 skgpu::UniqueKey key;
91 if (kCacheKernelTexture) {
92 static const skgpu::UniqueKey::Domain kDomain = skgpu::UniqueKey::GenerateDomain();
93 skgpu::UniqueKey::Builder builder(&key, kDomain, length, "Matrix Convolution Kernel");
94 // Texture cache key is the exact content of the kernel.
95 static_assert(sizeof(float) == 4);
96 for (int i = 0; i < length; i++) {
97 builder[i] = *(const uint32_t*)&values[i];
98 }
99 builder.finish();
100 }
101
102 // Find or create a texture.
103 auto threadSafeCache = rContext->priv().threadSafeCache();
104
105 SkColorType colorType = useA16 ? kA16_float_SkColorType : kAlpha_8_SkColorType;
106
107 GrSurfaceProxyView view;
108 if (kCacheKernelTexture && (view = threadSafeCache->find(key))) {
109 SkASSERT(view.origin() == kTopLeft_GrSurfaceOrigin);
110 auto kernelFP = GrTextureEffect::Make(std::move(view), kUnknown_SkAlphaType);
111 return {result, std::move(kernelFP)};
112 }
113
114 SkBitmap bm;
115 auto info = SkImageInfo::Make({length, 1}, colorType, kPremul_SkAlphaType);
116 if (!bm.tryAllocPixels(info)) {
117 return {};
118 }
119 for (int i = 0; i < length; i++) {
120 if (useA16) {
121 *bm.getAddr16(i, 0) = SkFloatToHalf(values[i]);
122 } else {
123 *bm.getAddr8(i, 0) =
124 SkScalarRoundToInt((values[i] - min) / scalableSampler.fGain * 255);
125 }
126 }
127 bm.setImmutable();
128
129 view = std::get<0>(GrMakeUncachedBitmapProxyView(rContext, bm));
130 if (!view) {
131 return {};
132 }
133
134 if (kCacheKernelTexture) {
135 view = threadSafeCache->add(key, view);
136 }
137
138 SkASSERT(view.origin() == kTopLeft_GrSurfaceOrigin);
139 auto kernelFP = GrTextureEffect::Make(std::move(view), kUnknown_SkAlphaType);
140 return {result, std::move(kernelFP)};
141 }
142
operator ==(const KernelWrapper & k) const143 bool GrMatrixConvolutionEffect::KernelWrapper::operator==(const KernelWrapper& k) const {
144 if (fSize != k.fSize) {
145 return false;
146 } else if (this->isSampled()) {
147 return fBiasAndGain == k.fBiasAndGain;
148 } else {
149 return std::equal(fArray.begin(), fArray.begin() + fSize.area(), k.fArray.begin());
150 }
151 }
152
operator ==(const BiasAndGain & k) const153 bool GrMatrixConvolutionEffect::KernelWrapper::BiasAndGain::operator==(
154 const BiasAndGain& k) const {
155 return fGain == k.fGain && fBias == k.fBias;
156 }
157
158 // For sampled kernels, emit a for loop that does all the kernel accumulation.
159 // For uniform kernels, emit a single iteration. Function is called repeatedly in a for loop.
160 // loc is ignored for sampled kernels.
emitKernelBlock(EmitArgs & args,SkIPoint loc)161 void GrMatrixConvolutionEffect::Impl::emitKernelBlock(EmitArgs& args, SkIPoint loc) {
162 const GrMatrixConvolutionEffect& mce = args.fFp.cast<GrMatrixConvolutionEffect>();
163 GrGLSLFPFragmentBuilder* fragBuilder = args.fFragBuilder;
164 GrGLSLUniformHandler* uniformHandler = args.fUniformHandler;
165 int kernelWidth = mce.fKernel.size().width();
166 int kernelHeight = mce.fKernel.size().height();
167 int kernelArea = kernelWidth * kernelHeight;
168
169 if (mce.fKernel.isSampled()) {
170 fragBuilder->codeAppendf("for (int i = 0; i < %d; ++i)", (int)kernelArea);
171 }
172
173 GrGLSLShaderBuilder::ShaderBlock block(fragBuilder);
174
175 fragBuilder->codeAppend("half k;");
176 fragBuilder->codeAppend("half2 sourceOffset;");
177 if (mce.fKernel.isSampled()) {
178 const char* kernelBias = uniformHandler->getUniformCStr(fKernelBiasUni);
179 SkString kernelSample = this->invokeChild(1, args, "float2(float(i) + 0.5, 0.5)");
180 fragBuilder->codeAppendf("k = %s.w + %s;", kernelSample.c_str(), kernelBias);
181 fragBuilder->codeAppendf("sourceOffset.y = floor(half(i) / %d);", kernelWidth);
182 fragBuilder->codeAppendf("sourceOffset.x = half(i) - sourceOffset.y * %d;", kernelWidth);
183 } else {
184 fragBuilder->codeAppendf("sourceOffset = half2(%d, %d);", loc.x(), loc.y());
185 int offset = loc.y() * kernelWidth + loc.x();
186 const char* kernel = uniformHandler->getUniformCStr(fKernelUni);
187 fragBuilder->codeAppendf("k = %s[%d][%d];", kernel, offset / 4, offset & 0x3);
188 }
189
190 auto sample = this->invokeChild(0, args, "coord + sourceOffset");
191 fragBuilder->codeAppendf("half4 c = %s;", sample.c_str());
192 if (!mce.fConvolveAlpha) {
193 fragBuilder->codeAppend("c = unpremul(c);");
194 fragBuilder->codeAppend("c.rgb = saturate(c.rgb);");
195 }
196 fragBuilder->codeAppend("sum += c * k;");
197 }
198
emitCode(EmitArgs & args)199 void GrMatrixConvolutionEffect::Impl::emitCode(EmitArgs& args) {
200 const GrMatrixConvolutionEffect& mce = args.fFp.cast<GrMatrixConvolutionEffect>();
201
202 int kernelWidth = mce.fKernel.size().width();
203 int kernelHeight = mce.fKernel.size().height();
204
205 int arrayCount = (kernelWidth * kernelHeight + 3) / 4;
206 SkASSERT(4 * arrayCount >= kernelWidth * kernelHeight);
207
208 GrGLSLUniformHandler* uniformHandler = args.fUniformHandler;
209 if (mce.fKernel.isSampled()) {
210 fKernelBiasUni = uniformHandler->addUniform(&mce, kFragment_GrShaderFlag,
211 SkSLType::kHalf, "KernelBias");
212 } else {
213 fKernelUni = uniformHandler->addUniformArray(&mce, kFragment_GrShaderFlag,
214 SkSLType::kHalf4, "Kernel", arrayCount);
215 }
216 fKernelOffsetUni = uniformHandler->addUniform(&mce, kFragment_GrShaderFlag, SkSLType::kHalf2,
217 "KernelOffset");
218 fGainUni = uniformHandler->addUniform(&mce, kFragment_GrShaderFlag, SkSLType::kHalf, "Gain");
219 fBiasUni = uniformHandler->addUniform(&mce, kFragment_GrShaderFlag, SkSLType::kHalf, "Bias");
220
221 const char* kernelOffset = uniformHandler->getUniformCStr(fKernelOffsetUni);
222 const char* gain = uniformHandler->getUniformCStr(fGainUni);
223 const char* bias = uniformHandler->getUniformCStr(fBiasUni);
224
225 GrGLSLFPFragmentBuilder* fragBuilder = args.fFragBuilder;
226 fragBuilder->codeAppend("half4 sum = half4(0);");
227 fragBuilder->codeAppendf("float2 coord = %s - %s;", args.fSampleCoord, kernelOffset);
228
229 if (mce.fKernel.isSampled()) {
230 this->emitKernelBlock(args, {});
231 } else {
232 for (int x = 0; x < kernelWidth; ++x) {
233 for (int y = 0; y < kernelHeight; ++y) {
234 this->emitKernelBlock(args, SkIPoint::Make(x, y));
235 }
236 }
237 }
238
239 fragBuilder->codeAppendf("half4 color;");
240 if (mce.fConvolveAlpha) {
241 fragBuilder->codeAppendf("color = sum * %s + %s;", gain, bias);
242 fragBuilder->codeAppendf("color.a = saturate(color.a);");
243 fragBuilder->codeAppendf("color.rgb = clamp(color.rgb, 0.0, color.a);");
244 } else {
245 auto sample = this->invokeChild(0, args);
246 fragBuilder->codeAppendf("half4 c = %s;", sample.c_str());
247 fragBuilder->codeAppendf("color.a = c.a;");
248 fragBuilder->codeAppendf("color.rgb = saturate(sum.rgb * %s + %s);", gain, bias);
249 fragBuilder->codeAppendf("color.rgb *= color.a;");
250 }
251 fragBuilder->codeAppendf("return color;");
252 }
253
onSetData(const GrGLSLProgramDataManager & pdman,const GrFragmentProcessor & processor)254 void GrMatrixConvolutionEffect::Impl::onSetData(const GrGLSLProgramDataManager& pdman,
255 const GrFragmentProcessor& processor) {
256 const GrMatrixConvolutionEffect& conv = processor.cast<GrMatrixConvolutionEffect>();
257 pdman.set2f(fKernelOffsetUni, conv.fKernelOffset.fX, conv.fKernelOffset.fY);
258 float totalGain = conv.fGain;
259 if (conv.fKernel.isSampled()) {
260 totalGain *= conv.fKernel.biasAndGain().fGain;
261 pdman.set1f(fKernelBiasUni, conv.fKernel.biasAndGain().fBias);
262 } else {
263 int kernelCount = conv.fKernel.size().area();
264 int arrayCount = (kernelCount + 3) / 4;
265 SkASSERT(4 * arrayCount >= kernelCount);
266 pdman.set4fv(fKernelUni, arrayCount, conv.fKernel.array().data());
267 }
268 pdman.set1f(fBiasUni, conv.fBias);
269 pdman.set1f(fGainUni, totalGain);
270 }
271
GrMatrixConvolutionEffect(std::unique_ptr<GrFragmentProcessor> child,const KernelWrapper & kernel,std::unique_ptr<GrFragmentProcessor> kernelFP,SkScalar gain,SkScalar bias,const SkIPoint & kernelOffset,bool convolveAlpha)272 GrMatrixConvolutionEffect::GrMatrixConvolutionEffect(std::unique_ptr<GrFragmentProcessor> child,
273 const KernelWrapper& kernel,
274 std::unique_ptr<GrFragmentProcessor> kernelFP,
275 SkScalar gain,
276 SkScalar bias,
277 const SkIPoint& kernelOffset,
278 bool convolveAlpha)
279 // To advertise either the modulation or opaqueness optimizations we'd have to examine the
280 // parameters.
281 : INHERITED(kGrMatrixConvolutionEffect_ClassID, kNone_OptimizationFlags)
282 , fKernel(kernel)
283 , fGain(SkScalarToFloat(gain))
284 , fBias(SkScalarToFloat(bias) / 255.0f)
285 , fConvolveAlpha(convolveAlpha) {
286 this->registerChild(std::move(child), SkSL::SampleUsage::Explicit());
287 this->registerChild(std::move(kernelFP), SkSL::SampleUsage::Explicit());
288 fKernelOffset = {static_cast<float>(kernelOffset.x()),
289 static_cast<float>(kernelOffset.y())};
290 this->setUsesSampleCoordsDirectly();
291 }
292
// Copy constructor used by clone(): copies the base-class state and every parameter field.
GrMatrixConvolutionEffect::GrMatrixConvolutionEffect(const GrMatrixConvolutionEffect& that)
        : INHERITED(that),
          fKernel(that.fKernel),
          fGain(that.fGain),
          fBias(that.fBias),
          fKernelOffset(that.fKernelOffset),
          fConvolveAlpha(that.fConvolveAlpha) {
}
300
clone() const301 std::unique_ptr<GrFragmentProcessor> GrMatrixConvolutionEffect::clone() const {
302 return std::unique_ptr<GrFragmentProcessor>(new GrMatrixConvolutionEffect(*this));
303 }
304
onAddToKey(const GrShaderCaps & caps,skgpu::KeyBuilder * b) const305 void GrMatrixConvolutionEffect::onAddToKey(const GrShaderCaps& caps,
306 skgpu::KeyBuilder* b) const {
307 SkASSERT(this->fKernel.size().width() <= 0x7FFF && this->fKernel.size().height() <= 0xFFFF);
308 uint32_t key = this->fKernel.size().width() << 16 | this->fKernel.size().height();
309 key |= fConvolveAlpha ? 1U << 31 : 0;
310 b->add32(key);
311 }
312
313 std::unique_ptr<GrFragmentProcessor::ProgramImpl>
onMakeProgramImpl() const314 GrMatrixConvolutionEffect::onMakeProgramImpl() const {
315 return std::make_unique<Impl>();
316 }
317
onIsEqual(const GrFragmentProcessor & sBase) const318 bool GrMatrixConvolutionEffect::onIsEqual(const GrFragmentProcessor& sBase) const {
319 const GrMatrixConvolutionEffect& s = sBase.cast<GrMatrixConvolutionEffect>();
320 return fKernel == s.fKernel &&
321 fGain == s.fGain &&
322 fBias == s.fBias &&
323 fKernelOffset == s.fKernelOffset &&
324 fConvolveAlpha == s.fConvolveAlpha;
325 }
326
Make(GrRecordingContext * context,GrSurfaceProxyView srcView,const SkIRect & srcBounds,const SkISize & kernelSize,const SkScalar * kernel,SkScalar gain,SkScalar bias,const SkIPoint & kernelOffset,GrSamplerState::WrapMode wm,bool convolveAlpha,const GrCaps & caps)327 std::unique_ptr<GrFragmentProcessor> GrMatrixConvolutionEffect::Make(GrRecordingContext* context,
328 GrSurfaceProxyView srcView,
329 const SkIRect& srcBounds,
330 const SkISize& kernelSize,
331 const SkScalar* kernel,
332 SkScalar gain,
333 SkScalar bias,
334 const SkIPoint& kernelOffset,
335 GrSamplerState::WrapMode wm,
336 bool convolveAlpha,
337 const GrCaps& caps) {
338 auto [kernelWrapper, kernelFP] = KernelWrapper::Make(context, kernelSize, caps, kernel);
339 if (!kernelWrapper.isValid()) {
340 return nullptr;
341 }
342 GrSamplerState sampler(wm, GrSamplerState::Filter::kNearest);
343 auto child = GrTextureEffect::MakeSubset(std::move(srcView), kPremul_SkAlphaType, SkMatrix::I(),
344 sampler, SkRect::Make(srcBounds), caps);
345 return std::unique_ptr<GrFragmentProcessor>(
346 new GrMatrixConvolutionEffect(std::move(child), kernelWrapper, std::move(kernelFP),
347 gain, bias, kernelOffset, convolveAlpha));
348 }
349
GR_DEFINE_FRAGMENT_PROCESSOR_TEST(GrMatrixConvolutionEffect)

#if GR_TEST_UTILS
// Builds a randomized GrMatrixConvolutionEffect for processor tests. The kernel area is capped
// at twice kMaxUniformSize, so both uniform and sampled kernels can be produced.
std::unique_ptr<GrFragmentProcessor> GrMatrixConvolutionEffect::TestCreate(GrProcessorTestData* d) {
    auto [view, ct, at] = d->randomView();
    SkRandom* rand = d->fRandom;

    static constexpr size_t kMaxTestKernelSize = 2 * kMaxUniformSize;
    // Pick the width first, then limit the height so the area stays within the cap.
    const int width = rand->nextRangeU(1, kMaxTestKernelSize);
    const int height = rand->nextRangeU(1, kMaxTestKernelSize / width);
    const SkISize kernelSize = SkISize::Make(width, height);

    const int kernelCount = width * height;
    std::unique_ptr<SkScalar[]> kernel(new SkScalar[kernelCount]);
    for (int i = 0; i < kernelCount; i++) {
        kernel[i] = rand->nextSScalar1();
    }
    const SkScalar gain = rand->nextSScalar1();
    const SkScalar bias = rand->nextSScalar1();

    const uint32_t offsetX = rand->nextRangeU(0, kernelSize.width());
    const uint32_t offsetY = rand->nextRangeU(0, kernelSize.height());
    const SkIPoint kernelOffset = SkIPoint::Make(offsetX, offsetY);

    const uint32_t left = rand->nextRangeU(0, view.width());
    const uint32_t top = rand->nextRangeU(0, view.height());
    const uint32_t rectW = rand->nextRangeU(0, view.width());
    const uint32_t rectH = rand->nextRangeU(0, view.height());
    const SkIRect bounds = SkIRect::MakeXYWH(left, top, rectW, rectH);

    const auto wm = static_cast<GrSamplerState::WrapMode>(
            rand->nextULessThan(GrSamplerState::kWrapModeCount));
    const bool convolveAlpha = rand->nextBool();

    return GrMatrixConvolutionEffect::Make(d->context(),
                                           std::move(view),
                                           bounds,
                                           kernelSize,
                                           kernel.get(),
                                           gain,
                                           bias,
                                           kernelOffset,
                                           wm,
                                           convolveAlpha,
                                           *d->caps());
}
#endif
393