1 /*
2 * Copyright 2016 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8 #ifndef SkLinearBitmapPipeline_sampler_DEFINED
9 #define SkLinearBitmapPipeline_sampler_DEFINED
10
#include <tuple>
#include <utility>

#include "SkAutoMalloc.h"
#include "SkColor.h"
#include "SkColorPriv.h"
#include "SkFixed.h"  // for SkFixed1 only. Don't use SkFixed in this file.
#include "SkHalf.h"
#include "SkLinearBitmapPipeline_core.h"
#include "SkNx.h"
#include "SkPM4fPriv.h"
21
22 namespace {
// Explanation of the math:
//              1 - x      x
//           +--------+--------+
//           |        |        |
//  1 - y    |  px00  |  px10  |
//           |        |        |
//           +--------+--------+
//           |        |        |
//    y      |  px01  |  px11  |
//           |        |        |
//           +--------+--------+
//
//
// Each pixel pxXY is multiplied by a different factor derived from the fractional parts of x
// and y:
// * px00 -> (1 - x)(1 - y) = 1 - x - y + xy
// * px10 -> x(1 - y) = x - xy
// * px01 -> (1 - x)y = y - xy
// * px11 -> xy
// So x * y is calculated first and then used to calculate all the other factors.
bilerp4(Sk4s xs,Sk4s ys,Sk4f px00,Sk4f px10,Sk4f px01,Sk4f px11)43 static Sk4s SK_VECTORCALL bilerp4(Sk4s xs, Sk4s ys, Sk4f px00, Sk4f px10,
44 Sk4f px01, Sk4f px11) {
45 // Calculate fractional xs and ys.
46 Sk4s fxs = xs - xs.floor();
47 Sk4s fys = ys - ys.floor();
48 Sk4s fxys{fxs * fys};
49 Sk4f sum = px11 * fxys;
50 sum = sum + px01 * (fys - fxys);
51 sum = sum + px10 * (fxs - fxys);
52 sum = sum + px00 * (Sk4f{1.0f} - fxs - fys + fxys);
53 return sum;
54 }
55
56 ////////////////////////////////////////////////////////////////////////////////////////////////////
// PixelConverter is the lowest level interface to the source data. There is a PixelConverter
// specialization for each of the different SkColorTypes.
59 template <SkColorType, SkGammaType> class PixelConverter;
60
61 // Alpha handling:
62 // The alpha from the paint (tintColor) is used in the blend part of the pipeline to modulate
63 // the entire bitmap. So, the tint color is given an alpha of 1.0 so that the later alpha can
64 // modulate this color later.
65 template <>
66 class PixelConverter<kAlpha_8_SkColorType, kLinear_SkGammaType> {
67 public:
68 using Element = uint8_t;
PixelConverter(const SkPixmap & srcPixmap,SkColor tintColor)69 PixelConverter(const SkPixmap& srcPixmap, SkColor tintColor) {
70 fTintColor = SkColor4f::FromColor(tintColor);
71 fTintColor.fA = 1.0f;
72 }
73
toSk4f(const Element pixel)74 Sk4f toSk4f(const Element pixel) const {
75 return Sk4f::Load(&fTintColor) * (pixel * (1.0f/255.0f));
76 }
77
78 private:
79 SkColor4f fTintColor;
80 };
81
82 template <SkGammaType gammaType>
pmcolor_to_rgba(SkPMColor pixel)83 static inline Sk4f pmcolor_to_rgba(SkPMColor pixel) {
84 return swizzle_rb_if_bgra(
85 (gammaType == kSRGB_SkGammaType) ? Sk4f_fromS32(pixel)
86 : Sk4f_fromL32(pixel));
87 }
88
89 template <SkGammaType gammaType>
90 class PixelConverter<kRGB_565_SkColorType, gammaType> {
91 public:
92 using Element = uint16_t;
PixelConverter(const SkPixmap & srcPixmap)93 PixelConverter(const SkPixmap& srcPixmap) { }
94
toSk4f(Element pixel)95 Sk4f toSk4f(Element pixel) const {
96 return pmcolor_to_rgba<gammaType>(SkPixel16ToPixel32(pixel));
97 }
98 };
99
100 template <SkGammaType gammaType>
101 class PixelConverter<kARGB_4444_SkColorType, gammaType> {
102 public:
103 using Element = uint16_t;
PixelConverter(const SkPixmap & srcPixmap)104 PixelConverter(const SkPixmap& srcPixmap) { }
105
toSk4f(Element pixel)106 Sk4f toSk4f(Element pixel) const {
107 return pmcolor_to_rgba<gammaType>(SkPixel4444ToPixel32(pixel));
108 }
109 };
110
111 template <SkGammaType gammaType>
112 class PixelConverter<kRGBA_8888_SkColorType, gammaType> {
113 public:
114 using Element = uint32_t;
PixelConverter(const SkPixmap & srcPixmap)115 PixelConverter(const SkPixmap& srcPixmap) { }
116
toSk4f(Element pixel)117 Sk4f toSk4f(Element pixel) const {
118 return gammaType == kSRGB_SkGammaType
119 ? Sk4f_fromS32(pixel)
120 : Sk4f_fromL32(pixel);
121 }
122 };
123
124 template <SkGammaType gammaType>
125 class PixelConverter<kBGRA_8888_SkColorType, gammaType> {
126 public:
127 using Element = uint32_t;
PixelConverter(const SkPixmap & srcPixmap)128 PixelConverter(const SkPixmap& srcPixmap) { }
129
toSk4f(Element pixel)130 Sk4f toSk4f(Element pixel) const {
131 return swizzle_rb(
132 gammaType == kSRGB_SkGammaType ? Sk4f_fromS32(pixel) : Sk4f_fromL32(pixel));
133 }
134 };
135
136 template <SkGammaType gammaType>
137 class PixelConverter<kGray_8_SkColorType, gammaType> {
138 public:
139 using Element = uint8_t;
PixelConverter(const SkPixmap & srcPixmap)140 PixelConverter(const SkPixmap& srcPixmap) { }
141
toSk4f(Element pixel)142 Sk4f toSk4f(Element pixel) const {
143 float gray = (gammaType == kSRGB_SkGammaType)
144 ? sk_linear_from_srgb[pixel]
145 : pixel * (1/255.0f);
146 return {gray, gray, gray, 1.0f};
147 }
148 };
149
150 template <>
151 class PixelConverter<kRGBA_F16_SkColorType, kLinear_SkGammaType> {
152 public:
153 using Element = uint64_t;
PixelConverter(const SkPixmap & srcPixmap)154 PixelConverter(const SkPixmap& srcPixmap) { }
155
toSk4f(const Element pixel)156 Sk4f toSk4f(const Element pixel) const {
157 return SkHalfToFloat_finite_ftz(pixel);
158 }
159 };
160
161 class PixelAccessorShim {
162 public:
PixelAccessorShim(SkLinearBitmapPipeline::PixelAccessorInterface * accessor)163 explicit PixelAccessorShim(SkLinearBitmapPipeline::PixelAccessorInterface* accessor)
164 : fPixelAccessor(accessor) { }
165
getFewPixels(int n,Sk4i xs,Sk4i ys,Sk4f * px0,Sk4f * px1,Sk4f * px2)166 void SK_VECTORCALL getFewPixels(
167 int n, Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2) const {
168 fPixelAccessor->getFewPixels(n, xs, ys, px0, px1, px2);
169 }
170
get4Pixels(Sk4i xs,Sk4i ys,Sk4f * px0,Sk4f * px1,Sk4f * px2,Sk4f * px3)171 void SK_VECTORCALL get4Pixels(
172 Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const {
173 fPixelAccessor->get4Pixels(xs, ys, px0, px1, px2, px3);
174 }
175
get4Pixels(const void * src,int index,Sk4f * px0,Sk4f * px1,Sk4f * px2,Sk4f * px3)176 void get4Pixels(
177 const void* src, int index, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const {
178 fPixelAccessor->get4Pixels(src, index, px0, px1, px2, px3);
179 }
180
getPixelFromRow(const void * row,int index)181 Sk4f getPixelFromRow(const void* row, int index) const {
182 return fPixelAccessor->getPixelFromRow(row, index);
183 }
184
getPixelAt(int index)185 Sk4f getPixelAt(int index) const {
186 return fPixelAccessor->getPixelAt(index);
187 }
188
row(int y)189 const void* row(int y) const {
190 return fPixelAccessor->row(y);
191 }
192
193 private:
194 SkLinearBitmapPipeline::PixelAccessorInterface* const fPixelAccessor;
195 };
196
197 ////////////////////////////////////////////////////////////////////////////////////////////////////
198 // PixelAccessor handles all the same plumbing for all the PixelGetters.
199 template <SkColorType colorType, SkGammaType gammaType>
200 class PixelAccessor final : public SkLinearBitmapPipeline::PixelAccessorInterface {
201 using Element = typename PixelConverter<colorType, gammaType>::Element;
202 public:
203 template <typename... Args>
PixelAccessor(const SkPixmap & srcPixmap,Args &&...args)204 PixelAccessor(const SkPixmap& srcPixmap, Args&&... args)
205 : fSrc{static_cast<const Element*>(srcPixmap.addr())}
206 , fWidth{srcPixmap.rowBytesAsPixels()}
207 , fConverter{srcPixmap, std::move<Args>(args)...} { }
208
getFewPixels(int n,Sk4i xs,Sk4i ys,Sk4f * px0,Sk4f * px1,Sk4f * px2)209 void SK_VECTORCALL getFewPixels (
210 int n, Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2) const override {
211 Sk4i bufferLoc = ys * fWidth + xs;
212 switch (n) {
213 case 3:
214 *px2 = this->getPixelAt(bufferLoc[2]);
215 case 2:
216 *px1 = this->getPixelAt(bufferLoc[1]);
217 case 1:
218 *px0 = this->getPixelAt(bufferLoc[0]);
219 default:
220 break;
221 }
222 }
223
get4Pixels(Sk4i xs,Sk4i ys,Sk4f * px0,Sk4f * px1,Sk4f * px2,Sk4f * px3)224 void SK_VECTORCALL get4Pixels(
225 Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const override {
226 Sk4i bufferLoc = ys * fWidth + xs;
227 *px0 = this->getPixelAt(bufferLoc[0]);
228 *px1 = this->getPixelAt(bufferLoc[1]);
229 *px2 = this->getPixelAt(bufferLoc[2]);
230 *px3 = this->getPixelAt(bufferLoc[3]);
231 }
232
get4Pixels(const void * src,int index,Sk4f * px0,Sk4f * px1,Sk4f * px2,Sk4f * px3)233 void get4Pixels(
234 const void* src, int index, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const override {
235 *px0 = this->getPixelFromRow(src, index + 0);
236 *px1 = this->getPixelFromRow(src, index + 1);
237 *px2 = this->getPixelFromRow(src, index + 2);
238 *px3 = this->getPixelFromRow(src, index + 3);
239 }
240
getPixelFromRow(const void * row,int index)241 Sk4f getPixelFromRow(const void* row, int index) const override {
242 const Element* src = static_cast<const Element*>(row);
243 return fConverter.toSk4f(src[index]);
244 }
245
getPixelAt(int index)246 Sk4f getPixelAt(int index) const override {
247 return this->getPixelFromRow(fSrc, index);
248 }
249
row(int y)250 const void* row(int y) const override { return fSrc + y * fWidth; }
251
252 private:
253 const Element* const fSrc;
254 const int fWidth;
255 PixelConverter<colorType, gammaType> fConverter;
256 };
257
258 // We're moving through source space at a rate of 1 source pixel per 1 dst pixel.
259 // We'll never re-use pixels, but we can at least load contiguous pixels.
260 template <typename Next, typename Strategy>
src_strategy_blend(Span span,Next * next,Strategy * strategy)261 static void src_strategy_blend(Span span, Next* next, Strategy* strategy) {
262 SkPoint start;
263 SkScalar length;
264 int count;
265 std::tie(start, length, count) = span;
266 int ix = SkScalarFloorToInt(X(start));
267 const void* row = strategy->row((int)std::floor(Y(start)));
268 if (length > 0) {
269 while (count >= 4) {
270 Sk4f px0, px1, px2, px3;
271 strategy->get4Pixels(row, ix, &px0, &px1, &px2, &px3);
272 next->blend4Pixels(px0, px1, px2, px3);
273 ix += 4;
274 count -= 4;
275 }
276
277 while (count > 0) {
278 next->blendPixel(strategy->getPixelFromRow(row, ix));
279 ix += 1;
280 count -= 1;
281 }
282 } else {
283 while (count >= 4) {
284 Sk4f px0, px1, px2, px3;
285 strategy->get4Pixels(row, ix - 3, &px3, &px2, &px1, &px0);
286 next->blend4Pixels(px0, px1, px2, px3);
287 ix -= 4;
288 count -= 4;
289 }
290
291 while (count > 0) {
292 next->blendPixel(strategy->getPixelFromRow(row, ix));
293 ix -= 1;
294 count -= 1;
295 }
296 }
297 }
298
299 // -- NearestNeighborSampler -----------------------------------------------------------------------
300 // NearestNeighborSampler - use nearest neighbor filtering to create runs of destination pixels.
301 template<typename Accessor, typename Next>
302 class NearestNeighborSampler : public SkLinearBitmapPipeline::SampleProcessorInterface {
303 public:
304 template<typename... Args>
NearestNeighborSampler(SkLinearBitmapPipeline::BlendProcessorInterface * next,Args &&...args)305 NearestNeighborSampler(SkLinearBitmapPipeline::BlendProcessorInterface* next, Args&& ... args)
306 : fNext{next}, fAccessor{std::forward<Args>(args)...} { }
307
NearestNeighborSampler(SkLinearBitmapPipeline::BlendProcessorInterface * next,const NearestNeighborSampler & sampler)308 NearestNeighborSampler(SkLinearBitmapPipeline::BlendProcessorInterface* next,
309 const NearestNeighborSampler& sampler)
310 : fNext{next}, fAccessor{sampler.fAccessor} { }
311
pointListFew(int n,Sk4s xs,Sk4s ys)312 void SK_VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) override {
313 SkASSERT(0 < n && n < 4);
314 Sk4f px0, px1, px2;
315 fAccessor.getFewPixels(n, SkNx_cast<int>(xs), SkNx_cast<int>(ys), &px0, &px1, &px2);
316 if (n >= 1) fNext->blendPixel(px0);
317 if (n >= 2) fNext->blendPixel(px1);
318 if (n >= 3) fNext->blendPixel(px2);
319 }
320
pointList4(Sk4s xs,Sk4s ys)321 void SK_VECTORCALL pointList4(Sk4s xs, Sk4s ys) override {
322 Sk4f px0, px1, px2, px3;
323 fAccessor.get4Pixels(SkNx_cast<int>(xs), SkNx_cast<int>(ys), &px0, &px1, &px2, &px3);
324 fNext->blend4Pixels(px0, px1, px2, px3);
325 }
326
pointSpan(Span span)327 void pointSpan(Span span) override {
328 SkASSERT(!span.isEmpty());
329 SkPoint start;
330 SkScalar length;
331 int count;
332 std::tie(start, length, count) = span;
333 SkScalar absLength = SkScalarAbs(length);
334 if (absLength < (count - 1)) {
335 this->spanSlowRate(span);
336 } else if (absLength == (count - 1)) {
337 src_strategy_blend(span, fNext, &fAccessor);
338 } else {
339 this->spanFastRate(span);
340 }
341 }
342
repeatSpan(Span span,int32_t repeatCount)343 void repeatSpan(Span span, int32_t repeatCount) override {
344 while (repeatCount > 0) {
345 this->pointSpan(span);
346 repeatCount--;
347 }
348 }
349
350 private:
351 // When moving through source space more slowly than dst space (zoomed in),
352 // we'll be sampling from the same source pixel more than once.
spanSlowRate(Span span)353 void spanSlowRate(Span span) {
354 SkPoint start; SkScalar length; int count;
355 std::tie(start, length, count) = span;
356 SkScalar x = X(start);
357 // fx is a fixed 48.16 number.
358 int64_t fx = static_cast<int64_t>(x * SK_Fixed1);
359 SkScalar dx = length / (count - 1);
360 // fdx is a fixed 48.16 number.
361 int64_t fdx = static_cast<int64_t>(dx * SK_Fixed1);
362
363 const void* row = fAccessor.row((int)std::floor(Y(start)));
364 Next* next = fNext;
365
366 int64_t ix = fx >> 16;
367 int64_t prevIX = ix;
368 Sk4f fpixel = fAccessor.getPixelFromRow(row, ix);
369
370 // When dx is less than one, each pixel is used more than once. Using the fixed point fx
371 // allows the code to quickly check that the same pixel is being used. The code uses this
372 // same pixel check to do the sRGB and normalization only once.
373 auto getNextPixel = [&]() {
374 if (ix != prevIX) {
375 fpixel = fAccessor.getPixelFromRow(row, ix);
376 prevIX = ix;
377 }
378 fx += fdx;
379 ix = fx >> 16;
380 return fpixel;
381 };
382
383 while (count >= 4) {
384 Sk4f px0 = getNextPixel();
385 Sk4f px1 = getNextPixel();
386 Sk4f px2 = getNextPixel();
387 Sk4f px3 = getNextPixel();
388 next->blend4Pixels(px0, px1, px2, px3);
389 count -= 4;
390 }
391 while (count > 0) {
392 next->blendPixel(getNextPixel());
393 count -= 1;
394 }
395 }
396
397 // We're moving through source space at a rate of 1 source pixel per 1 dst pixel.
398 // We'll never re-use pixels, but we can at least load contiguous pixels.
spanUnitRate(Span span)399 void spanUnitRate(Span span) {
400 src_strategy_blend(span, fNext, &fAccessor);
401 }
402
403 // We're moving through source space faster than dst (zoomed out),
404 // so we'll never reuse a source pixel or be able to do contiguous loads.
spanFastRate(Span span)405 void spanFastRate(Span span) {
406 span_fallback(span, this);
407 }
408
409 Next* const fNext;
410 Accessor fAccessor;
411 };
412
413 // From an edgeType, the integer value of a pixel vs, and the integer value of the extreme edge
414 // vMax, take the point which might be off the tile by one pixel and either wrap it or pin it to
415 // generate the right pixel. The value vs is on the interval [-1, vMax + 1]. It produces a value
416 // on the interval [0, vMax].
417 // Note: vMax is not width or height, but width-1 or height-1 because it is the largest valid pixel.
adjust_edge(SkShader::TileMode edgeType,int vs,int vMax)418 static inline int adjust_edge(SkShader::TileMode edgeType, int vs, int vMax) {
419 SkASSERT(-1 <= vs && vs <= vMax + 1);
420 switch (edgeType) {
421 case SkShader::kClamp_TileMode:
422 case SkShader::kMirror_TileMode:
423 vs = std::max(vs, 0);
424 vs = std::min(vs, vMax);
425 break;
426 case SkShader::kRepeat_TileMode:
427 vs = (vs <= vMax) ? vs : 0;
428 vs = (vs >= 0) ? vs : vMax;
429 break;
430 }
431 SkASSERT(0 <= vs && vs <= vMax);
432 return vs;
433 }
434
435 // From a sample point on the tile, return the top or left filter value.
436 // The result r should be in the range (0, 1]. Since this represents the weight given to the top
437 // left element, then if x == 0.5 the filter value should be 1.0.
438 // The input sample point must be on the tile, therefore it must be >= 0.
sample_to_filter(SkScalar x)439 static SkScalar sample_to_filter(SkScalar x) {
440 SkASSERT(x >= 0.0f);
441 // The usual form of the top or left edge is x - .5, but since we are working on the unit
442 // square, then x + .5 works just as well. This also guarantees that v > 0.0 allowing the use
443 // of trunc.
444 SkScalar v = x + 0.5f;
445 // Produce the top or left offset a value on the range [0, 1).
446 SkScalar f = v - SkScalarTruncToScalar(v);
447 // Produce the filter value which is on the range (0, 1].
448 SkScalar r = 1.0f - f;
449 SkASSERT(0.0f < r && r <= 1.0f);
450 return r;
451 }
452
453 // -- BilerpSampler --------------------------------------------------------------------------------
454 // BilerpSampler - use a bilerp filter to create runs of destination pixels.
455 // Note: in the code below, there are two types of points
456 // * sample points - these are the points passed in by pointList* and Spans.
457 // * filter points - are created from a sample point to form the coordinates of the points
458 // to use in the filter and to generate the filter values.
459 template<typename Accessor, typename Next>
460 class BilerpSampler : public SkLinearBitmapPipeline::SampleProcessorInterface {
461 public:
462 template<typename... Args>
BilerpSampler(SkLinearBitmapPipeline::BlendProcessorInterface * next,SkISize dimensions,SkShader::TileMode xTile,SkShader::TileMode yTile,Args &&...args)463 BilerpSampler(
464 SkLinearBitmapPipeline::BlendProcessorInterface* next,
465 SkISize dimensions,
466 SkShader::TileMode xTile, SkShader::TileMode yTile,
467 Args&& ... args
468 )
469 : fNext{next}
470 , fXEdgeType{xTile}
471 , fXMax{dimensions.width() - 1}
472 , fYEdgeType{yTile}
473 , fYMax{dimensions.height() - 1}
474 , fAccessor{std::forward<Args>(args)...} { }
475
BilerpSampler(SkLinearBitmapPipeline::BlendProcessorInterface * next,const BilerpSampler & sampler)476 BilerpSampler(SkLinearBitmapPipeline::BlendProcessorInterface* next,
477 const BilerpSampler& sampler)
478 : fNext{next}
479 , fXEdgeType{sampler.fXEdgeType}
480 , fXMax{sampler.fXMax}
481 , fYEdgeType{sampler.fYEdgeType}
482 , fYMax{sampler.fYMax}
483 , fAccessor{sampler.fAccessor} { }
484
pointListFew(int n,Sk4s xs,Sk4s ys)485 void SK_VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) override {
486 SkASSERT(0 < n && n < 4);
487 auto bilerpPixel = [&](int index) {
488 return this->bilerpSamplePoint(SkPoint{xs[index], ys[index]});
489 };
490
491 if (n >= 1) fNext->blendPixel(bilerpPixel(0));
492 if (n >= 2) fNext->blendPixel(bilerpPixel(1));
493 if (n >= 3) fNext->blendPixel(bilerpPixel(2));
494 }
495
pointList4(Sk4s xs,Sk4s ys)496 void SK_VECTORCALL pointList4(Sk4s xs, Sk4s ys) override {
497 auto bilerpPixel = [&](int index) {
498 return this->bilerpSamplePoint(SkPoint{xs[index], ys[index]});
499 };
500 fNext->blend4Pixels(bilerpPixel(0), bilerpPixel(1), bilerpPixel(2), bilerpPixel(3));
501 }
502
pointSpan(Span span)503 void pointSpan(Span span) override {
504 SkASSERT(!span.isEmpty());
505 SkPoint start;
506 SkScalar length;
507 int count;
508 std::tie(start, length, count) = span;
509
510 // Nothing to do.
511 if (count == 0) {
512 return;
513 }
514
515 // Trivial case. No sample points are generated other than start.
516 if (count == 1) {
517 fNext->blendPixel(this->bilerpSamplePoint(start));
518 return;
519 }
520
521 // Note: the following code could be done in terms of dx = length / (count -1), but that
522 // would introduce a divide that is not needed for the most common dx == 1 cases.
523 SkScalar absLength = SkScalarAbs(length);
524 if (absLength == 0.0f) {
525 // |dx| == 0
526 // length is zero, so clamp an edge pixel.
527 this->spanZeroRate(span);
528 } else if (absLength < (count - 1)) {
529 // 0 < |dx| < 1.
530 this->spanSlowRate(span);
531 } else if (absLength == (count - 1)) {
532 // |dx| == 1.
533 if (sample_to_filter(span.startX()) == 1.0f
534 && sample_to_filter(span.startY()) == 1.0f) {
535 // All the pixels are aligned with the dest; go fast.
536 src_strategy_blend(span, fNext, &fAccessor);
537 } else {
538 // There is some sub-pixel offsets, so bilerp.
539 this->spanUnitRate(span);
540 }
541 } else if (absLength < 2.0f * (count - 1)) {
542 // 1 < |dx| < 2.
543 this->spanMediumRate(span);
544 } else {
545 // |dx| >= 2.
546 this->spanFastRate(span);
547 }
548 }
549
repeatSpan(Span span,int32_t repeatCount)550 void repeatSpan(Span span, int32_t repeatCount) override {
551 while (repeatCount > 0) {
552 this->pointSpan(span);
553 repeatCount--;
554 }
555 }
556
557 private:
558
559 // Convert a sample point to the points used by the filter.
filterPoints(SkPoint sample,Sk4i * filterXs,Sk4i * filterYs)560 void filterPoints(SkPoint sample, Sk4i* filterXs, Sk4i* filterYs) {
561 // May be less than zero. Be careful to use Floor.
562 int x0 = adjust_edge(fXEdgeType, SkScalarFloorToInt(X(sample) - 0.5), fXMax);
563 // Always greater than zero. Use the faster Trunc.
564 int x1 = adjust_edge(fXEdgeType, SkScalarTruncToInt(X(sample) + 0.5), fXMax);
565 int y0 = adjust_edge(fYEdgeType, SkScalarFloorToInt(Y(sample) - 0.5), fYMax);
566 int y1 = adjust_edge(fYEdgeType, SkScalarTruncToInt(Y(sample) + 0.5), fYMax);
567
568 *filterXs = Sk4i{x0, x1, x0, x1};
569 *filterYs = Sk4i{y0, y0, y1, y1};
570 }
571
572 // Given a sample point, generate a color by bilerping the four filter points.
bilerpSamplePoint(SkPoint sample)573 Sk4f bilerpSamplePoint(SkPoint sample) {
574 Sk4i iXs, iYs;
575 filterPoints(sample, &iXs, &iYs);
576 Sk4f px00, px10, px01, px11;
577 fAccessor.get4Pixels(iXs, iYs, &px00, &px10, &px01, &px11);
578 return bilerp4(Sk4f{X(sample) - 0.5f}, Sk4f{Y(sample) - 0.5f}, px00, px10, px01, px11);
579 }
580
581 // Get two pixels at x from row0 and row1.
get2PixelColumn(const void * row0,const void * row1,int x,Sk4f * px0,Sk4f * px1)582 void get2PixelColumn(const void* row0, const void* row1, int x, Sk4f* px0, Sk4f* px1) {
583 *px0 = fAccessor.getPixelFromRow(row0, x);
584 *px1 = fAccessor.getPixelFromRow(row1, x);
585 }
586
587 // |dx| == 0. This code assumes that length is zero.
spanZeroRate(Span span)588 void spanZeroRate(Span span) {
589 SkPoint start; SkScalar length; int count;
590 std::tie(start, length, count) = span;
591 SkASSERT(length == 0.0f);
592
593 // Filter for the blending of the top and bottom pixels.
594 SkScalar filterY = sample_to_filter(Y(start));
595
596 // Generate the four filter points from the sample point start. Generate the row* values.
597 Sk4i iXs, iYs;
598 this->filterPoints(start, &iXs, &iYs);
599 const void* const row0 = fAccessor.row(iYs[0]);
600 const void* const row1 = fAccessor.row(iYs[2]);
601
602 // Get the two pixels that make up the clamping pixel.
603 Sk4f pxTop, pxBottom;
604 this->get2PixelColumn(row0, row1, SkScalarFloorToInt(X(start)), &pxTop, &pxBottom);
605 Sk4f pixel = pxTop * filterY + (1.0f - filterY) * pxBottom;
606
607 while (count >= 4) {
608 fNext->blend4Pixels(pixel, pixel, pixel, pixel);
609 count -= 4;
610 }
611 while (count > 0) {
612 fNext->blendPixel(pixel);
613 count -= 1;
614 }
615 }
616
617 // 0 < |dx| < 1. This code reuses the calculations from previous pixels to reduce
618 // computation. In particular, several destination pixels maybe generated from the same four
619 // source pixels.
620 // In the following code a "part" is a combination of two pixels from the same column of the
621 // filter.
spanSlowRate(Span span)622 void spanSlowRate(Span span) {
623 SkPoint start; SkScalar length; int count;
624 std::tie(start, length, count) = span;
625
626 // Calculate the distance between each sample point.
627 const SkScalar dx = length / (count - 1);
628 SkASSERT(-1.0f < dx && dx < 1.0f && dx != 0.0f);
629
630 // Generate the filter values for the top-left corner.
631 // Note: these values are in filter space; this has implications about how to adjust
632 // these values at each step. For example, as the sample point increases, the filter
633 // value decreases, this is because the filter and position are related by
634 // (1 - (X(sample) - .5)) % 1. The (1 - stuff) causes the filter to move in the opposite
635 // direction of the sample point which is increasing by dx.
636 SkScalar filterX = sample_to_filter(X(start));
637 SkScalar filterY = sample_to_filter(Y(start));
638
639 // Generate the four filter points from the sample point start. Generate the row* values.
640 Sk4i iXs, iYs;
641 this->filterPoints(start, &iXs, &iYs);
642 const void* const row0 = fAccessor.row(iYs[0]);
643 const void* const row1 = fAccessor.row(iYs[2]);
644
645 // Generate part of the filter value at xColumn.
646 auto partAtColumn = [&](int xColumn) {
647 int adjustedColumn = adjust_edge(fXEdgeType, xColumn, fXMax);
648 Sk4f pxTop, pxBottom;
649 this->get2PixelColumn(row0, row1, adjustedColumn, &pxTop, &pxBottom);
650 return pxTop * filterY + (1.0f - filterY) * pxBottom;
651 };
652
653 // The leftPart is made up of two pixels from the left column of the filter, right part
654 // is similar. The top and bottom pixels in the *Part are created as a linear blend of
655 // the top and bottom pixels using filterY. See the partAtColumn function above.
656 Sk4f leftPart = partAtColumn(iXs[0]);
657 Sk4f rightPart = partAtColumn(iXs[1]);
658
659 // Create a destination color by blending together a left and right part using filterX.
660 auto bilerp = [&](const Sk4f& leftPart, const Sk4f& rightPart) {
661 Sk4f pixel = leftPart * filterX + rightPart * (1.0f - filterX);
662 return check_pixel(pixel);
663 };
664
665 // Send the first pixel to the destination. This simplifies the loop structure so that no
666 // extra pixels are fetched for the last iteration of the loop.
667 fNext->blendPixel(bilerp(leftPart, rightPart));
668 count -= 1;
669
670 if (dx > 0.0f) {
671 // * positive direction - generate destination pixels by sliding the filter from left
672 // to right.
673 int rightPartCursor = iXs[1];
674
675 // Advance the filter from left to right. Remember that moving the top-left corner of
676 // the filter to the right actually makes the filter value smaller.
677 auto advanceFilter = [&]() {
678 filterX -= dx;
679 if (filterX <= 0.0f) {
680 filterX += 1.0f;
681 leftPart = rightPart;
682 rightPartCursor += 1;
683 rightPart = partAtColumn(rightPartCursor);
684 }
685 SkASSERT(0.0f < filterX && filterX <= 1.0f);
686
687 return bilerp(leftPart, rightPart);
688 };
689
690 while (count >= 4) {
691 Sk4f px0 = advanceFilter(),
692 px1 = advanceFilter(),
693 px2 = advanceFilter(),
694 px3 = advanceFilter();
695 fNext->blend4Pixels(px0, px1, px2, px3);
696 count -= 4;
697 }
698
699 while (count > 0) {
700 fNext->blendPixel(advanceFilter());
701 count -= 1;
702 }
703 } else {
704 // * negative direction - generate destination pixels by sliding the filter from
705 // right to left.
706 int leftPartCursor = iXs[0];
707
708 // Advance the filter from right to left. Remember that moving the top-left corner of
709 // the filter to the left actually makes the filter value larger.
710 auto advanceFilter = [&]() {
711 // Remember, dx < 0 therefore this adds |dx| to filterX.
712 filterX -= dx;
713 // At this point filterX may be > 1, and needs to be wrapped back on to the filter
714 // interval, and the next column in the filter is calculated.
715 if (filterX > 1.0f) {
716 filterX -= 1.0f;
717 rightPart = leftPart;
718 leftPartCursor -= 1;
719 leftPart = partAtColumn(leftPartCursor);
720 }
721 SkASSERT(0.0f < filterX && filterX <= 1.0f);
722
723 return bilerp(leftPart, rightPart);
724 };
725
726 while (count >= 4) {
727 Sk4f px0 = advanceFilter(),
728 px1 = advanceFilter(),
729 px2 = advanceFilter(),
730 px3 = advanceFilter();
731 fNext->blend4Pixels(px0, px1, px2, px3);
732 count -= 4;
733 }
734
735 while (count > 0) {
736 fNext->blendPixel(advanceFilter());
737 count -= 1;
738 }
739 }
740 }
741
742 // |dx| == 1. Moving through source space at a rate of 1 source pixel per 1 dst pixel.
743 // Every filter part is used for two destination pixels, and the code can bulk load four
744 // pixels at a time.
spanUnitRate(Span span)745 void spanUnitRate(Span span) {
746 SkPoint start; SkScalar length; int count;
747 std::tie(start, length, count) = span;
748 SkASSERT(SkScalarAbs(length) == (count - 1));
749
750 // Calculate the four filter points of start, and use the two different Y values to
751 // generate the row pointers.
752 Sk4i iXs, iYs;
753 filterPoints(start, &iXs, &iYs);
754 const void* row0 = fAccessor.row(iYs[0]);
755 const void* row1 = fAccessor.row(iYs[2]);
756
757 // Calculate the filter values for the top-left filter element.
758 const SkScalar filterX = sample_to_filter(X(start));
759 const SkScalar filterY = sample_to_filter(Y(start));
760
761 // Generate part of the filter value at xColumn.
762 auto partAtColumn = [&](int xColumn) {
763 int adjustedColumn = adjust_edge(fXEdgeType, xColumn, fXMax);
764 Sk4f pxTop, pxBottom;
765 this->get2PixelColumn(row0, row1, adjustedColumn, &pxTop, &pxBottom);
766 return pxTop * filterY + (1.0f - filterY) * pxBottom;
767 };
768
769 auto get4Parts = [&](int ix, Sk4f* part0, Sk4f* part1, Sk4f* part2, Sk4f* part3) {
770 // Check if the pixels needed are near the edges. If not go fast using bulk pixels,
771 // otherwise be careful.
772 if (0 <= ix && ix <= fXMax - 3) {
773 Sk4f px00, px10, px20, px30,
774 px01, px11, px21, px31;
775 fAccessor.get4Pixels(row0, ix, &px00, &px10, &px20, &px30);
776 fAccessor.get4Pixels(row1, ix, &px01, &px11, &px21, &px31);
777 *part0 = filterY * px00 + (1.0f - filterY) * px01;
778 *part1 = filterY * px10 + (1.0f - filterY) * px11;
779 *part2 = filterY * px20 + (1.0f - filterY) * px21;
780 *part3 = filterY * px30 + (1.0f - filterY) * px31;
781 } else {
782 *part0 = partAtColumn(ix + 0);
783 *part1 = partAtColumn(ix + 1);
784 *part2 = partAtColumn(ix + 2);
785 *part3 = partAtColumn(ix + 3);
786 }
787 };
788
789 auto bilerp = [&](const Sk4f& part0, const Sk4f& part1) {
790 return part0 * filterX + part1 * (1.0f - filterX);
791 };
792
793 if (length > 0) {
794 // * positive direction - generate destination pixels by sliding the filter from left
795 // to right.
796
797 // overlapPart is the filter part from the end of the previous four pixels used at
798 // the start of the next four pixels.
799 Sk4f overlapPart = partAtColumn(iXs[0]);
800 int rightColumnCursor = iXs[1];
801 while (count >= 4) {
802 Sk4f part0, part1, part2, part3;
803 get4Parts(rightColumnCursor, &part0, &part1, &part2, &part3);
804 Sk4f px0 = bilerp(overlapPart, part0);
805 Sk4f px1 = bilerp(part0, part1);
806 Sk4f px2 = bilerp(part1, part2);
807 Sk4f px3 = bilerp(part2, part3);
808 overlapPart = part3;
809 fNext->blend4Pixels(px0, px1, px2, px3);
810 rightColumnCursor += 4;
811 count -= 4;
812 }
813
814 while (count > 0) {
815 Sk4f rightPart = partAtColumn(rightColumnCursor);
816
817 fNext->blendPixel(bilerp(overlapPart, rightPart));
818 overlapPart = rightPart;
819 rightColumnCursor += 1;
820 count -= 1;
821 }
822 } else {
823 // * negative direction - generate destination pixels by sliding the filter from
824 // right to left.
825 Sk4f overlapPart = partAtColumn(iXs[1]);
826 int leftColumnCursor = iXs[0];
827
828 while (count >= 4) {
829 Sk4f part0, part1, part2, part3;
830 get4Parts(leftColumnCursor - 3, &part3, &part2, &part1, &part0);
831 Sk4f px0 = bilerp(part0, overlapPart);
832 Sk4f px1 = bilerp(part1, part0);
833 Sk4f px2 = bilerp(part2, part1);
834 Sk4f px3 = bilerp(part3, part2);
835 overlapPart = part3;
836 fNext->blend4Pixels(px0, px1, px2, px3);
837 leftColumnCursor -= 4;
838 count -= 4;
839 }
840
841 while (count > 0) {
842 Sk4f leftPart = partAtColumn(leftColumnCursor);
843
844 fNext->blendPixel(bilerp(leftPart, overlapPart));
845 overlapPart = leftPart;
846 leftColumnCursor -= 1;
847 count -= 1;
848 }
849 }
850 }
851
852 // 1 < |dx| < 2. Going through the source pixels at a faster rate than the dest pixels, but
853 // still slow enough to take advantage of previous calculations.
spanMediumRate(Span span)854 void spanMediumRate(Span span) {
855 SkPoint start; SkScalar length; int count;
856 std::tie(start, length, count) = span;
857
858 // Calculate the distance between each sample point.
859 const SkScalar dx = length / (count - 1);
860 SkASSERT((-2.0f < dx && dx < -1.0f) || (1.0f < dx && dx < 2.0f));
861
862 // Generate the filter values for the top-left corner.
863 // Note: these values are in filter space; this has implications about how to adjust
864 // these values at each step. For example, as the sample point increases, the filter
865 // value decreases, this is because the filter and position are related by
866 // (1 - (X(sample) - .5)) % 1. The (1 - stuff) causes the filter to move in the opposite
867 // direction of the sample point which is increasing by dx.
868 SkScalar filterX = sample_to_filter(X(start));
869 SkScalar filterY = sample_to_filter(Y(start));
870
871 // Generate the four filter points from the sample point start. Generate the row* values.
872 Sk4i iXs, iYs;
873 this->filterPoints(start, &iXs, &iYs);
874 const void* const row0 = fAccessor.row(iYs[0]);
875 const void* const row1 = fAccessor.row(iYs[2]);
876
877 // Generate part of the filter value at xColumn.
878 auto partAtColumn = [&](int xColumn) {
879 int adjustedColumn = adjust_edge(fXEdgeType, xColumn, fXMax);
880 Sk4f pxTop, pxBottom;
881 this->get2PixelColumn(row0, row1, adjustedColumn, &pxTop, &pxBottom);
882 return pxTop * filterY + (1.0f - filterY) * pxBottom;
883 };
884
885 // The leftPart is made up of two pixels from the left column of the filter, right part
886 // is similar. The top and bottom pixels in the *Part are created as a linear blend of
887 // the top and bottom pixels using filterY. See the nextPart function below.
888 Sk4f leftPart = partAtColumn(iXs[0]);
889 Sk4f rightPart = partAtColumn(iXs[1]);
890
891 // Create a destination color by blending together a left and right part using filterX.
892 auto bilerp = [&](const Sk4f& leftPart, const Sk4f& rightPart) {
893 Sk4f pixel = leftPart * filterX + rightPart * (1.0f - filterX);
894 return check_pixel(pixel);
895 };
896
897 // Send the first pixel to the destination. This simplifies the loop structure so that no
898 // extra pixels are fetched for the last iteration of the loop.
899 fNext->blendPixel(bilerp(leftPart, rightPart));
900 count -= 1;
901
902 if (dx > 0.0f) {
903 // * positive direction - generate destination pixels by sliding the filter from left
904 // to right.
905 int rightPartCursor = iXs[1];
906
907 // Advance the filter from left to right. Remember that moving the top-left corner of
908 // the filter to the right actually makes the filter value smaller.
909 auto advanceFilter = [&]() {
910 filterX -= dx;
911 // At this point filterX is less than zero, but might actually be less than -1.
912 if (filterX > -1.0f) {
913 filterX += 1.0f;
914 leftPart = rightPart;
915 rightPartCursor += 1;
916 rightPart = partAtColumn(rightPartCursor);
917 } else {
918 filterX += 2.0f;
919 rightPartCursor += 2;
920 leftPart = partAtColumn(rightPartCursor - 1);
921 rightPart = partAtColumn(rightPartCursor);
922 }
923 SkASSERT(0.0f < filterX && filterX <= 1.0f);
924
925 return bilerp(leftPart, rightPart);
926 };
927
928 while (count >= 4) {
929 Sk4f px0 = advanceFilter(),
930 px1 = advanceFilter(),
931 px2 = advanceFilter(),
932 px3 = advanceFilter();
933 fNext->blend4Pixels(px0, px1, px2, px3);
934 count -= 4;
935 }
936
937 while (count > 0) {
938 fNext->blendPixel(advanceFilter());
939 count -= 1;
940 }
941 } else {
942 // * negative direction - generate destination pixels by sliding the filter from
943 // right to left.
944 int leftPartCursor = iXs[0];
945
946 auto advanceFilter = [&]() {
947 // Remember, dx < 0 therefore this adds |dx| to filterX.
948 filterX -= dx;
949 // At this point, filterX is greater than one, but may actually be greater than two.
950 if (filterX < 2.0f) {
951 filterX -= 1.0f;
952 rightPart = leftPart;
953 leftPartCursor -= 1;
954 leftPart = partAtColumn(leftPartCursor);
955 } else {
956 filterX -= 2.0f;
957 leftPartCursor -= 2;
958 rightPart = partAtColumn(leftPartCursor - 1);
959 leftPart = partAtColumn(leftPartCursor);
960 }
961 SkASSERT(0.0f < filterX && filterX <= 1.0f);
962 return bilerp(leftPart, rightPart);
963 };
964
965 while (count >= 4) {
966 Sk4f px0 = advanceFilter(),
967 px1 = advanceFilter(),
968 px2 = advanceFilter(),
969 px3 = advanceFilter();
970 fNext->blend4Pixels(px0, px1, px2, px3);
971 count -= 4;
972 }
973
974 while (count > 0) {
975 fNext->blendPixel(advanceFilter());
976 count -= 1;
977 }
978 }
979 }
980
981 // We're moving through source space faster than dst (zoomed out),
982 // so we'll never reuse a source pixel or be able to do contiguous loads.
spanFastRate(Span span)983 void spanFastRate(Span span) {
984 SkPoint start; SkScalar length; int count;
985 std::tie(start, length, count) = span;
986 SkScalar x = X(start);
987 SkScalar y = Y(start);
988
989 SkScalar dx = length / (count - 1);
990 while (count > 0) {
991 fNext->blendPixel(this->bilerpSamplePoint(SkPoint{x, y}));
992 x += dx;
993 count -= 1;
994 }
995 }
996
997 Next* const fNext;
998 const SkShader::TileMode fXEdgeType;
999 const int fXMax;
1000 const SkShader::TileMode fYEdgeType;
1001 const int fYMax;
1002 Accessor fAccessor;
1003 };
1004
1005 } // namespace
1006
1007 #endif // SkLinearBitmapPipeline_sampler_DEFINED
1008