• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2016 Google Inc.
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 
8 #ifndef SkLinearBitmapPipeline_sampler_DEFINED
9 #define SkLinearBitmapPipeline_sampler_DEFINED
10 
11 #include <tuple>
12 
13 #include "SkAutoMalloc.h"
14 #include "SkColor.h"
15 #include "SkColorPriv.h"
16 #include "SkFixed.h"  // for SkFixed1 only. Don't use SkFixed in this file.
17 #include "SkHalf.h"
18 #include "SkLinearBitmapPipeline_core.h"
19 #include "SkNx.h"
20 #include "SkPM4fPriv.h"
21 
22 namespace {
23 // Explanation of the math:
24 //              1 - x      x
25 //           +--------+--------+
26 //           |        |        |
27 //  1 - y    |  px00  |  px10  |
28 //           |        |        |
29 //           +--------+--------+
30 //           |        |        |
31 //    y      |  px01  |  px11  |
32 //           |        |        |
33 //           +--------+--------+
34 //
35 //
36 // Each pixel pxXY is multiplied by a different factor derived from the fractional parts of x
37 // and y:
38 // * px00 -> (1 - x)(1 - y) = 1 - x - y + xy
39 // * px10 -> x(1 - y) = x - xy
40 // * px01 -> (1 - x)y = y - xy
41 // * px11 -> xy
42 // So x * y is calculated first and then used to calculate all the other factors.
bilerp4(Sk4s xs,Sk4s ys,Sk4f px00,Sk4f px10,Sk4f px01,Sk4f px11)43 static Sk4s SK_VECTORCALL bilerp4(Sk4s xs, Sk4s ys, Sk4f px00, Sk4f px10,
44                                                     Sk4f px01, Sk4f px11) {
45     // Calculate fractional xs and ys.
46     Sk4s fxs = xs - xs.floor();
47     Sk4s fys = ys - ys.floor();
48     Sk4s fxys{fxs * fys};
49     Sk4f sum = px11 * fxys;
50     sum = sum + px01 * (fys - fxys);
51     sum = sum + px10 * (fxs - fxys);
52     sum = sum + px00 * (Sk4f{1.0f} - fxs - fys + fxys);
53     return sum;
54 }
55 
56 ////////////////////////////////////////////////////////////////////////////////////////////////////
57 // PixelConverter is the lowest level interface to the source data. There is a PixelConverter
58 // for each of the different SkColorTypes.
59 template <SkColorType, SkGammaType> class PixelConverter;
60 
61 // Alpha handling:
62 //   The alpha from the paint (tintColor) is used in the blend part of the pipeline to modulate
63 // the entire bitmap. So, the tint color is given an alpha of 1.0 so that the later alpha can
64 // modulate this color later.
65 template <>
66 class PixelConverter<kAlpha_8_SkColorType, kLinear_SkGammaType> {
67 public:
68     using Element = uint8_t;
PixelConverter(const SkPixmap & srcPixmap,SkColor tintColor)69     PixelConverter(const SkPixmap& srcPixmap, SkColor tintColor) {
70         fTintColor = SkColor4f::FromColor(tintColor);
71         fTintColor.fA = 1.0f;
72     }
73 
toSk4f(const Element pixel)74     Sk4f toSk4f(const Element pixel) const {
75         return Sk4f::Load(&fTintColor) * (pixel * (1.0f/255.0f));
76     }
77 
78 private:
79     SkColor4f fTintColor;
80 };
81 
82 template <SkGammaType gammaType>
pmcolor_to_rgba(SkPMColor pixel)83 static inline Sk4f pmcolor_to_rgba(SkPMColor pixel) {
84     return swizzle_rb_if_bgra(
85             (gammaType == kSRGB_SkGammaType) ? Sk4f_fromS32(pixel)
86                                              : Sk4f_fromL32(pixel));
87 }
88 
89 template <SkGammaType gammaType>
90 class PixelConverter<kRGB_565_SkColorType, gammaType> {
91 public:
92     using Element = uint16_t;
PixelConverter(const SkPixmap & srcPixmap)93     PixelConverter(const SkPixmap& srcPixmap) { }
94 
toSk4f(Element pixel)95     Sk4f toSk4f(Element pixel) const {
96         return pmcolor_to_rgba<gammaType>(SkPixel16ToPixel32(pixel));
97     }
98 };
99 
100 template <SkGammaType gammaType>
101 class PixelConverter<kARGB_4444_SkColorType, gammaType> {
102 public:
103     using Element = uint16_t;
PixelConverter(const SkPixmap & srcPixmap)104     PixelConverter(const SkPixmap& srcPixmap) { }
105 
toSk4f(Element pixel)106     Sk4f toSk4f(Element pixel) const {
107         return pmcolor_to_rgba<gammaType>(SkPixel4444ToPixel32(pixel));
108     }
109 };
110 
111 template <SkGammaType gammaType>
112 class PixelConverter<kRGBA_8888_SkColorType, gammaType> {
113 public:
114     using Element = uint32_t;
PixelConverter(const SkPixmap & srcPixmap)115     PixelConverter(const SkPixmap& srcPixmap) { }
116 
toSk4f(Element pixel)117     Sk4f toSk4f(Element pixel) const {
118         return gammaType == kSRGB_SkGammaType
119                ? Sk4f_fromS32(pixel)
120                : Sk4f_fromL32(pixel);
121     }
122 };
123 
124 template <SkGammaType gammaType>
125 class PixelConverter<kBGRA_8888_SkColorType, gammaType> {
126 public:
127     using Element = uint32_t;
PixelConverter(const SkPixmap & srcPixmap)128     PixelConverter(const SkPixmap& srcPixmap) { }
129 
toSk4f(Element pixel)130     Sk4f toSk4f(Element pixel) const {
131         return swizzle_rb(
132                    gammaType == kSRGB_SkGammaType ? Sk4f_fromS32(pixel) : Sk4f_fromL32(pixel));
133     }
134 };
135 
136 template <SkGammaType gammaType>
137 class PixelConverter<kGray_8_SkColorType, gammaType> {
138 public:
139     using Element = uint8_t;
PixelConverter(const SkPixmap & srcPixmap)140     PixelConverter(const SkPixmap& srcPixmap) { }
141 
toSk4f(Element pixel)142     Sk4f toSk4f(Element pixel) const {
143         float gray = (gammaType == kSRGB_SkGammaType)
144             ? sk_linear_from_srgb[pixel]
145             : pixel * (1/255.0f);
146         return {gray, gray, gray, 1.0f};
147     }
148 };
149 
150 template <>
151 class PixelConverter<kRGBA_F16_SkColorType, kLinear_SkGammaType> {
152 public:
153     using Element = uint64_t;
PixelConverter(const SkPixmap & srcPixmap)154     PixelConverter(const SkPixmap& srcPixmap) { }
155 
toSk4f(const Element pixel)156     Sk4f toSk4f(const Element pixel) const {
157         return SkHalfToFloat_finite_ftz(pixel);
158     }
159 };
160 
161 class PixelAccessorShim {
162 public:
PixelAccessorShim(SkLinearBitmapPipeline::PixelAccessorInterface * accessor)163     explicit PixelAccessorShim(SkLinearBitmapPipeline::PixelAccessorInterface* accessor)
164         : fPixelAccessor(accessor) { }
165 
getFewPixels(int n,Sk4i xs,Sk4i ys,Sk4f * px0,Sk4f * px1,Sk4f * px2)166     void SK_VECTORCALL getFewPixels(
167         int n, Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2) const {
168         fPixelAccessor->getFewPixels(n, xs, ys, px0, px1, px2);
169     }
170 
get4Pixels(Sk4i xs,Sk4i ys,Sk4f * px0,Sk4f * px1,Sk4f * px2,Sk4f * px3)171     void SK_VECTORCALL get4Pixels(
172         Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const {
173         fPixelAccessor->get4Pixels(xs, ys, px0, px1, px2, px3);
174     }
175 
get4Pixels(const void * src,int index,Sk4f * px0,Sk4f * px1,Sk4f * px2,Sk4f * px3)176     void get4Pixels(
177         const void* src, int index, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const {
178         fPixelAccessor->get4Pixels(src, index, px0, px1, px2, px3);
179     }
180 
getPixelFromRow(const void * row,int index)181     Sk4f getPixelFromRow(const void* row, int index) const {
182         return fPixelAccessor->getPixelFromRow(row, index);
183     }
184 
getPixelAt(int index)185     Sk4f getPixelAt(int index) const {
186         return fPixelAccessor->getPixelAt(index);
187     }
188 
row(int y)189     const void* row(int y) const {
190         return fPixelAccessor->row(y);
191     }
192 
193 private:
194     SkLinearBitmapPipeline::PixelAccessorInterface* const fPixelAccessor;
195 };
196 
197 ////////////////////////////////////////////////////////////////////////////////////////////////////
198 // PixelAccessor handles all the same plumbing for all the PixelConverters.
199 template <SkColorType colorType, SkGammaType gammaType>
200 class PixelAccessor final : public SkLinearBitmapPipeline::PixelAccessorInterface {
201     using Element = typename PixelConverter<colorType, gammaType>::Element;
202 public:
203     template <typename... Args>
PixelAccessor(const SkPixmap & srcPixmap,Args &&...args)204     PixelAccessor(const SkPixmap& srcPixmap, Args&&... args)
205         : fSrc{static_cast<const Element*>(srcPixmap.addr())}
206         , fWidth{srcPixmap.rowBytesAsPixels()}
207         , fConverter{srcPixmap, std::move<Args>(args)...} { }
208 
getFewPixels(int n,Sk4i xs,Sk4i ys,Sk4f * px0,Sk4f * px1,Sk4f * px2)209     void SK_VECTORCALL getFewPixels (
210         int n, Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2) const override {
211         Sk4i bufferLoc = ys * fWidth + xs;
212         switch (n) {
213             case 3:
214                 *px2 = this->getPixelAt(bufferLoc[2]);
215             case 2:
216                 *px1 = this->getPixelAt(bufferLoc[1]);
217             case 1:
218                 *px0 = this->getPixelAt(bufferLoc[0]);
219             default:
220                 break;
221         }
222     }
223 
get4Pixels(Sk4i xs,Sk4i ys,Sk4f * px0,Sk4f * px1,Sk4f * px2,Sk4f * px3)224     void SK_VECTORCALL get4Pixels(
225         Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const override {
226         Sk4i bufferLoc = ys * fWidth + xs;
227         *px0 = this->getPixelAt(bufferLoc[0]);
228         *px1 = this->getPixelAt(bufferLoc[1]);
229         *px2 = this->getPixelAt(bufferLoc[2]);
230         *px3 = this->getPixelAt(bufferLoc[3]);
231     }
232 
get4Pixels(const void * src,int index,Sk4f * px0,Sk4f * px1,Sk4f * px2,Sk4f * px3)233     void get4Pixels(
234         const void* src, int index, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const override {
235         *px0 = this->getPixelFromRow(src, index + 0);
236         *px1 = this->getPixelFromRow(src, index + 1);
237         *px2 = this->getPixelFromRow(src, index + 2);
238         *px3 = this->getPixelFromRow(src, index + 3);
239     }
240 
getPixelFromRow(const void * row,int index)241     Sk4f getPixelFromRow(const void* row, int index) const override {
242         const Element* src = static_cast<const Element*>(row);
243         return fConverter.toSk4f(src[index]);
244     }
245 
getPixelAt(int index)246     Sk4f getPixelAt(int index) const override {
247         return this->getPixelFromRow(fSrc, index);
248     }
249 
row(int y)250     const void* row(int y) const override { return fSrc + y * fWidth; }
251 
252 private:
253     const Element* const                 fSrc;
254     const int                            fWidth;
255     PixelConverter<colorType, gammaType> fConverter;
256 };
257 
258 // We're moving through source space at a rate of 1 source pixel per 1 dst pixel.
259 // We'll never re-use pixels, but we can at least load contiguous pixels.
260 template <typename Next, typename Strategy>
src_strategy_blend(Span span,Next * next,Strategy * strategy)261 static void src_strategy_blend(Span span, Next* next, Strategy* strategy) {
262     SkPoint start;
263     SkScalar length;
264     int count;
265     std::tie(start, length, count) = span;
266     int ix = SkScalarFloorToInt(X(start));
267     const void* row = strategy->row((int)std::floor(Y(start)));
268     if (length > 0) {
269         while (count >= 4) {
270             Sk4f px0, px1, px2, px3;
271             strategy->get4Pixels(row, ix, &px0, &px1, &px2, &px3);
272             next->blend4Pixels(px0, px1, px2, px3);
273             ix += 4;
274             count -= 4;
275         }
276 
277         while (count > 0) {
278             next->blendPixel(strategy->getPixelFromRow(row, ix));
279             ix += 1;
280             count -= 1;
281         }
282     } else {
283         while (count >= 4) {
284             Sk4f px0, px1, px2, px3;
285             strategy->get4Pixels(row, ix - 3, &px3, &px2, &px1, &px0);
286             next->blend4Pixels(px0, px1, px2, px3);
287             ix -= 4;
288             count -= 4;
289         }
290 
291         while (count > 0) {
292             next->blendPixel(strategy->getPixelFromRow(row, ix));
293             ix -= 1;
294             count -= 1;
295         }
296     }
297 }
298 
299 // -- NearestNeighborSampler -----------------------------------------------------------------------
300 // NearestNeighborSampler - use nearest neighbor filtering to create runs of destination pixels.
301 template<typename Accessor, typename Next>
302 class NearestNeighborSampler : public SkLinearBitmapPipeline::SampleProcessorInterface {
303 public:
304     template<typename... Args>
NearestNeighborSampler(SkLinearBitmapPipeline::BlendProcessorInterface * next,Args &&...args)305     NearestNeighborSampler(SkLinearBitmapPipeline::BlendProcessorInterface* next, Args&& ... args)
306     : fNext{next}, fAccessor{std::forward<Args>(args)...} { }
307 
NearestNeighborSampler(SkLinearBitmapPipeline::BlendProcessorInterface * next,const NearestNeighborSampler & sampler)308     NearestNeighborSampler(SkLinearBitmapPipeline::BlendProcessorInterface* next,
309     const NearestNeighborSampler& sampler)
310     : fNext{next}, fAccessor{sampler.fAccessor} { }
311 
pointListFew(int n,Sk4s xs,Sk4s ys)312     void SK_VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) override {
313         SkASSERT(0 < n && n < 4);
314         Sk4f px0, px1, px2;
315         fAccessor.getFewPixels(n, SkNx_cast<int>(xs), SkNx_cast<int>(ys), &px0, &px1, &px2);
316         if (n >= 1) fNext->blendPixel(px0);
317         if (n >= 2) fNext->blendPixel(px1);
318         if (n >= 3) fNext->blendPixel(px2);
319     }
320 
pointList4(Sk4s xs,Sk4s ys)321     void SK_VECTORCALL pointList4(Sk4s xs, Sk4s ys) override {
322         Sk4f px0, px1, px2, px3;
323         fAccessor.get4Pixels(SkNx_cast<int>(xs), SkNx_cast<int>(ys), &px0, &px1, &px2, &px3);
324         fNext->blend4Pixels(px0, px1, px2, px3);
325     }
326 
pointSpan(Span span)327     void pointSpan(Span span) override {
328         SkASSERT(!span.isEmpty());
329         SkPoint start;
330         SkScalar length;
331         int count;
332         std::tie(start, length, count) = span;
333         SkScalar absLength = SkScalarAbs(length);
334         if (absLength < (count - 1)) {
335             this->spanSlowRate(span);
336         } else if (absLength == (count - 1)) {
337             src_strategy_blend(span, fNext, &fAccessor);
338         } else {
339             this->spanFastRate(span);
340         }
341     }
342 
repeatSpan(Span span,int32_t repeatCount)343     void repeatSpan(Span span, int32_t repeatCount) override {
344         while (repeatCount > 0) {
345             this->pointSpan(span);
346             repeatCount--;
347         }
348     }
349 
350 private:
351     // When moving through source space more slowly than dst space (zoomed in),
352     // we'll be sampling from the same source pixel more than once.
spanSlowRate(Span span)353     void spanSlowRate(Span span) {
354         SkPoint start; SkScalar length; int count;
355         std::tie(start, length, count) = span;
356         SkScalar x = X(start);
357         // fx is a fixed 48.16 number.
358         int64_t fx = static_cast<int64_t>(x * SK_Fixed1);
359         SkScalar dx = length / (count - 1);
360         // fdx is a fixed 48.16 number.
361         int64_t fdx = static_cast<int64_t>(dx * SK_Fixed1);
362 
363         const void* row = fAccessor.row((int)std::floor(Y(start)));
364         Next* next = fNext;
365 
366         int64_t ix = fx >> 16;
367         int64_t prevIX = ix;
368         Sk4f fpixel = fAccessor.getPixelFromRow(row, ix);
369 
370         // When dx is less than one, each pixel is used more than once. Using the fixed point fx
371         // allows the code to quickly check that the same pixel is being used. The code uses this
372         // same pixel check to do the sRGB and normalization only once.
373         auto getNextPixel = [&]() {
374             if (ix != prevIX) {
375                 fpixel = fAccessor.getPixelFromRow(row, ix);
376                 prevIX = ix;
377             }
378             fx += fdx;
379             ix = fx >> 16;
380             return fpixel;
381         };
382 
383         while (count >= 4) {
384             Sk4f px0 = getNextPixel();
385             Sk4f px1 = getNextPixel();
386             Sk4f px2 = getNextPixel();
387             Sk4f px3 = getNextPixel();
388             next->blend4Pixels(px0, px1, px2, px3);
389             count -= 4;
390         }
391         while (count > 0) {
392             next->blendPixel(getNextPixel());
393             count -= 1;
394         }
395     }
396 
397     // We're moving through source space at a rate of 1 source pixel per 1 dst pixel.
398     // We'll never re-use pixels, but we can at least load contiguous pixels.
spanUnitRate(Span span)399     void spanUnitRate(Span span) {
400         src_strategy_blend(span, fNext, &fAccessor);
401     }
402 
403     // We're moving through source space faster than dst (zoomed out),
404     // so we'll never reuse a source pixel or be able to do contiguous loads.
spanFastRate(Span span)405     void spanFastRate(Span span) {
406         span_fallback(span, this);
407     }
408 
409     Next* const fNext;
410     Accessor    fAccessor;
411 };
412 
413 // From an edgeType, the integer value of a pixel vs, and the integer value of the extreme edge
414 // vMax, take the point which might be off the tile by one pixel and either wrap it or pin it to
415 // generate the right pixel. The value vs is on the interval [-1, vMax + 1]. It produces a value
416 // on the interval [0, vMax].
417 // Note: vMax is not width or height, but width-1 or height-1 because it is the largest valid pixel.
adjust_edge(SkShader::TileMode edgeType,int vs,int vMax)418 static inline int adjust_edge(SkShader::TileMode edgeType, int vs, int vMax) {
419     SkASSERT(-1 <= vs && vs <= vMax + 1);
420     switch (edgeType) {
421         case SkShader::kClamp_TileMode:
422         case SkShader::kMirror_TileMode:
423             vs = std::max(vs, 0);
424             vs = std::min(vs, vMax);
425             break;
426         case SkShader::kRepeat_TileMode:
427             vs = (vs <= vMax) ? vs : 0;
428             vs =    (vs >= 0) ? vs : vMax;
429             break;
430     }
431     SkASSERT(0 <= vs && vs <= vMax);
432     return vs;
433 }
434 
435 // From a sample point on the tile, return the top or left filter value.
436 // The result r should be in the range (0, 1]. Since this represents the weight given to the top
437 // left element, then if x == 0.5 the filter value should be 1.0.
438 // The input sample point must be on the tile, therefore it must be >= 0.
sample_to_filter(SkScalar x)439 static SkScalar sample_to_filter(SkScalar x) {
440     SkASSERT(x >= 0.0f);
441     // The usual form of the top or left edge is x - .5, but since we are working on the unit
442     // square, then x + .5 works just as well. This also guarantees that v > 0.0 allowing the use
443     // of trunc.
444     SkScalar v = x + 0.5f;
445     // Produce the top or left offset a value on the range [0, 1).
446     SkScalar f = v - SkScalarTruncToScalar(v);
447     // Produce the filter value which is on the range (0, 1].
448     SkScalar r =  1.0f - f;
449     SkASSERT(0.0f < r && r <= 1.0f);
450     return r;
451 }
452 
453 // -- BilerpSampler --------------------------------------------------------------------------------
454 // BilerpSampler - use a bilerp filter to create runs of destination pixels.
455 // Note: in the code below, there are two types of points
456 //       * sample points - these are the points passed in by pointList* and Spans.
457 //       * filter points - are created from a sample point to form the coordinates of the points
458 //                         to use in the filter and to generate the filter values.
459 template<typename Accessor, typename Next>
460 class BilerpSampler : public SkLinearBitmapPipeline::SampleProcessorInterface {
461 public:
462     template<typename... Args>
BilerpSampler(SkLinearBitmapPipeline::BlendProcessorInterface * next,SkISize dimensions,SkShader::TileMode xTile,SkShader::TileMode yTile,Args &&...args)463     BilerpSampler(
464         SkLinearBitmapPipeline::BlendProcessorInterface* next,
465         SkISize dimensions,
466         SkShader::TileMode xTile, SkShader::TileMode yTile,
467         Args&& ... args
468     )
469         : fNext{next}
470         , fXEdgeType{xTile}
471         , fXMax{dimensions.width() - 1}
472         , fYEdgeType{yTile}
473         , fYMax{dimensions.height() - 1}
474         , fAccessor{std::forward<Args>(args)...} { }
475 
BilerpSampler(SkLinearBitmapPipeline::BlendProcessorInterface * next,const BilerpSampler & sampler)476     BilerpSampler(SkLinearBitmapPipeline::BlendProcessorInterface* next,
477                    const BilerpSampler& sampler)
478         : fNext{next}
479         , fXEdgeType{sampler.fXEdgeType}
480         , fXMax{sampler.fXMax}
481         , fYEdgeType{sampler.fYEdgeType}
482         , fYMax{sampler.fYMax}
483         , fAccessor{sampler.fAccessor} { }
484 
pointListFew(int n,Sk4s xs,Sk4s ys)485     void SK_VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) override {
486         SkASSERT(0 < n && n < 4);
487         auto bilerpPixel = [&](int index) {
488             return this->bilerpSamplePoint(SkPoint{xs[index], ys[index]});
489         };
490 
491         if (n >= 1) fNext->blendPixel(bilerpPixel(0));
492         if (n >= 2) fNext->blendPixel(bilerpPixel(1));
493         if (n >= 3) fNext->blendPixel(bilerpPixel(2));
494     }
495 
pointList4(Sk4s xs,Sk4s ys)496     void SK_VECTORCALL pointList4(Sk4s xs, Sk4s ys) override {
497         auto bilerpPixel = [&](int index) {
498             return this->bilerpSamplePoint(SkPoint{xs[index], ys[index]});
499         };
500         fNext->blend4Pixels(bilerpPixel(0), bilerpPixel(1), bilerpPixel(2), bilerpPixel(3));
501     }
502 
pointSpan(Span span)503     void pointSpan(Span span) override {
504         SkASSERT(!span.isEmpty());
505         SkPoint start;
506         SkScalar length;
507         int count;
508         std::tie(start, length, count) = span;
509 
510         // Nothing to do.
511         if (count == 0) {
512             return;
513         }
514 
515         // Trivial case. No sample points are generated other than start.
516         if (count == 1) {
517             fNext->blendPixel(this->bilerpSamplePoint(start));
518             return;
519         }
520 
521         // Note: the following code could be done in terms of dx = length / (count -1), but that
522         // would introduce a divide that is not needed for the most common dx == 1 cases.
523         SkScalar absLength = SkScalarAbs(length);
524         if (absLength == 0.0f) {
525             // |dx| == 0
526             // length is zero, so clamp an edge pixel.
527             this->spanZeroRate(span);
528         } else if (absLength < (count - 1)) {
529             // 0 < |dx| < 1.
530             this->spanSlowRate(span);
531         } else if (absLength == (count - 1)) {
532             // |dx| == 1.
533             if (sample_to_filter(span.startX()) == 1.0f
534                 && sample_to_filter(span.startY()) == 1.0f) {
535                 // All the pixels are aligned with the dest; go fast.
536                 src_strategy_blend(span, fNext, &fAccessor);
537             } else {
538                 // There is some sub-pixel offsets, so bilerp.
539                 this->spanUnitRate(span);
540             }
541         } else if (absLength < 2.0f * (count - 1)) {
542             // 1 < |dx| < 2.
543             this->spanMediumRate(span);
544         } else {
545             // |dx| >= 2.
546             this->spanFastRate(span);
547         }
548     }
549 
repeatSpan(Span span,int32_t repeatCount)550     void repeatSpan(Span span, int32_t repeatCount) override {
551         while (repeatCount > 0) {
552             this->pointSpan(span);
553             repeatCount--;
554         }
555     }
556 
557 private:
558 
559     // Convert a sample point to the points used by the filter.
filterPoints(SkPoint sample,Sk4i * filterXs,Sk4i * filterYs)560     void filterPoints(SkPoint sample, Sk4i* filterXs, Sk4i* filterYs) {
561         // May be less than zero. Be careful to use Floor.
562         int x0 = adjust_edge(fXEdgeType, SkScalarFloorToInt(X(sample) - 0.5), fXMax);
563         // Always greater than zero. Use the faster Trunc.
564         int x1 = adjust_edge(fXEdgeType, SkScalarTruncToInt(X(sample) + 0.5), fXMax);
565         int y0 = adjust_edge(fYEdgeType, SkScalarFloorToInt(Y(sample) - 0.5), fYMax);
566         int y1 = adjust_edge(fYEdgeType, SkScalarTruncToInt(Y(sample) + 0.5), fYMax);
567 
568         *filterXs = Sk4i{x0, x1, x0, x1};
569         *filterYs = Sk4i{y0, y0, y1, y1};
570     }
571 
572     // Given a sample point, generate a color by bilerping the four filter points.
bilerpSamplePoint(SkPoint sample)573     Sk4f bilerpSamplePoint(SkPoint sample) {
574         Sk4i iXs, iYs;
575         filterPoints(sample, &iXs, &iYs);
576         Sk4f px00, px10, px01, px11;
577         fAccessor.get4Pixels(iXs, iYs, &px00, &px10, &px01, &px11);
578         return bilerp4(Sk4f{X(sample) - 0.5f}, Sk4f{Y(sample) - 0.5f}, px00, px10, px01, px11);
579     }
580 
581     // Get two pixels at x from row0 and row1.
get2PixelColumn(const void * row0,const void * row1,int x,Sk4f * px0,Sk4f * px1)582     void get2PixelColumn(const void* row0, const void* row1, int x, Sk4f* px0, Sk4f* px1) {
583         *px0 = fAccessor.getPixelFromRow(row0, x);
584         *px1 = fAccessor.getPixelFromRow(row1, x);
585     }
586 
587     // |dx| == 0. This code assumes that length is zero.
spanZeroRate(Span span)588     void spanZeroRate(Span span) {
589         SkPoint start; SkScalar length; int count;
590         std::tie(start, length, count) = span;
591         SkASSERT(length == 0.0f);
592 
593         // Filter for the blending of the top and bottom pixels.
594         SkScalar filterY = sample_to_filter(Y(start));
595 
596         // Generate the four filter points from the sample point start. Generate the row* values.
597         Sk4i iXs, iYs;
598         this->filterPoints(start, &iXs, &iYs);
599         const void* const row0 = fAccessor.row(iYs[0]);
600         const void* const row1 = fAccessor.row(iYs[2]);
601 
602         // Get the two pixels that make up the clamping pixel.
603         Sk4f pxTop, pxBottom;
604         this->get2PixelColumn(row0, row1, SkScalarFloorToInt(X(start)), &pxTop, &pxBottom);
605         Sk4f pixel = pxTop * filterY + (1.0f - filterY) * pxBottom;
606 
607         while (count >= 4) {
608             fNext->blend4Pixels(pixel, pixel, pixel, pixel);
609             count -= 4;
610         }
611         while (count > 0) {
612             fNext->blendPixel(pixel);
613             count -= 1;
614         }
615     }
616 
617     // 0 < |dx| < 1. This code reuses the calculations from previous pixels to reduce
618     // computation. In particular, several destination pixels maybe generated from the same four
619     // source pixels.
620     // In the following code a "part" is a combination of two pixels from the same column of the
621     // filter.
spanSlowRate(Span span)622     void spanSlowRate(Span span) {
623         SkPoint start; SkScalar length; int count;
624         std::tie(start, length, count) = span;
625 
626         // Calculate the distance between each sample point.
627         const SkScalar dx = length / (count - 1);
628         SkASSERT(-1.0f < dx && dx < 1.0f && dx != 0.0f);
629 
630         // Generate the filter values for the top-left corner.
631         // Note: these values are in filter space; this has implications about how to adjust
632         // these values at each step. For example, as the sample point increases, the filter
633         // value decreases, this is because the filter and position are related by
634         // (1 - (X(sample) - .5)) % 1. The (1 - stuff) causes the filter to move in the opposite
635         // direction of the sample point which is increasing by dx.
636         SkScalar filterX = sample_to_filter(X(start));
637         SkScalar filterY = sample_to_filter(Y(start));
638 
639         // Generate the four filter points from the sample point start. Generate the row* values.
640         Sk4i iXs, iYs;
641         this->filterPoints(start, &iXs, &iYs);
642         const void* const row0 = fAccessor.row(iYs[0]);
643         const void* const row1 = fAccessor.row(iYs[2]);
644 
645         // Generate part of the filter value at xColumn.
646         auto partAtColumn = [&](int xColumn) {
647             int adjustedColumn = adjust_edge(fXEdgeType, xColumn, fXMax);
648             Sk4f pxTop, pxBottom;
649             this->get2PixelColumn(row0, row1, adjustedColumn, &pxTop, &pxBottom);
650             return pxTop * filterY + (1.0f - filterY) * pxBottom;
651         };
652 
653         // The leftPart is made up of two pixels from the left column of the filter, right part
654         // is similar. The top and bottom pixels in the *Part are created as a linear blend of
655         // the top and bottom pixels using filterY. See the partAtColumn function above.
656         Sk4f leftPart  = partAtColumn(iXs[0]);
657         Sk4f rightPart = partAtColumn(iXs[1]);
658 
659         // Create a destination color by blending together a left and right part using filterX.
660         auto bilerp = [&](const Sk4f& leftPart, const Sk4f& rightPart) {
661             Sk4f pixel = leftPart * filterX + rightPart * (1.0f - filterX);
662             return check_pixel(pixel);
663         };
664 
665         // Send the first pixel to the destination. This simplifies the loop structure so that no
666         // extra pixels are fetched for the last iteration of the loop.
667         fNext->blendPixel(bilerp(leftPart, rightPart));
668         count -= 1;
669 
670         if (dx > 0.0f) {
671             // * positive direction - generate destination pixels by sliding the filter from left
672             //                        to right.
673             int rightPartCursor = iXs[1];
674 
675             // Advance the filter from left to right. Remember that moving the top-left corner of
676             // the filter to the right actually makes the filter value smaller.
677             auto advanceFilter = [&]() {
678                 filterX -= dx;
679                 if (filterX <= 0.0f) {
680                     filterX += 1.0f;
681                     leftPart = rightPart;
682                     rightPartCursor += 1;
683                     rightPart = partAtColumn(rightPartCursor);
684                 }
685                 SkASSERT(0.0f < filterX && filterX <= 1.0f);
686 
687                 return bilerp(leftPart, rightPart);
688             };
689 
690             while (count >= 4) {
691                 Sk4f px0 = advanceFilter(),
692                      px1 = advanceFilter(),
693                      px2 = advanceFilter(),
694                      px3 = advanceFilter();
695                 fNext->blend4Pixels(px0, px1, px2, px3);
696                 count -= 4;
697             }
698 
699             while (count > 0) {
700                 fNext->blendPixel(advanceFilter());
701                 count -= 1;
702             }
703         } else {
704             // * negative direction - generate destination pixels by sliding the filter from
705             //                        right to left.
706             int leftPartCursor = iXs[0];
707 
708             // Advance the filter from right to left. Remember that moving the top-left corner of
709             // the filter to the left actually makes the filter value larger.
710             auto advanceFilter = [&]() {
711                 // Remember, dx < 0 therefore this adds |dx| to filterX.
712                 filterX -= dx;
713                 // At this point filterX may be > 1, and needs to be wrapped back on to the filter
714                 // interval, and the next column in the filter is calculated.
715                 if (filterX > 1.0f) {
716                     filterX -= 1.0f;
717                     rightPart = leftPart;
718                     leftPartCursor -= 1;
719                     leftPart = partAtColumn(leftPartCursor);
720                 }
721                 SkASSERT(0.0f < filterX && filterX <= 1.0f);
722 
723                 return bilerp(leftPart, rightPart);
724             };
725 
726             while (count >= 4) {
727                 Sk4f px0 = advanceFilter(),
728                      px1 = advanceFilter(),
729                      px2 = advanceFilter(),
730                      px3 = advanceFilter();
731                 fNext->blend4Pixels(px0, px1, px2, px3);
732                 count -= 4;
733             }
734 
735             while (count > 0) {
736                 fNext->blendPixel(advanceFilter());
737                 count -= 1;
738             }
739         }
740     }
741 
742     // |dx| == 1. Moving through source space at a rate of 1 source pixel per 1 dst pixel.
743     // Every filter part is used for two destination pixels, and the code can bulk load four
744     // pixels at a time.
spanUnitRate(Span span)745     void spanUnitRate(Span span) {
746         SkPoint start; SkScalar length; int count;
747         std::tie(start, length, count) = span;
748         SkASSERT(SkScalarAbs(length) == (count - 1));
749 
750         // Calculate the four filter points of start, and use the two different Y values to
751         // generate the row pointers.
752         Sk4i iXs, iYs;
753         filterPoints(start, &iXs, &iYs);
754         const void* row0 = fAccessor.row(iYs[0]);
755         const void* row1 = fAccessor.row(iYs[2]);
756 
757         // Calculate the filter values for the top-left filter element.
758         const SkScalar filterX = sample_to_filter(X(start));
759         const SkScalar filterY = sample_to_filter(Y(start));
760 
761         // Generate part of the filter value at xColumn.
762         auto partAtColumn = [&](int xColumn) {
763             int adjustedColumn = adjust_edge(fXEdgeType, xColumn, fXMax);
764             Sk4f pxTop, pxBottom;
765             this->get2PixelColumn(row0, row1, adjustedColumn, &pxTop, &pxBottom);
766             return pxTop * filterY + (1.0f - filterY) * pxBottom;
767         };
768 
769         auto get4Parts = [&](int ix, Sk4f* part0, Sk4f* part1, Sk4f* part2, Sk4f* part3) {
770             // Check if the pixels needed are near the edges. If not go fast using bulk pixels,
771             // otherwise be careful.
772             if (0 <= ix && ix <= fXMax - 3) {
773                 Sk4f px00, px10, px20, px30,
774                      px01, px11, px21, px31;
775                 fAccessor.get4Pixels(row0, ix, &px00, &px10, &px20, &px30);
776                 fAccessor.get4Pixels(row1, ix, &px01, &px11, &px21, &px31);
777                 *part0 = filterY * px00 + (1.0f - filterY) * px01;
778                 *part1 = filterY * px10 + (1.0f - filterY) * px11;
779                 *part2 = filterY * px20 + (1.0f - filterY) * px21;
780                 *part3 = filterY * px30 + (1.0f - filterY) * px31;
781             } else {
782                 *part0 = partAtColumn(ix + 0);
783                 *part1 = partAtColumn(ix + 1);
784                 *part2 = partAtColumn(ix + 2);
785                 *part3 = partAtColumn(ix + 3);
786             }
787         };
788 
789         auto bilerp = [&](const Sk4f& part0, const Sk4f& part1) {
790             return part0 * filterX + part1 * (1.0f - filterX);
791         };
792 
793         if (length > 0) {
794             // * positive direction - generate destination pixels by sliding the filter from left
795             //                        to right.
796 
797             // overlapPart is the filter part from the end of the previous four pixels used at
798             // the start of the next four pixels.
799             Sk4f overlapPart = partAtColumn(iXs[0]);
800             int rightColumnCursor = iXs[1];
801             while (count >= 4) {
802                 Sk4f part0, part1, part2, part3;
803                 get4Parts(rightColumnCursor, &part0, &part1, &part2, &part3);
804                 Sk4f px0 = bilerp(overlapPart, part0);
805                 Sk4f px1 = bilerp(part0, part1);
806                 Sk4f px2 = bilerp(part1, part2);
807                 Sk4f px3 = bilerp(part2, part3);
808                 overlapPart = part3;
809                 fNext->blend4Pixels(px0, px1, px2, px3);
810                 rightColumnCursor += 4;
811                 count -= 4;
812             }
813 
814             while (count > 0) {
815                 Sk4f rightPart = partAtColumn(rightColumnCursor);
816 
817                 fNext->blendPixel(bilerp(overlapPart, rightPart));
818                 overlapPart = rightPart;
819                 rightColumnCursor += 1;
820                 count -= 1;
821             }
822         } else {
823             // * negative direction - generate destination pixels by sliding the filter from
824             //                        right to left.
825             Sk4f overlapPart = partAtColumn(iXs[1]);
826             int leftColumnCursor = iXs[0];
827 
828             while (count >= 4) {
829                 Sk4f part0, part1, part2, part3;
830                 get4Parts(leftColumnCursor - 3, &part3, &part2, &part1, &part0);
831                 Sk4f px0 = bilerp(part0, overlapPart);
832                 Sk4f px1 = bilerp(part1, part0);
833                 Sk4f px2 = bilerp(part2, part1);
834                 Sk4f px3 = bilerp(part3, part2);
835                 overlapPart = part3;
836                 fNext->blend4Pixels(px0, px1, px2, px3);
837                 leftColumnCursor -= 4;
838                 count -= 4;
839             }
840 
841             while (count > 0) {
842                 Sk4f leftPart = partAtColumn(leftColumnCursor);
843 
844                 fNext->blendPixel(bilerp(leftPart, overlapPart));
845                 overlapPart = leftPart;
846                 leftColumnCursor -= 1;
847                 count -= 1;
848             }
849         }
850     }
851 
852     // 1 < |dx| < 2. Going through the source pixels at a faster rate than the dest pixels, but
853     // still slow enough to take advantage of previous calculations.
spanMediumRate(Span span)854     void spanMediumRate(Span span) {
855         SkPoint start; SkScalar length; int count;
856         std::tie(start, length, count) = span;
857 
858         // Calculate the distance between each sample point.
859         const SkScalar dx = length / (count - 1);
860         SkASSERT((-2.0f < dx && dx < -1.0f) || (1.0f < dx && dx < 2.0f));
861 
862         // Generate the filter values for the top-left corner.
863         // Note: these values are in filter space; this has implications about how to adjust
864         // these values at each step. For example, as the sample point increases, the filter
865         // value decreases, this is because the filter and position are related by
866         // (1 - (X(sample) - .5)) % 1. The (1 - stuff) causes the filter to move in the opposite
867         // direction of the sample point which is increasing by dx.
868         SkScalar filterX = sample_to_filter(X(start));
869         SkScalar filterY = sample_to_filter(Y(start));
870 
871         // Generate the four filter points from the sample point start. Generate the row* values.
872         Sk4i iXs, iYs;
873         this->filterPoints(start, &iXs, &iYs);
874         const void* const row0 = fAccessor.row(iYs[0]);
875         const void* const row1 = fAccessor.row(iYs[2]);
876 
877         // Generate part of the filter value at xColumn.
878         auto partAtColumn = [&](int xColumn) {
879             int adjustedColumn = adjust_edge(fXEdgeType, xColumn, fXMax);
880             Sk4f pxTop, pxBottom;
881             this->get2PixelColumn(row0, row1, adjustedColumn, &pxTop, &pxBottom);
882             return pxTop * filterY + (1.0f - filterY) * pxBottom;
883         };
884 
885         // The leftPart is made up of two pixels from the left column of the filter, right part
886         // is similar. The top and bottom pixels in the *Part are created as a linear blend of
887         // the top and bottom pixels using filterY. See the nextPart function below.
888         Sk4f leftPart  = partAtColumn(iXs[0]);
889         Sk4f rightPart = partAtColumn(iXs[1]);
890 
891         // Create a destination color by blending together a left and right part using filterX.
892         auto bilerp = [&](const Sk4f& leftPart, const Sk4f& rightPart) {
893             Sk4f pixel = leftPart * filterX + rightPart * (1.0f - filterX);
894             return check_pixel(pixel);
895         };
896 
897         // Send the first pixel to the destination. This simplifies the loop structure so that no
898         // extra pixels are fetched for the last iteration of the loop.
899         fNext->blendPixel(bilerp(leftPart, rightPart));
900         count -= 1;
901 
902         if (dx > 0.0f) {
903             // * positive direction - generate destination pixels by sliding the filter from left
904             //                        to right.
905             int rightPartCursor = iXs[1];
906 
907             // Advance the filter from left to right. Remember that moving the top-left corner of
908             // the filter to the right actually makes the filter value smaller.
909             auto advanceFilter = [&]() {
910                 filterX -= dx;
911                 // At this point filterX is less than zero, but might actually be less than -1.
912                 if (filterX > -1.0f) {
913                     filterX += 1.0f;
914                     leftPart = rightPart;
915                     rightPartCursor += 1;
916                     rightPart = partAtColumn(rightPartCursor);
917                 } else {
918                     filterX += 2.0f;
919                     rightPartCursor += 2;
920                     leftPart = partAtColumn(rightPartCursor - 1);
921                     rightPart = partAtColumn(rightPartCursor);
922                 }
923                 SkASSERT(0.0f < filterX && filterX <= 1.0f);
924 
925                 return bilerp(leftPart, rightPart);
926             };
927 
928             while (count >= 4) {
929                 Sk4f px0 = advanceFilter(),
930                      px1 = advanceFilter(),
931                      px2 = advanceFilter(),
932                      px3 = advanceFilter();
933                 fNext->blend4Pixels(px0, px1, px2, px3);
934                 count -= 4;
935             }
936 
937             while (count > 0) {
938                 fNext->blendPixel(advanceFilter());
939                 count -= 1;
940             }
941         } else {
942             // * negative direction - generate destination pixels by sliding the filter from
943             //                        right to left.
944             int leftPartCursor = iXs[0];
945 
946             auto advanceFilter = [&]() {
947                 // Remember, dx < 0 therefore this adds |dx| to filterX.
948                 filterX -= dx;
949                 // At this point, filterX is greater than one, but may actually be greater than two.
950                 if (filterX < 2.0f) {
951                     filterX -= 1.0f;
952                     rightPart = leftPart;
953                     leftPartCursor -= 1;
954                     leftPart = partAtColumn(leftPartCursor);
955                 } else {
956                     filterX -= 2.0f;
957                     leftPartCursor -= 2;
958                     rightPart = partAtColumn(leftPartCursor - 1);
959                     leftPart = partAtColumn(leftPartCursor);
960                 }
961                 SkASSERT(0.0f < filterX && filterX <= 1.0f);
962                 return bilerp(leftPart, rightPart);
963             };
964 
965             while (count >= 4) {
966                 Sk4f px0 = advanceFilter(),
967                      px1 = advanceFilter(),
968                      px2 = advanceFilter(),
969                      px3 = advanceFilter();
970                 fNext->blend4Pixels(px0, px1, px2, px3);
971                 count -= 4;
972             }
973 
974             while (count > 0) {
975                 fNext->blendPixel(advanceFilter());
976                 count -= 1;
977             }
978         }
979     }
980 
981     // We're moving through source space faster than dst (zoomed out),
982     // so we'll never reuse a source pixel or be able to do contiguous loads.
spanFastRate(Span span)983     void spanFastRate(Span span) {
984         SkPoint start; SkScalar length; int count;
985         std::tie(start, length, count) = span;
986         SkScalar x = X(start);
987         SkScalar y = Y(start);
988 
989         SkScalar dx = length / (count - 1);
990         while (count > 0) {
991             fNext->blendPixel(this->bilerpSamplePoint(SkPoint{x, y}));
992             x += dx;
993             count -= 1;
994         }
995     }
996 
997     Next* const              fNext;
998     const SkShader::TileMode fXEdgeType;
999     const int                fXMax;
1000     const SkShader::TileMode fYEdgeType;
1001     const int                fYMax;
1002     Accessor                 fAccessor;
1003 };
1004 
1005 }  // namespace
1006 
1007 #endif  // SkLinearBitmapPipeline_sampler_DEFINED
1008