1 /*
2  * Copyright 2016 Google Inc.
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 
8 #include "include/private/base/SkTo.h"
9 #include "src/base/SkHalf.h"
10 #include "src/base/SkUtils.h"
11 #include "src/core/SkOpts.h"
12 #include "src/core/SkRasterPipeline.h"
13 #include "src/gpu/Swizzle.h"
14 #include "tests/Test.h"
15 
#include <algorithm>
#include <cmath>
#include <functional>
#include <iterator>
#include <numeric>
18 
DEF_TEST(SkRasterPipeline, r) {
20     // Build and run a simple pipeline to exercise SkRasterPipeline,
21     // drawing 50% transparent blue over opaque red in half-floats.
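    // (In IEEE half-float, 0x3C00 is 1.0 and 0x3800 is 0.5, so `red` below packs the
    // premultiplied RGBA color (1, 0, 0, 1) and `blue` packs (0, 0, 0.5, 0.5), with the
    // red channel in the low 16 bits.)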
22     uint64_t red  = 0x3c00000000003c00ull,
23              blue = 0x3800380000000000ull,
24              result;
25 
26     SkRasterPipeline_MemoryCtx load_s_ctx = { &blue, 0 },
27                                load_d_ctx = { &red, 0 },
28                                store_ctx  = { &result, 0 };
29 
30     SkRasterPipeline_<256> p;
31     p.append(SkRasterPipelineOp::load_f16,     &load_s_ctx);
32     p.append(SkRasterPipelineOp::load_f16_dst, &load_d_ctx);
33     p.append(SkRasterPipelineOp::srcover);
34     p.append(SkRasterPipelineOp::store_f16, &store_ctx);
35     p.run(0,0,1,1);
36 
37     // We should see half-intensity magenta.
38     REPORTER_ASSERT(r, ((result >>  0) & 0xffff) == 0x3800);
39     REPORTER_ASSERT(r, ((result >> 16) & 0xffff) == 0x0000);
40     REPORTER_ASSERT(r, ((result >> 32) & 0xffff) == 0x3800);
41     REPORTER_ASSERT(r, ((result >> 48) & 0xffff) == 0x3c00);
42 }
43 
DEF_TEST(SkRasterPipeline_LoadStoreConditionMask, r) {
45     alignas(64) int32_t mask[]  = {~0, 0, ~0,  0, ~0, ~0, ~0,  0};
46     alignas(64) int32_t maskCopy[SkRasterPipeline_kMaxStride_highp] = {};
47     alignas(64) int32_t dst[4 * SkRasterPipeline_kMaxStride_highp] = {};
48 
49     static_assert(std::size(mask) == SkRasterPipeline_kMaxStride_highp);
50 
51     SkRasterPipeline_<256> p;
52     p.append(SkRasterPipelineOp::init_lane_masks);
53     p.append(SkRasterPipelineOp::load_condition_mask, mask);
54     p.append(SkRasterPipelineOp::store_condition_mask, maskCopy);
55     p.append(SkRasterPipelineOp::store_dst, dst);
56     p.run(0,0,SkOpts::raster_pipeline_highp_stride,1);
57 
58     {
59         // `maskCopy` should be populated with `mask` in the frontmost positions
60         // (depending on the architecture that SkRasterPipeline is targeting).
61         size_t index = 0;
62         for (; index < SkOpts::raster_pipeline_highp_stride; ++index) {
63             REPORTER_ASSERT(r, maskCopy[index] == mask[index]);
64         }
65 
66         // The remaining slots should have been left alone.
67         for (; index < std::size(maskCopy); ++index) {
68             REPORTER_ASSERT(r, maskCopy[index] == 0);
69         }
70     }
71     {
72         // `dr` and `da` should be populated with `mask`.
73         // `dg` and `db` should remain initialized to true.
74         const int dr = 0 * SkOpts::raster_pipeline_highp_stride;
75         const int dg = 1 * SkOpts::raster_pipeline_highp_stride;
76         const int db = 2 * SkOpts::raster_pipeline_highp_stride;
77         const int da = 3 * SkOpts::raster_pipeline_highp_stride;
78         for (size_t index = 0; index < SkOpts::raster_pipeline_highp_stride; ++index) {
79             REPORTER_ASSERT(r, dst[dr + index] == mask[index]);
80             REPORTER_ASSERT(r, dst[dg + index] == ~0);
81             REPORTER_ASSERT(r, dst[db + index] == ~0);
82             REPORTER_ASSERT(r, dst[da + index] == mask[index]);
83         }
84     }
85 }
86 
DEF_TEST(SkRasterPipeline_LoadStoreLoopMask, r) {
88     alignas(64) int32_t mask[]  = {~0, 0, ~0,  0, ~0, ~0, ~0,  0};
89     alignas(64) int32_t maskCopy[SkRasterPipeline_kMaxStride_highp] = {};
90     alignas(64) int32_t dst[4 * SkRasterPipeline_kMaxStride_highp] = {};
91 
92     static_assert(std::size(mask) == SkRasterPipeline_kMaxStride_highp);
93 
94     SkRasterPipeline_<256> p;
95     p.append(SkRasterPipelineOp::init_lane_masks);
96     p.append(SkRasterPipelineOp::load_loop_mask, mask);
97     p.append(SkRasterPipelineOp::store_loop_mask, maskCopy);
98     p.append(SkRasterPipelineOp::store_dst, dst);
99     p.run(0,0,SkOpts::raster_pipeline_highp_stride,1);
100 
101     {
102         // `maskCopy` should be populated with `mask` in the frontmost positions
103         // (depending on the architecture that SkRasterPipeline is targeting).
104         size_t index = 0;
105         for (; index < SkOpts::raster_pipeline_highp_stride; ++index) {
106             REPORTER_ASSERT(r, maskCopy[index] == mask[index]);
107         }
108 
109         // The remaining slots should have been left alone.
110         for (; index < std::size(maskCopy); ++index) {
111             REPORTER_ASSERT(r, maskCopy[index] == 0);
112         }
113     }
114     {
115         // `dg` and `da` should be populated with `mask`.
116         // `dr` and `db` should remain initialized to true.
117         const int dr = 0 * SkOpts::raster_pipeline_highp_stride;
118         const int dg = 1 * SkOpts::raster_pipeline_highp_stride;
119         const int db = 2 * SkOpts::raster_pipeline_highp_stride;
120         const int da = 3 * SkOpts::raster_pipeline_highp_stride;
121         for (size_t index = 0; index < SkOpts::raster_pipeline_highp_stride; ++index) {
122             REPORTER_ASSERT(r, dst[dr + index] == ~0);
123             REPORTER_ASSERT(r, dst[dg + index] == mask[index]);
124             REPORTER_ASSERT(r, dst[db + index] == ~0);
125             REPORTER_ASSERT(r, dst[da + index] == mask[index]);
126         }
127     }
128 }
129 
DEF_TEST(SkRasterPipeline_LoadStoreReturnMask, r) {
131     alignas(64) int32_t mask[]  = {~0, 0, ~0,  0, ~0, ~0, ~0,  0};
132     alignas(64) int32_t maskCopy[SkRasterPipeline_kMaxStride_highp] = {};
133     alignas(64) int32_t dst[4 * SkRasterPipeline_kMaxStride_highp] = {};
134 
135     static_assert(std::size(mask) == SkRasterPipeline_kMaxStride_highp);
136 
137     SkRasterPipeline_<256> p;
138     p.append(SkRasterPipelineOp::init_lane_masks);
139     p.append(SkRasterPipelineOp::load_return_mask, mask);
140     p.append(SkRasterPipelineOp::store_return_mask, maskCopy);
141     p.append(SkRasterPipelineOp::store_dst, dst);
142     p.run(0,0,SkOpts::raster_pipeline_highp_stride,1);
143 
144     {
145         // `maskCopy` should be populated with `mask` in the frontmost positions
146         // (depending on the architecture that SkRasterPipeline is targeting).
147         size_t index = 0;
148         for (; index < SkOpts::raster_pipeline_highp_stride; ++index) {
149             REPORTER_ASSERT(r, maskCopy[index] == mask[index]);
150         }
151 
152         // The remaining slots should have been left alone.
153         for (; index < std::size(maskCopy); ++index) {
154             REPORTER_ASSERT(r, maskCopy[index] == 0);
155         }
156     }
157     {
158         // `db` and `da` should be populated with `mask`.
159         // `dr` and `dg` should remain initialized to true.
160         const int dr = 0 * SkOpts::raster_pipeline_highp_stride;
161         const int dg = 1 * SkOpts::raster_pipeline_highp_stride;
162         const int db = 2 * SkOpts::raster_pipeline_highp_stride;
163         const int da = 3 * SkOpts::raster_pipeline_highp_stride;
164         for (size_t index = 0; index < SkOpts::raster_pipeline_highp_stride; ++index) {
165             REPORTER_ASSERT(r, dst[dr + index] == ~0);
166             REPORTER_ASSERT(r, dst[dg + index] == ~0);
167             REPORTER_ASSERT(r, dst[db + index] == mask[index]);
168             REPORTER_ASSERT(r, dst[da + index] == mask[index]);
169         }
170     }
171 }
172 
DEF_TEST(SkRasterPipeline_MergeConditionMask, r) {
174     alignas(64) int32_t mask[]  = { 0,  0, ~0, ~0, 0, ~0, 0, ~0,
175                                    ~0, ~0, ~0, ~0, 0,  0, 0,  0};
176     alignas(64) int32_t dst[4 * SkRasterPipeline_kMaxStride_highp] = {};
177     static_assert(std::size(mask) == (2 * SkRasterPipeline_kMaxStride_highp));
178 
179     SkRasterPipeline_<256> p;
180     p.append(SkRasterPipelineOp::init_lane_masks);
181     p.append(SkRasterPipelineOp::merge_condition_mask, mask);
182     p.append(SkRasterPipelineOp::store_dst, dst);
183     p.run(0,0,SkOpts::raster_pipeline_highp_stride,1);
184 
185     // `dr` and `da` should be populated with `mask[x] & mask[y]` in the frontmost positions.
186     // `dg` and `db` should remain initialized to true.
187     const int dr = 0 * SkOpts::raster_pipeline_highp_stride;
188     const int dg = 1 * SkOpts::raster_pipeline_highp_stride;
189     const int db = 2 * SkOpts::raster_pipeline_highp_stride;
190     const int da = 3 * SkOpts::raster_pipeline_highp_stride;
191     for (size_t index = 0; index < SkOpts::raster_pipeline_highp_stride; ++index) {
192         int32_t expected = mask[index] & mask[index + SkOpts::raster_pipeline_highp_stride];
193         REPORTER_ASSERT(r, dst[dr + index] == expected);
194         REPORTER_ASSERT(r, dst[dg + index] == ~0);
195         REPORTER_ASSERT(r, dst[db + index] == ~0);
196         REPORTER_ASSERT(r, dst[da + index] == expected);
197     }
198 }
199 
DEF_TEST(SkRasterPipeline_MergeLoopMask, r) {
201     alignas(64) int32_t initial[]  = {~0, ~0, ~0, ~0, ~0,  0, ~0, ~0,  // dr (condition)
202                                       ~0,  0, ~0,  0, ~0, ~0, ~0, ~0,  // dg (loop)
203                                       ~0, ~0, ~0, ~0, ~0, ~0,  0, ~0,  // db (return)
204                                       ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0}; // da (combined)
205     alignas(64) int32_t mask[]     = { 0, ~0, ~0,  0, ~0, ~0, ~0, ~0};
206     alignas(64) int32_t dst[4 * SkRasterPipeline_kMaxStride_highp] = {};
207     static_assert(std::size(initial) == (4 * SkRasterPipeline_kMaxStride_highp));
208 
209     SkRasterPipeline_<256> p;
210     p.append(SkRasterPipelineOp::load_dst, initial);
211     p.append(SkRasterPipelineOp::merge_loop_mask, mask);
212     p.append(SkRasterPipelineOp::store_dst, dst);
213     p.run(0,0,SkOpts::raster_pipeline_highp_stride,1);
214 
215     const int dr = 0 * SkOpts::raster_pipeline_highp_stride;
216     const int dg = 1 * SkOpts::raster_pipeline_highp_stride;
217     const int db = 2 * SkOpts::raster_pipeline_highp_stride;
218     const int da = 3 * SkOpts::raster_pipeline_highp_stride;
219     for (size_t index = 0; index < SkOpts::raster_pipeline_highp_stride; ++index) {
220         // `dg` should contain `dg & mask` in each lane.
221         REPORTER_ASSERT(r, dst[dg + index] == (initial[dg + index] & mask[index]));
222 
223         // `dr` and `db` should be unchanged.
224         REPORTER_ASSERT(r, dst[dr + index] == initial[dr + index]);
225         REPORTER_ASSERT(r, dst[db + index] == initial[db + index]);
226 
        // `da` should contain `dr & dg & db`.
228         REPORTER_ASSERT(r, dst[da + index] == (dst[dr+index] & dst[dg+index] & dst[db+index]));
229     }
230 }
231 
DEF_TEST(SkRasterPipeline_ReenableLoopMask, r) {
233     alignas(64) int32_t initial[]  = {~0, ~0, ~0, ~0, ~0,  0, ~0, ~0,  // dr (condition)
234                                       ~0,  0, ~0,  0, ~0, ~0,  0, ~0,  // dg (loop)
235                                        0, ~0, ~0, ~0,  0,  0,  0, ~0,  // db (return)
236                                        0,  0, ~0,  0,  0,  0,  0, ~0}; // da (combined)
237     alignas(64) int32_t mask[]     = { 0, ~0,  0,  0,  0,  0, ~0,  0};
238     alignas(64) int32_t dst[4 * SkRasterPipeline_kMaxStride_highp] = {};
239     static_assert(std::size(initial) == (4 * SkRasterPipeline_kMaxStride_highp));
240 
241     SkRasterPipeline_<256> p;
242     p.append(SkRasterPipelineOp::load_dst, initial);
243     p.append(SkRasterPipelineOp::reenable_loop_mask, mask);
244     p.append(SkRasterPipelineOp::store_dst, dst);
245     p.run(0,0,SkOpts::raster_pipeline_highp_stride,1);
246 
247     const int dr = 0 * SkOpts::raster_pipeline_highp_stride;
248     const int dg = 1 * SkOpts::raster_pipeline_highp_stride;
249     const int db = 2 * SkOpts::raster_pipeline_highp_stride;
250     const int da = 3 * SkOpts::raster_pipeline_highp_stride;
251     for (size_t index = 0; index < SkOpts::raster_pipeline_highp_stride; ++index) {
252         // `dg` should contain `dg | mask` in each lane.
253         REPORTER_ASSERT(r, dst[dg + index] == (initial[dg + index] | mask[index]));
254 
255         // `dr` and `db` should be unchanged.
256         REPORTER_ASSERT(r, dst[dr + index] == initial[dr + index]);
257         REPORTER_ASSERT(r, dst[db + index] == initial[db + index]);
258 
        // `da` should contain `dr & dg & db`.
260         REPORTER_ASSERT(r, dst[da + index] == (dst[dr+index] & dst[dg+index] & dst[db+index]));
261     }
262 }
263 
DEF_TEST(SkRasterPipeline_CaseOp, r) {
265     alignas(64) int32_t initial[]        = {~0, ~0, ~0, ~0, ~0,  0, ~0, ~0,  // dr (condition)
266                                              0, ~0, ~0,  0, ~0, ~0,  0, ~0,  // dg (loop)
267                                             ~0,  0, ~0, ~0,  0,  0,  0, ~0,  // db (return)
268                                              0,  0, ~0,  0,  0,  0,  0, ~0}; // da (combined)
269     alignas(64) int32_t dst[4 * SkRasterPipeline_kMaxStride_highp] = {};
270     static_assert(std::size(initial) == (4 * SkRasterPipeline_kMaxStride_highp));
271 
272     constexpr int32_t actualValues[] = { 2,  1,  2,  4,  5,  2,  2,  8};
273     static_assert(std::size(actualValues) == SkRasterPipeline_kMaxStride_highp);
274 
275     alignas(64) int32_t caseOpData[2 * SkRasterPipeline_kMaxStride_highp];
276     for (size_t index = 0; index < SkOpts::raster_pipeline_highp_stride; ++index) {
277         caseOpData[0 * SkOpts::raster_pipeline_highp_stride + index] = actualValues[index];
278         caseOpData[1 * SkOpts::raster_pipeline_highp_stride + index] = ~0;
279     }
280 
281     SkRasterPipeline_CaseOpCtx ctx;
282     ctx.ptr = caseOpData;
283     ctx.expectedValue = 2;
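    // As exercised below, `case_op` should enable the loop mask for each lane whose value in
    // `caseOpData` matches `expectedValue`, and clear the default mask for those same lanes.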
284 
285     SkRasterPipeline_<256> p;
286     p.append(SkRasterPipelineOp::load_dst, initial);
287     p.append(SkRasterPipelineOp::case_op, &ctx);
288     p.append(SkRasterPipelineOp::store_dst, dst);
289     p.run(0,0,SkOpts::raster_pipeline_highp_stride,1);
290 
291     const int dr = 0 * SkOpts::raster_pipeline_highp_stride;
292     const int dg = 1 * SkOpts::raster_pipeline_highp_stride;
293     const int db = 2 * SkOpts::raster_pipeline_highp_stride;
294     const int da = 3 * SkOpts::raster_pipeline_highp_stride;
295     const int actualValueIdx = 0 * SkOpts::raster_pipeline_highp_stride;
296     const int defaultMaskIdx = 1 * SkOpts::raster_pipeline_highp_stride;
297 
298     for (size_t index = 0; index < SkOpts::raster_pipeline_highp_stride; ++index) {
299         // `dg` should have been set to true for each lane containing 2.
300         int32_t expected = (actualValues[index] == 2) ? ~0 : initial[dg + index];
301         REPORTER_ASSERT(r, dst[dg + index] == expected);
302 
303         // `dr` and `db` should be unchanged.
304         REPORTER_ASSERT(r, dst[dr + index] == initial[dr + index]);
305         REPORTER_ASSERT(r, dst[db + index] == initial[db + index]);
306 
        // `da` should contain `dr & dg & db`.
308         REPORTER_ASSERT(r, dst[da + index] == (dst[dr+index] & dst[dg+index] & dst[db+index]));
309 
310         // The actual-value part of `caseOpData` should be unchanged from the inputs.
311         REPORTER_ASSERT(r, caseOpData[actualValueIdx + index] == actualValues[index]);
312 
313         // The default-mask part of `caseOpData` should have been zeroed where the values matched.
314         expected = (actualValues[index] == 2) ? 0 : ~0;
315         REPORTER_ASSERT(r, caseOpData[defaultMaskIdx + index] == expected);
316     }
317 }
318 
DEF_TEST(SkRasterPipeline_MaskOffLoopMask, r) {
320     alignas(64) int32_t initial[]  = {~0, ~0, ~0, ~0, ~0,  0, ~0, ~0,  // dr (condition)
321                                       ~0,  0, ~0, ~0,  0,  0,  0, ~0,  // dg (loop)
322                                       ~0, ~0,  0, ~0,  0,  0, ~0, ~0,  // db (return)
323                                       ~0,  0,  0, ~0,  0,  0,  0, ~0}; // da (combined)
324     alignas(64) int32_t dst[4 * SkRasterPipeline_kMaxStride_highp] = {};
325     static_assert(std::size(initial) == (4 * SkRasterPipeline_kMaxStride_highp));
326 
327     SkRasterPipeline_<256> p;
328     p.append(SkRasterPipelineOp::load_dst, initial);
329     p.append(SkRasterPipelineOp::mask_off_loop_mask);
330     p.append(SkRasterPipelineOp::store_dst, dst);
331     p.run(0,0,SkOpts::raster_pipeline_highp_stride,1);
332 
333     const int dr = 0 * SkOpts::raster_pipeline_highp_stride;
334     const int dg = 1 * SkOpts::raster_pipeline_highp_stride;
335     const int db = 2 * SkOpts::raster_pipeline_highp_stride;
336     const int da = 3 * SkOpts::raster_pipeline_highp_stride;
337     for (size_t index = 0; index < SkOpts::raster_pipeline_highp_stride; ++index) {
338         // `dg` should have masked off any lanes that are currently executing.
339         int32_t expected = initial[dg + index] & ~initial[da + index];
340         REPORTER_ASSERT(r, dst[dg + index] == expected);
341 
        // `da` should contain `dr & dg & db`.
343         expected = dst[dr + index] & dst[dg + index] & dst[db + index];
344         REPORTER_ASSERT(r, dst[da + index] == expected);
345     }
346 }
347 
DEF_TEST(SkRasterPipeline_MaskOffReturnMask, r) {
349     alignas(64) int32_t initial[]  = {~0, ~0, ~0, ~0, ~0,  0, ~0, ~0,  // dr (condition)
350                                       ~0,  0, ~0, ~0,  0,  0,  0, ~0,  // dg (loop)
351                                       ~0, ~0,  0, ~0,  0,  0, ~0, ~0,  // db (return)
352                                       ~0,  0,  0, ~0,  0,  0,  0, ~0}; // da (combined)
353     alignas(64) int32_t dst[4 * SkRasterPipeline_kMaxStride_highp] = {};
354     static_assert(std::size(initial) == (4 * SkRasterPipeline_kMaxStride_highp));
355 
356     SkRasterPipeline_<256> p;
357     p.append(SkRasterPipelineOp::load_dst, initial);
358     p.append(SkRasterPipelineOp::mask_off_return_mask);
359     p.append(SkRasterPipelineOp::store_dst, dst);
360     p.run(0,0,SkOpts::raster_pipeline_highp_stride,1);
361 
362     const int dr = 0 * SkOpts::raster_pipeline_highp_stride;
363     const int dg = 1 * SkOpts::raster_pipeline_highp_stride;
364     const int db = 2 * SkOpts::raster_pipeline_highp_stride;
365     const int da = 3 * SkOpts::raster_pipeline_highp_stride;
366     for (size_t index = 0; index < SkOpts::raster_pipeline_highp_stride; ++index) {
367         // `db` should have masked off any lanes that are currently executing.
368         int32_t expected = initial[db + index] & ~initial[da + index];
369         REPORTER_ASSERT(r, dst[db + index] == expected);
370 
        // `da` should contain `dr & dg & db`.
372         expected = dst[dr + index] & dst[dg + index] & dst[db + index];
373         REPORTER_ASSERT(r, dst[da + index] == expected);
374     }
375 }
376 
DEF_TEST(SkRasterPipeline_InitLaneMasks, r) {
378     for (size_t width = 1; width <= SkOpts::raster_pipeline_highp_stride; ++width) {
379         SkRasterPipeline_<256> p;
380 
381         // Initialize dRGBA to unrelated values.
382         SkRasterPipeline_UniformColorCtx uniformCtx;
383         uniformCtx.a = 0.0f;
384         uniformCtx.r = 0.25f;
385         uniformCtx.g = 0.50f;
386         uniformCtx.b = 0.75f;
387         p.append(SkRasterPipelineOp::uniform_color_dst, &uniformCtx);
388 
389         // Overwrite dRGB with lane masks up to the tail width.
390         p.append(SkRasterPipelineOp::init_lane_masks);
391 
392         // Use the store_dst command to write out dRGBA for inspection.
393         alignas(64) int32_t dRGBA[4 * SkRasterPipeline_kMaxStride_highp] = {};
394         p.append(SkRasterPipelineOp::store_dst, dRGBA);
395 
396         // Execute our program.
397         p.run(0,0,width,1);
398 
399         // Initialized data should look like on/on/on/on (RGBA are all set) and is
400         // striped by the raster pipeline stride because we wrote it using store_dst.
401         size_t index = 0;
402         int32_t* channelR = dRGBA;
403         int32_t* channelG = channelR + SkOpts::raster_pipeline_highp_stride;
404         int32_t* channelB = channelG + SkOpts::raster_pipeline_highp_stride;
405         int32_t* channelA = channelB + SkOpts::raster_pipeline_highp_stride;
406         for (; index < width; ++index) {
407             REPORTER_ASSERT(r, *channelR++ == ~0);
408             REPORTER_ASSERT(r, *channelG++ == ~0);
409             REPORTER_ASSERT(r, *channelB++ == ~0);
410             REPORTER_ASSERT(r, *channelA++ == ~0);
411         }
412 
413         // The rest of the output array should be untouched (all zero).
414         for (; index < SkOpts::raster_pipeline_highp_stride; ++index) {
415             REPORTER_ASSERT(r, *channelR++ == 0);
416             REPORTER_ASSERT(r, *channelG++ == 0);
417             REPORTER_ASSERT(r, *channelB++ == 0);
418             REPORTER_ASSERT(r, *channelA++ == 0);
419         }
420     }
421 }
422 
DEF_TEST(SkRasterPipeline_CopyFromIndirectMasked, r) {
424     // Allocate space for 5 source slots, and 5 dest slots.
425     alignas(64) float src[5 * SkRasterPipeline_kMaxStride_highp];
426     alignas(64) float dst[5 * SkRasterPipeline_kMaxStride_highp];
427 
428     // Test with various mixes of indirect offsets.
429     static_assert(SkRasterPipeline_kMaxStride_highp == 8);
430     alignas(64) const uint32_t kOffsets1[8] = {0, 0, 0, 0, 0, 0, 0, 0};
431     alignas(64) const uint32_t kOffsets2[8] = {2, 2, 2, 2, 2, 2, 2, 2};
432     alignas(64) const uint32_t kOffsets3[8] = {0, 2, 0, 2, 0, 2, 0, 2};
433     alignas(64) const uint32_t kOffsets4[8] = {99, 99, 0, 0, 99, 99, 0, 0};
434 
435     alignas(64) const int32_t kMask1[8] = {~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0};
436     alignas(64) const int32_t kMask2[8] = { 0,  0,  0,  0,  0,  0,  0,  0};
437     alignas(64) const int32_t kMask3[8] = {~0,  0, ~0, ~0, ~0, ~0,  0, ~0};
438     alignas(64) const int32_t kMask4[8] = { 0, ~0,  0,  0,  0, ~0, ~0,  0};
439 
440     const int N = SkOpts::raster_pipeline_highp_stride;
441 
442     for (const uint32_t* offsets : {kOffsets1, kOffsets2, kOffsets3, kOffsets4}) {
443         for (const int32_t* mask : {kMask1, kMask2, kMask3, kMask4}) {
444             for (int copySize = 1; copySize <= 5; ++copySize) {
445                 // Initialize the destination slots to 0,1,2.. and the source slots
446                 // to 1000,1001,1002...
447                 std::iota(&dst[0], &dst[5 * N], 0.0f);
448                 std::iota(&src[0], &src[5 * N],  1000.0f);
449 
450                 // Run `copy_from_indirect_masked` over our data.
451                 SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
452                 SkRasterPipeline p(&alloc);
453                 auto* ctx = alloc.make<SkRasterPipeline_CopyIndirectCtx>();
454                 ctx->dst = &dst[0];
455                 ctx->src = &src[0];
456                 ctx->indirectOffset = offsets;
457                 ctx->indirectLimit = 5 - copySize;
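                // With 5 source slots, an indirect offset can be at most `5 - copySize` before
                // a copy of `copySize` slots would read past the end of `src`.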
458                 ctx->slots = copySize;
459 
460                 p.append(SkRasterPipelineOp::init_lane_masks);
461                 p.append(SkRasterPipelineOp::load_condition_mask, mask);
462                 p.append(SkRasterPipelineOp::copy_from_indirect_masked, ctx);
463                 p.run(0,0,N,1);
464 
465                 // If the offset plus copy-size would overflow the source data, the results don't
466                 // matter; indexing off the end of the buffer is UB, and we don't make any promises
467                 // about the values you get. If we didn't crash, that's success. (In practice, we
468                 // will have clamped the source pointer so that we don't read past the end.)
469                 int maxOffset = *std::max_element(offsets, offsets + N);
470                 if (copySize + maxOffset > 5) {
471                     continue;
472                 }
473 
474                 // Verify that the destination has been overwritten in the mask-on fields, and has
475                 // not been overwritten in the mask-off fields, for each destination slot.
476                 float expectedUnchanged = 0.0f;
477                 float expectedFromZero = src[0 * N], expectedFromTwo = src[2 * N];
478                 float* destPtr = dst;
479                 for (int checkSlot = 0; checkSlot < 5; ++checkSlot) {
480                     for (int checkLane = 0; checkLane < N; ++checkLane) {
481                         if (checkSlot < copySize && mask[checkLane]) {
482                             if (offsets[checkLane] == 0) {
483                                 REPORTER_ASSERT(r, *destPtr == expectedFromZero);
484                             } else if (offsets[checkLane] == 2) {
485                                 REPORTER_ASSERT(r, *destPtr == expectedFromTwo);
486                             } else {
487                                 ERRORF(r, "unexpected offset value");
488                             }
489                         } else {
490                             REPORTER_ASSERT(r, *destPtr == expectedUnchanged);
491                         }
492 
493                         ++destPtr;
494                         expectedUnchanged += 1.0f;
495                         expectedFromZero += 1.0f;
496                         expectedFromTwo += 1.0f;
497                     }
498                 }
499             }
500         }
501     }
502 }
503 
DEF_TEST(SkRasterPipeline_CopySlotsMasked, r) {
505     // Allocate space for 5 source slots and 5 dest slots.
506     alignas(64) float slots[10 * SkRasterPipeline_kMaxStride_highp];
507     const int srcIndex = 0, dstIndex = 5;
508 
509     struct CopySlotsOp {
510         SkRasterPipelineOp stage;
511         int numSlotsAffected;
512     };
513 
514     static const CopySlotsOp kCopyOps[] = {
515         {SkRasterPipelineOp::copy_slot_masked,    1},
516         {SkRasterPipelineOp::copy_2_slots_masked, 2},
517         {SkRasterPipelineOp::copy_3_slots_masked, 3},
518         {SkRasterPipelineOp::copy_4_slots_masked, 4},
519     };
520 
521     static_assert(SkRasterPipeline_kMaxStride_highp == 8);
522     alignas(64) const int32_t kMask1[8] = {~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0};
523     alignas(64) const int32_t kMask2[8] = { 0,  0,  0,  0,  0,  0,  0,  0};
524     alignas(64) const int32_t kMask3[8] = {~0,  0, ~0, ~0, ~0, ~0,  0, ~0};
525     alignas(64) const int32_t kMask4[8] = { 0, ~0,  0,  0,  0, ~0, ~0,  0};
526 
527     const int N = SkOpts::raster_pipeline_highp_stride;
528 
529     for (const CopySlotsOp& op : kCopyOps) {
530         for (const int32_t* mask : {kMask1, kMask2, kMask3, kMask4}) {
531             // Initialize the destination slots to 0,1,2.. and the source slots to 1000,1001,1002...
532             std::iota(&slots[N * dstIndex],  &slots[N * (dstIndex + 5)], 0.0f);
533             std::iota(&slots[N * srcIndex],  &slots[N * (srcIndex + 5)], 1000.0f);
534 
535             // Run `copy_slots_masked` over our data.
536             SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
537             SkRasterPipeline p(&alloc);
538             auto* ctx = alloc.make<SkRasterPipeline_BinaryOpCtx>();
539             ctx->dst = &slots[N * dstIndex];
540             ctx->src = &slots[N * srcIndex];
541 
542             p.append(SkRasterPipelineOp::init_lane_masks);
543             p.append(SkRasterPipelineOp::load_condition_mask, mask);
544             p.append(op.stage, ctx);
545             p.run(0,0,N,1);
546 
547             // Verify that the destination has been overwritten in the mask-on fields, and has not
548             // been overwritten in the mask-off fields, for each destination slot.
549             float expectedUnchanged = 0.0f, expectedChanged = 1000.0f;
550             float* destPtr = &slots[N * dstIndex];
551             for (int checkSlot = 0; checkSlot < 5; ++checkSlot) {
552                 for (int checkMask = 0; checkMask < N; ++checkMask) {
553                     if (checkSlot < op.numSlotsAffected && mask[checkMask]) {
554                         REPORTER_ASSERT(r, *destPtr == expectedChanged);
555                     } else {
556                         REPORTER_ASSERT(r, *destPtr == expectedUnchanged);
557                     }
558 
559                     ++destPtr;
560                     expectedUnchanged += 1.0f;
561                     expectedChanged += 1.0f;
562                 }
563             }
564         }
565     }
566 }
567 
DEF_TEST(SkRasterPipeline_CopySlotsUnmasked, r) {
569     // Allocate space for 5 source slots and 5 dest slots.
570     alignas(64) float slots[10 * SkRasterPipeline_kMaxStride_highp];
571     const int srcIndex = 0, dstIndex = 5;
572     const int N = SkOpts::raster_pipeline_highp_stride;
573 
574     struct CopySlotsOp {
575         SkRasterPipelineOp stage;
576         int numSlotsAffected;
577     };
578 
579     static const CopySlotsOp kCopyOps[] = {
580         {SkRasterPipelineOp::copy_slot_unmasked,    1},
581         {SkRasterPipelineOp::copy_2_slots_unmasked, 2},
582         {SkRasterPipelineOp::copy_3_slots_unmasked, 3},
583         {SkRasterPipelineOp::copy_4_slots_unmasked, 4},
584     };
585 
586     for (const CopySlotsOp& op : kCopyOps) {
587         // Initialize the destination slots to 0,1,2.. and the source slots to 1000,1001,1002...
588         std::iota(&slots[N * dstIndex],  &slots[N * (dstIndex + 5)], 0.0f);
589         std::iota(&slots[N * srcIndex],  &slots[N * (srcIndex + 5)], 1000.0f);
590 
591         // Run `copy_slots_unmasked` over our data.
592         SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
593         SkRasterPipeline p(&alloc);
594         auto* ctx = alloc.make<SkRasterPipeline_BinaryOpCtx>();
595         ctx->dst = &slots[N * dstIndex];
596         ctx->src = &slots[N * srcIndex];
597         p.append(op.stage, ctx);
598         p.run(0,0,1,1);
599 
600         // Verify that the destination has been overwritten in each slot.
601         float expectedUnchanged = 0.0f, expectedChanged = 1000.0f;
602         float* destPtr = &slots[N * dstIndex];
603         for (int checkSlot = 0; checkSlot < 5; ++checkSlot) {
604             for (int checkLane = 0; checkLane < N; ++checkLane) {
605                 if (checkSlot < op.numSlotsAffected) {
606                     REPORTER_ASSERT(r, *destPtr == expectedChanged);
607                 } else {
608                     REPORTER_ASSERT(r, *destPtr == expectedUnchanged);
609                 }
610 
611                 ++destPtr;
612                 expectedUnchanged += 1.0f;
613                 expectedChanged += 1.0f;
614             }
615         }
616     }
617 }
618 
DEF_TEST(SkRasterPipeline_ZeroSlotsUnmasked, r) {
620     // Allocate space for 5 dest slots.
621     alignas(64) float slots[5 * SkRasterPipeline_kMaxStride_highp];
622     const int N = SkOpts::raster_pipeline_highp_stride;
623 
624     struct ZeroSlotsOp {
625         SkRasterPipelineOp stage;
626         int numSlotsAffected;
627     };
628 
629     static const ZeroSlotsOp kZeroOps[] = {
630         {SkRasterPipelineOp::zero_slot_unmasked,    1},
631         {SkRasterPipelineOp::zero_2_slots_unmasked, 2},
632         {SkRasterPipelineOp::zero_3_slots_unmasked, 3},
633         {SkRasterPipelineOp::zero_4_slots_unmasked, 4},
634     };
635 
636     for (const ZeroSlotsOp& op : kZeroOps) {
637         // Initialize the destination slots to 1,2,3...
638         std::iota(&slots[0], &slots[5 * N], 1.0f);
639 
640         // Run `zero_slots_unmasked` over our data.
641         SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
642         SkRasterPipeline p(&alloc);
643         p.append(op.stage, &slots[0]);
644         p.run(0,0,1,1);
645 
646         // Verify that the destination has been zeroed out in each slot.
647         float expectedUnchanged = 1.0f;
648         float* destPtr = &slots[0];
649         for (int checkSlot = 0; checkSlot < 5; ++checkSlot) {
650             for (int checkLane = 0; checkLane < N; ++checkLane) {
651                 if (checkSlot < op.numSlotsAffected) {
652                     REPORTER_ASSERT(r, *destPtr == 0.0f);
653                 } else {
654                     REPORTER_ASSERT(r, *destPtr == expectedUnchanged);
655                 }
656 
657                 ++destPtr;
658                 expectedUnchanged += 1.0f;
659             }
660         }
661     }
662 }
663 
DEF_TEST(SkRasterPipeline_CopyConstants, r) {
665     // Allocate space for 5 dest slots.
666     alignas(64) float slots[5 * SkRasterPipeline_kMaxStride_highp];
667     float constants[5];
668     const int N = SkOpts::raster_pipeline_highp_stride;
669 
670     struct CopySlotsOp {
671         SkRasterPipelineOp stage;
672         int numSlotsAffected;
673     };
674 
675     static const CopySlotsOp kCopyOps[] = {
676         {SkRasterPipelineOp::copy_constant,    1},
677         {SkRasterPipelineOp::copy_2_constants, 2},
678         {SkRasterPipelineOp::copy_3_constants, 3},
679         {SkRasterPipelineOp::copy_4_constants, 4},
680     };
681 
682     for (const CopySlotsOp& op : kCopyOps) {
683         // Initialize the destination slots to 1,2,3...
684         std::iota(&slots[0], &slots[5 * N], 1.0f);
685         // Initialize the constant buffer to 1000,1001,1002...
686         std::iota(&constants[0], &constants[5], 1000.0f);
687 
688         // Run `copy_constants` over our data.
689         SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
690         SkRasterPipeline p(&alloc);
691         auto* ctx = alloc.make<SkRasterPipeline_BinaryOpCtx>();
692         ctx->dst = slots;
693         ctx->src = constants;
694         p.append(op.stage, ctx);
695         p.run(0,0,1,1);
696 
697         // Verify that our constants have been broadcast into each slot.
698         float expectedUnchanged = 1.0f;
699         float expectedChanged = 1000.0f;
700         float* destPtr = &slots[0];
701         for (int checkSlot = 0; checkSlot < 5; ++checkSlot) {
702             for (int checkLane = 0; checkLane < N; ++checkLane) {
703                 if (checkSlot < op.numSlotsAffected) {
704                     REPORTER_ASSERT(r, *destPtr == expectedChanged);
705                 } else {
706                     REPORTER_ASSERT(r, *destPtr == expectedUnchanged);
707                 }
708 
709                 ++destPtr;
710                 expectedUnchanged += 1.0f;
711             }
712             expectedChanged += 1.0f;
713         }
714     }
715 }
716 
DEF_TEST(SkRasterPipeline_Swizzle, r) {
718     // Allocate space for 4 dest slots.
719     alignas(64) float slots[4 * SkRasterPipeline_kMaxStride_highp];
720     const int N = SkOpts::raster_pipeline_highp_stride;
721 
722     struct TestPattern {
723         SkRasterPipelineOp stage;
724         uint16_t swizzle[4];
725         uint16_t expectation[4];
726     };
727     static const TestPattern kPatterns[] = {
728         {SkRasterPipelineOp::swizzle_1, {3},          {3, 1, 2, 3}}, // (1,2,3,4).w    = (4)
729         {SkRasterPipelineOp::swizzle_2, {1, 0},       {1, 0, 2, 3}}, // (1,2,3,4).yx   = (2,1)
730         {SkRasterPipelineOp::swizzle_3, {2, 2, 2},    {2, 2, 2, 3}}, // (1,2,3,4).zzz  = (3,3,3)
731         {SkRasterPipelineOp::swizzle_4, {0, 0, 1, 2}, {0, 0, 1, 2}}, // (1,2,3,4).xxyz = (1,1,2,3)
732     };
733     static_assert(sizeof(TestPattern::swizzle) == sizeof(SkRasterPipeline_SwizzleCtx::offsets));
734 
735     for (const TestPattern& pattern : kPatterns) {
736         // Initialize the destination slots to 0,1,2,3...
737         std::iota(&slots[0], &slots[4 * N], 0.0f);
738 
739         // Apply the test-pattern swizzle.
740         SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
741         SkRasterPipeline p(&alloc);
742         SkRasterPipeline_SwizzleCtx ctx;
743         ctx.ptr = slots;
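        // The swizzle offsets are expressed in bytes from `ctx.ptr`; each slot spans N floats.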
744         for (size_t index = 0; index < std::size(ctx.offsets); ++index) {
745             ctx.offsets[index] = pattern.swizzle[index] * N * sizeof(float);
746         }
747         p.append(pattern.stage, &ctx);
748         p.run(0,0,1,1);
749 
750         // Verify that the swizzle has been applied in each slot.
751         float* destPtr = &slots[0];
752         for (int checkSlot = 0; checkSlot < 4; ++checkSlot) {
753             float expected = pattern.expectation[checkSlot] * N;
754             for (int checkLane = 0; checkLane < N; ++checkLane) {
755                 REPORTER_ASSERT(r, *destPtr == expected);
756 
757                 ++destPtr;
758                 expected += 1.0f;
759             }
760         }
761     }
762 }
763 
DEF_TEST(SkRasterPipeline_SwizzleCopy, r) {
765     const int N = SkOpts::raster_pipeline_highp_stride;
766 
767     struct TestPattern {
768         SkRasterPipelineOp op;
769         uint16_t swizzle[4];
770         uint16_t expectation[4];
771     };
772     constexpr uint16_t _ = ~0;
773     static const TestPattern kPatterns[] = {
774         {SkRasterPipelineOp::swizzle_copy_slot_masked,    {3,_,_,_}, {_,_,_,0}},//v.w    = (1)
775         {SkRasterPipelineOp::swizzle_copy_2_slots_masked, {1,0,_,_}, {1,0,_,_}},//v.yx   = (1,2)
776         {SkRasterPipelineOp::swizzle_copy_3_slots_masked, {2,3,0,_}, {2,_,0,1}},//v.zwy  = (1,2,3)
777         {SkRasterPipelineOp::swizzle_copy_4_slots_masked, {3,0,1,2}, {1,2,3,0}},//v.wxyz = (1,2,3,4)
778     };
779     static_assert(sizeof(TestPattern::swizzle) == sizeof(SkRasterPipeline_SwizzleCopyCtx::offsets));
780 
781     for (const TestPattern& pattern : kPatterns) {
782         // Allocate space for 4 dest slots, and initialize them to zero.
783         alignas(64) float dest[4 * SkRasterPipeline_kMaxStride_highp] = {};
784 
785         // Allocate 4 source slots and initialize them to 1, 2, 3, 4...
786         alignas(64) float source[4 * SkRasterPipeline_kMaxStride_highp] = {};
787         std::iota(&source[0 * N], &source[4 * N], 1.0f);
788 
789         // Apply the dest-swizzle pattern.
790         SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
791         SkRasterPipeline p(&alloc);
792         SkRasterPipeline_SwizzleCopyCtx ctx = {};
793         ctx.src = source;
794         ctx.dst = dest;
795         for (size_t index = 0; index < std::size(ctx.offsets); ++index) {
796             if (pattern.swizzle[index] != _) {
797                 ctx.offsets[index] = pattern.swizzle[index] * N * sizeof(float);
798             }
799         }
800         p.append(SkRasterPipelineOp::init_lane_masks);
801         p.append(pattern.op, &ctx);
802         p.run(0,0,N,1);
803 
804         // Verify that the swizzle has been applied in each slot.
805         float* destPtr = &dest[0];
806         for (int checkSlot = 0; checkSlot < 4; ++checkSlot) {
807             for (int checkLane = 0; checkLane < N; ++checkLane) {
808                 if (pattern.expectation[checkSlot] == _) {
809                     REPORTER_ASSERT(r, *destPtr == 0);
810                 } else {
811                     int expectedIdx = pattern.expectation[checkSlot] * N + checkLane;
812                     REPORTER_ASSERT(r, *destPtr == source[expectedIdx]);
813                 }
814 
815                 ++destPtr;
816             }
817         }
818     }
819 }
820 
DEF_TEST(SkRasterPipeline_Shuffle, r) {
822     // Allocate space for 16 dest slots.
823     alignas(64) float slots[16 * SkRasterPipeline_kMaxStride_highp];
824     const int N = SkOpts::raster_pipeline_highp_stride;
825 
826     struct TestPattern {
827         int count;
828         uint16_t shuffle[16];
829         uint16_t expectation[16];
830     };
831     static const TestPattern kPatterns[] = {
832         {9,  { 0,  3,  6,
833                1,  4,  7,
834                2,  5,  8, /* past end: */  0,  0,  0,  0,  0,  0,  0},
835              { 0,  3,  6,
836                1,  4,  7,
837                2,  5,  8, /* unchanged: */ 9, 10, 11, 12, 13, 14, 15}},
838         {16, { 0,  4,  8, 12,
839                1,  5,  9, 13,
840                2,  6, 10, 14,
841                3,  7, 11, 15},
842              { 0,  4,  8, 12,
843                1,  5,  9, 13,
844                2,  6, 10, 14,
845                3,  7, 11, 15}},
846     };
847     static_assert(sizeof(TestPattern::shuffle) == sizeof(SkRasterPipeline_ShuffleCtx::offsets));
848 
849     for (const TestPattern& pattern : kPatterns) {
850         // Initialize the destination slots to 1,2,3...
851         std::iota(&slots[0], &slots[16 * N], 1.0f);
852 
853         // Apply the shuffle.
854         SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
855         SkRasterPipeline p(&alloc);
856         SkRasterPipeline_ShuffleCtx ctx;
857         ctx.ptr = slots;
858         ctx.count = pattern.count;
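        // Per the expectations above, only the first `count` slots should be rearranged; any
        // slots past `count` should be left untouched.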
859         for (size_t index = 0; index < std::size(ctx.offsets); ++index) {
860             ctx.offsets[index] = pattern.shuffle[index] * N * sizeof(float);
861         }
862         p.append(SkRasterPipelineOp::shuffle, &ctx);
863         p.run(0,0,1,1);
864 
865         // Verify that the shuffle has been applied in each slot.
866         float* destPtr = &slots[0];
867         for (int checkSlot = 0; checkSlot < 16; ++checkSlot) {
868             float expected = pattern.expectation[checkSlot] * N + 1;
869             for (int checkLane = 0; checkLane < N; ++checkLane) {
870                 REPORTER_ASSERT(r, *destPtr == expected);
871 
872                 ++destPtr;
873                 expected += 1.0f;
874             }
875         }
876     }
877 }
878 
DEF_TEST(SkRasterPipeline_FloatArithmeticWithNSlots, r) {
880     // Allocate space for 5 dest and 5 source slots.
881     alignas(64) float slots[10 * SkRasterPipeline_kMaxStride_highp];
882     const int N = SkOpts::raster_pipeline_highp_stride;
883 
884     struct ArithmeticOp {
885         SkRasterPipelineOp stage;
886         std::function<float(float, float)> verify;
887     };
888 
889     static const ArithmeticOp kArithmeticOps[] = {
890         {SkRasterPipelineOp::add_n_floats, [](float a, float b) { return a + b; }},
891         {SkRasterPipelineOp::sub_n_floats, [](float a, float b) { return a - b; }},
892         {SkRasterPipelineOp::mul_n_floats, [](float a, float b) { return a * b; }},
893         {SkRasterPipelineOp::div_n_floats, [](float a, float b) { return a / b; }},
894     };
895 
896     for (const ArithmeticOp& op : kArithmeticOps) {
897         for (int numSlotsAffected = 1; numSlotsAffected <= 5; ++numSlotsAffected) {
898             // Initialize the slot values to 1,2,3...
899             std::iota(&slots[0], &slots[10 * N], 1.0f);
900 
901             // Run the arithmetic op over our data.
902             SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
903             SkRasterPipeline p(&alloc);
904             auto* ctx = alloc.make<SkRasterPipeline_BinaryOpCtx>();
905             ctx->dst = &slots[0];
906             ctx->src = &slots[numSlotsAffected * N];
907             p.append(op.stage, ctx);
908             p.run(0,0,1,1);
909 
910             // Verify that the affected slots now equal (1,2,3...) op (4,5,6...).
911             float leftValue = 1.0f;
912             float rightValue = float(numSlotsAffected * N) + 1.0f;
913             float* destPtr = &slots[0];
914             for (int checkSlot = 0; checkSlot < 10; ++checkSlot) {
915                 for (int checkLane = 0; checkLane < N; ++checkLane) {
916                     if (checkSlot < numSlotsAffected) {
917                         REPORTER_ASSERT(r, *destPtr == op.verify(leftValue, rightValue));
918                     } else {
919                         REPORTER_ASSERT(r, *destPtr == leftValue);
920                     }
921 
922                     ++destPtr;
923                     leftValue += 1.0f;
924                     rightValue += 1.0f;
925                 }
926             }
927         }
928     }
929 }
930 
DEF_TEST(SkRasterPipeline_FloatArithmeticWithHardcodedSlots, r) {
932     // Allocate space for 5 dest and 5 source slots.
933     alignas(64) float slots[10 * SkRasterPipeline_kMaxStride_highp];
934     const int N = SkOpts::raster_pipeline_highp_stride;
935 
936     struct ArithmeticOp {
937         SkRasterPipelineOp stage;
938         int numSlotsAffected;
939         std::function<float(float, float)> verify;
940     };
941 
942     static const ArithmeticOp kArithmeticOps[] = {
943         {SkRasterPipelineOp::add_float,    1, [](float a, float b) { return a + b; }},
944         {SkRasterPipelineOp::sub_float,    1, [](float a, float b) { return a - b; }},
945         {SkRasterPipelineOp::mul_float,    1, [](float a, float b) { return a * b; }},
946         {SkRasterPipelineOp::div_float,    1, [](float a, float b) { return a / b; }},
947 
948         {SkRasterPipelineOp::add_2_floats, 2, [](float a, float b) { return a + b; }},
949         {SkRasterPipelineOp::sub_2_floats, 2, [](float a, float b) { return a - b; }},
950         {SkRasterPipelineOp::mul_2_floats, 2, [](float a, float b) { return a * b; }},
951         {SkRasterPipelineOp::div_2_floats, 2, [](float a, float b) { return a / b; }},
952 
953         {SkRasterPipelineOp::add_3_floats, 3, [](float a, float b) { return a + b; }},
954         {SkRasterPipelineOp::sub_3_floats, 3, [](float a, float b) { return a - b; }},
955         {SkRasterPipelineOp::mul_3_floats, 3, [](float a, float b) { return a * b; }},
956         {SkRasterPipelineOp::div_3_floats, 3, [](float a, float b) { return a / b; }},
957 
958         {SkRasterPipelineOp::add_4_floats, 4, [](float a, float b) { return a + b; }},
959         {SkRasterPipelineOp::sub_4_floats, 4, [](float a, float b) { return a - b; }},
960         {SkRasterPipelineOp::mul_4_floats, 4, [](float a, float b) { return a * b; }},
961         {SkRasterPipelineOp::div_4_floats, 4, [](float a, float b) { return a / b; }},
962     };
963 
964     for (const ArithmeticOp& op : kArithmeticOps) {
965         // Initialize the slot values to 1,2,3...
966         std::iota(&slots[0], &slots[10 * N], 1.0f);
967 
968         // Run the arithmetic op over our data.
969         SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
970         SkRasterPipeline p(&alloc);
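        // These hardcoded-slot ops take a single pointer; per the verification below, the source
        // operands are expected to sit immediately after the `numSlotsAffected` destination slots.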
971         p.append(op.stage, &slots[0]);
972         p.run(0,0,1,1);
973 
974         // Verify that the affected slots now equal (1,2,3...) op (4,5,6...).
975         float leftValue = 1.0f;
976         float rightValue = float(op.numSlotsAffected * N) + 1.0f;
977         float* destPtr = &slots[0];
978         for (int checkSlot = 0; checkSlot < 10; ++checkSlot) {
979             for (int checkLane = 0; checkLane < N; ++checkLane) {
980                 if (checkSlot < op.numSlotsAffected) {
981                     REPORTER_ASSERT(r, *destPtr == op.verify(leftValue, rightValue));
982                 } else {
983                     REPORTER_ASSERT(r, *destPtr == leftValue);
984                 }
985 
986                 ++destPtr;
987                 leftValue += 1.0f;
988                 rightValue += 1.0f;
989             }
990         }
991     }
992 }
993 
static int divide_unsigned(int a, int b) { return int(uint32_t(a) / uint32_t(b)); }
static int min_unsigned   (int a, int b) { return uint32_t(a) < uint32_t(b) ? a : b; }
static int max_unsigned   (int a, int b) { return uint32_t(a) > uint32_t(b) ? a : b; }
997 
DEF_TEST(SkRasterPipeline_IntArithmeticWithNSlots, r) {
999     // Allocate space for 5 dest and 5 source slots.
1000     alignas(64) int slots[10 * SkRasterPipeline_kMaxStride_highp];
1001     const int N = SkOpts::raster_pipeline_highp_stride;
1002 
1003     struct ArithmeticOp {
1004         SkRasterPipelineOp stage;
1005         std::function<int(int, int)> verify;
1006     };
1007 
1008     static const ArithmeticOp kArithmeticOps[] = {
1009         {SkRasterPipelineOp::add_n_ints,         [](int a, int b) { return a + b; }},
1010         {SkRasterPipelineOp::sub_n_ints,         [](int a, int b) { return a - b; }},
1011         {SkRasterPipelineOp::mul_n_ints,         [](int a, int b) { return a * b; }},
1012         {SkRasterPipelineOp::div_n_ints,         [](int a, int b) { return a / b; }},
1013         {SkRasterPipelineOp::div_n_uints,        divide_unsigned},
1014         {SkRasterPipelineOp::bitwise_and_n_ints, [](int a, int b) { return a & b; }},
1015         {SkRasterPipelineOp::bitwise_or_n_ints,  [](int a, int b) { return a | b; }},
1016         {SkRasterPipelineOp::bitwise_xor_n_ints, [](int a, int b) { return a ^ b; }},
1017         {SkRasterPipelineOp::min_n_ints,         [](int a, int b) { return a < b ? a : b; }},
1018         {SkRasterPipelineOp::min_n_uints,        min_unsigned},
1019         {SkRasterPipelineOp::max_n_ints,         [](int a, int b) { return a > b ? a : b; }},
1020         {SkRasterPipelineOp::max_n_uints,        max_unsigned},
1021     };
1022 
1023     for (const ArithmeticOp& op : kArithmeticOps) {
1024         for (int numSlotsAffected = 1; numSlotsAffected <= 5; ++numSlotsAffected) {
1025             // Initialize the slot values to 1,2,3...
1026             std::iota(&slots[0], &slots[10 * N], 1);
1027             int leftValue = slots[0];
1028             int rightValue = slots[numSlotsAffected * N];
1029 
1030             // Run the op (e.g. `add_n_ints`) over our data.
1031             SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
1032             SkRasterPipeline p(&alloc);
1033             auto* ctx = alloc.make<SkRasterPipeline_BinaryOpCtx>();
1034             ctx->dst = (float*)&slots[0];
1035             ctx->src = (float*)&slots[numSlotsAffected * N];
1036             p.append(op.stage, ctx);
1037             p.run(0,0,1,1);
1038 
1039             // Verify that the affected slots now equal (1,2,3...) op (4,5,6...).
1040             int* destPtr = &slots[0];
1041             for (int checkSlot = 0; checkSlot < 10; ++checkSlot) {
1042                 for (int checkLane = 0; checkLane < N; ++checkLane) {
1043                     if (checkSlot < numSlotsAffected) {
1044                         REPORTER_ASSERT(r, *destPtr == op.verify(leftValue, rightValue));
1045                     } else {
1046                         REPORTER_ASSERT(r, *destPtr == leftValue);
1047                     }
1048 
1049                     ++destPtr;
1050                     leftValue += 1;
1051                     rightValue += 1;
1052                 }
1053             }
1054         }
1055     }
1056 }
1057 
DEF_TEST(SkRasterPipeline_IntArithmeticWithHardcodedSlots, r) {
1059     // Allocate space for 5 dest and 5 source slots.
1060     alignas(64) int slots[10 * SkRasterPipeline_kMaxStride_highp];
1061     const int N = SkOpts::raster_pipeline_highp_stride;
1062 
1063     struct ArithmeticOp {
1064         SkRasterPipelineOp stage;
1065         int numSlotsAffected;
1066         std::function<int(int, int)> verify;
1067     };
1068 
1069     static const ArithmeticOp kArithmeticOps[] = {
1070         {SkRasterPipelineOp::add_int,            1, [](int a, int b) { return a + b; }},
1071         {SkRasterPipelineOp::sub_int,            1, [](int a, int b) { return a - b; }},
1072         {SkRasterPipelineOp::mul_int,            1, [](int a, int b) { return a * b; }},
1073         {SkRasterPipelineOp::div_int,            1, [](int a, int b) { return a / b; }},
1074         {SkRasterPipelineOp::div_uint,           1, divide_unsigned},
1075         {SkRasterPipelineOp::bitwise_and_int,    1, [](int a, int b) { return a & b; }},
1076         {SkRasterPipelineOp::bitwise_or_int,     1, [](int a, int b) { return a | b; }},
1077         {SkRasterPipelineOp::bitwise_xor_int,    1, [](int a, int b) { return a ^ b; }},
1078         {SkRasterPipelineOp::min_int,            1, [](int a, int b) { return a < b ? a: b; }},
1079         {SkRasterPipelineOp::min_uint,           1, min_unsigned},
1080         {SkRasterPipelineOp::max_int,            1, [](int a, int b) { return a > b ? a: b; }},
1081         {SkRasterPipelineOp::max_uint,           1, max_unsigned},
1082 
1083         {SkRasterPipelineOp::add_2_ints,         2, [](int a, int b) { return a + b; }},
1084         {SkRasterPipelineOp::sub_2_ints,         2, [](int a, int b) { return a - b; }},
1085         {SkRasterPipelineOp::mul_2_ints,         2, [](int a, int b) { return a * b; }},
1086         {SkRasterPipelineOp::div_2_ints,         2, [](int a, int b) { return a / b; }},
1087         {SkRasterPipelineOp::div_2_uints,        2, divide_unsigned},
1088         {SkRasterPipelineOp::bitwise_and_2_ints, 2, [](int a, int b) { return a & b; }},
1089         {SkRasterPipelineOp::bitwise_or_2_ints,  2, [](int a, int b) { return a | b; }},
1090         {SkRasterPipelineOp::bitwise_xor_2_ints, 2, [](int a, int b) { return a ^ b; }},
1091         {SkRasterPipelineOp::min_2_ints,         2, [](int a, int b) { return a < b ? a: b; }},
1092         {SkRasterPipelineOp::min_2_uints,        2, min_unsigned},
1093         {SkRasterPipelineOp::max_2_ints,         2, [](int a, int b) { return a > b ? a: b; }},
1094         {SkRasterPipelineOp::max_2_uints,        2, max_unsigned},
1095 
1096         {SkRasterPipelineOp::add_3_ints,         3, [](int a, int b) { return a + b; }},
1097         {SkRasterPipelineOp::sub_3_ints,         3, [](int a, int b) { return a - b; }},
1098         {SkRasterPipelineOp::mul_3_ints,         3, [](int a, int b) { return a * b; }},
1099         {SkRasterPipelineOp::div_3_ints,         3, [](int a, int b) { return a / b; }},
1100         {SkRasterPipelineOp::div_3_uints,        3, divide_unsigned},
1101         {SkRasterPipelineOp::bitwise_and_3_ints, 3, [](int a, int b) { return a & b; }},
1102         {SkRasterPipelineOp::bitwise_or_3_ints,  3, [](int a, int b) { return a | b; }},
1103         {SkRasterPipelineOp::bitwise_xor_3_ints, 3, [](int a, int b) { return a ^ b; }},
1104         {SkRasterPipelineOp::min_3_ints,         3, [](int a, int b) { return a < b ? a: b; }},
1105         {SkRasterPipelineOp::min_3_uints,        3, min_unsigned},
1106         {SkRasterPipelineOp::max_3_ints,         3, [](int a, int b) { return a > b ? a: b; }},
1107         {SkRasterPipelineOp::max_3_uints,        3, max_unsigned},
1108 
1109         {SkRasterPipelineOp::add_4_ints,         4, [](int a, int b) { return a + b; }},
1110         {SkRasterPipelineOp::sub_4_ints,         4, [](int a, int b) { return a - b; }},
1111         {SkRasterPipelineOp::mul_4_ints,         4, [](int a, int b) { return a * b; }},
1112         {SkRasterPipelineOp::div_4_ints,         4, [](int a, int b) { return a / b; }},
1113         {SkRasterPipelineOp::div_4_uints,        4, divide_unsigned},
1114         {SkRasterPipelineOp::bitwise_and_4_ints, 4, [](int a, int b) { return a & b; }},
1115         {SkRasterPipelineOp::bitwise_or_4_ints,  4, [](int a, int b) { return a | b; }},
1116         {SkRasterPipelineOp::bitwise_xor_4_ints, 4, [](int a, int b) { return a ^ b; }},
1117         {SkRasterPipelineOp::min_4_ints,         4, [](int a, int b) { return a < b ? a: b; }},
1118         {SkRasterPipelineOp::min_4_uints,        4, min_unsigned},
1119         {SkRasterPipelineOp::max_4_ints,         4, [](int a, int b) { return a > b ? a: b; }},
1120         {SkRasterPipelineOp::max_4_uints,        4, max_unsigned},
1121     };
1122 
1123     for (const ArithmeticOp& op : kArithmeticOps) {
1124         // Initialize the slot values to 1,2,3...
1125         std::iota(&slots[0], &slots[10 * N], 1);
1126         int leftValue = slots[0];
1127         int rightValue = slots[op.numSlotsAffected * N];
1128 
1129         // Run the op (e.g. `add_2_ints`) over our data.
1130         SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
1131         SkRasterPipeline p(&alloc);
1132         p.append(op.stage, &slots[0]);
1133         p.run(0,0,1,1);
1134 
1135         // Verify that the affected slots now equal (1,2,3...) op (4,5,6...).
1136         int* destPtr = &slots[0];
1137         for (int checkSlot = 0; checkSlot < 10; ++checkSlot) {
1138             for (int checkLane = 0; checkLane < N; ++checkLane) {
1139                 if (checkSlot < op.numSlotsAffected) {
1140                     REPORTER_ASSERT(r, *destPtr == op.verify(leftValue, rightValue));
1141                 } else {
1142                     REPORTER_ASSERT(r, *destPtr == leftValue);
1143                 }
1144 
1145                 ++destPtr;
1146                 leftValue += 1;
1147                 rightValue += 1;
1148             }
1149         }
1150     }
1151 }
1152 
1153 DEF_TEST(SkRasterPipeline_CompareFloatsWithNSlots, r) {
1154     // Allocate space for 5 dest and 5 source slots.
1155     alignas(64) float slots[10 * SkRasterPipeline_kMaxStride_highp];
1156     const int N = SkOpts::raster_pipeline_highp_stride;
1157 
1158     struct CompareOp {
1159         SkRasterPipelineOp stage;
1160         std::function<bool(float, float)> verify;
1161     };
1162 
1163     static const CompareOp kCompareOps[] = {
1164         {SkRasterPipelineOp::cmpeq_n_floats, [](float a, float b) { return a == b; }},
1165         {SkRasterPipelineOp::cmpne_n_floats, [](float a, float b) { return a != b; }},
1166         {SkRasterPipelineOp::cmplt_n_floats, [](float a, float b) { return a <  b; }},
1167         {SkRasterPipelineOp::cmple_n_floats, [](float a, float b) { return a <= b; }},
1168     };
1169 
1170     for (const CompareOp& op : kCompareOps) {
1171         for (int numSlotsAffected = 1; numSlotsAffected <= 5; ++numSlotsAffected) {
1172             // Initialize the slot values to 0,1,2,0,1,2,0,1,2...
1173             for (int index = 0; index < 10 * N; ++index) {
1174                 slots[index] = std::fmod(index, 3.0f);
1175             }
1176 
1177             float leftValue  = slots[0];
1178             float rightValue = slots[numSlotsAffected * N];
1179 
1180             // Run the comparison op over our data.
1181             SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
1182             SkRasterPipeline p(&alloc);
1183             auto* ctx = alloc.make<SkRasterPipeline_BinaryOpCtx>();
1184             ctx->dst = &slots[0];
1185             ctx->src = &slots[numSlotsAffected * N];
1186             p.append(op.stage, ctx);
1187             p.run(0, 0, 1, 1);
1188 
1189             // Verify that the affected slots now contain "(0,1,2,0...) op (1,2,0,1...)".
1190             float* destPtr = &slots[0];
1191             for (int checkSlot = 0; checkSlot < 10; ++checkSlot) {
1192                 for (int checkLane = 0; checkLane < N; ++checkLane) {
1193                     if (checkSlot < numSlotsAffected) {
1194                         bool compareIsTrue = op.verify(leftValue, rightValue);
1195                         REPORTER_ASSERT(r, *(int*)destPtr == (compareIsTrue ? ~0 : 0));
1196                     } else {
1197                         REPORTER_ASSERT(r, *destPtr == leftValue);
1198                     }
1199 
1200                     ++destPtr;
1201                     leftValue = std::fmod(leftValue + 1.0f, 3.0f);
1202                     rightValue = std::fmod(rightValue + 1.0f, 3.0f);
1203                 }
1204             }
1205         }
1206     }
1207 }
1208 
1209 DEF_TEST(SkRasterPipeline_CompareFloatsWithHardcodedSlots, r) {
1210     // Allocate space for 5 dest and 5 source slots.
1211     alignas(64) float slots[10 * SkRasterPipeline_kMaxStride_highp];
1212     const int N = SkOpts::raster_pipeline_highp_stride;
1213 
1214     struct CompareOp {
1215         SkRasterPipelineOp stage;
1216         int numSlotsAffected;
1217         std::function<bool(float, float)> verify;
1218     };
1219 
1220     static const CompareOp kCompareOps[] = {
1221         {SkRasterPipelineOp::cmpeq_float,    1, [](float a, float b) { return a == b; }},
1222         {SkRasterPipelineOp::cmpne_float,    1, [](float a, float b) { return a != b; }},
1223         {SkRasterPipelineOp::cmplt_float,    1, [](float a, float b) { return a <  b; }},
1224         {SkRasterPipelineOp::cmple_float,    1, [](float a, float b) { return a <= b; }},
1225 
1226         {SkRasterPipelineOp::cmpeq_2_floats, 2, [](float a, float b) { return a == b; }},
1227         {SkRasterPipelineOp::cmpne_2_floats, 2, [](float a, float b) { return a != b; }},
1228         {SkRasterPipelineOp::cmplt_2_floats, 2, [](float a, float b) { return a <  b; }},
1229         {SkRasterPipelineOp::cmple_2_floats, 2, [](float a, float b) { return a <= b; }},
1230 
1231         {SkRasterPipelineOp::cmpeq_3_floats, 3, [](float a, float b) { return a == b; }},
1232         {SkRasterPipelineOp::cmpne_3_floats, 3, [](float a, float b) { return a != b; }},
1233         {SkRasterPipelineOp::cmplt_3_floats, 3, [](float a, float b) { return a <  b; }},
1234         {SkRasterPipelineOp::cmple_3_floats, 3, [](float a, float b) { return a <= b; }},
1235 
1236         {SkRasterPipelineOp::cmpeq_4_floats, 4, [](float a, float b) { return a == b; }},
1237         {SkRasterPipelineOp::cmpne_4_floats, 4, [](float a, float b) { return a != b; }},
1238         {SkRasterPipelineOp::cmplt_4_floats, 4, [](float a, float b) { return a <  b; }},
1239         {SkRasterPipelineOp::cmple_4_floats, 4, [](float a, float b) { return a <= b; }},
1240     };
1241 
1242     for (const CompareOp& op : kCompareOps) {
1243         // Initialize the slot values to 0,1,2,0,1,2,0,1,2...
1244         for (int index = 0; index < 10 * N; ++index) {
1245             slots[index] = std::fmod(index, 3.0f);
1246         }
1247 
1248         float leftValue  = slots[0];
1249         float rightValue = slots[op.numSlotsAffected * N];
1250 
1251         // Run the comparison op over our data.
1252         SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
1253         SkRasterPipeline p(&alloc);
1254         p.append(op.stage, &slots[0]);
1255         p.run(0, 0, 1, 1);
1256 
1257         // Verify that the affected slots now contain "(0,1,2,0...) op (1,2,0,1...)".
1258         float* destPtr = &slots[0];
1259         for (int checkSlot = 0; checkSlot < 10; ++checkSlot) {
1260             for (int checkLane = 0; checkLane < N; ++checkLane) {
1261                 if (checkSlot < op.numSlotsAffected) {
1262                     bool compareIsTrue = op.verify(leftValue, rightValue);
1263                     REPORTER_ASSERT(r, *(int*)destPtr == (compareIsTrue ? ~0 : 0));
1264                 } else {
1265                     REPORTER_ASSERT(r, *destPtr == leftValue);
1266                 }
1267 
1268                 ++destPtr;
1269                 leftValue = std::fmod(leftValue + 1.0f, 3.0f);
1270                 rightValue = std::fmod(rightValue + 1.0f, 3.0f);
1271             }
1272         }
1273     }
1274 }
1275 
1276 static bool compare_lt_uint  (int a, int b) { return uint32_t(a) <  uint32_t(b); }
1277 static bool compare_lteq_uint(int a, int b) { return uint32_t(a) <= uint32_t(b); }
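// Casting to uint32_t means a lane holding -1 (0xFFFFFFFF) compares as the largest possible
// value, so the unsigned comparisons in the tests below order it above 0 and 1.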
1278 
1279 DEF_TEST(SkRasterPipeline_CompareIntsWithNSlots, r) {
1280     // Allocate space for 5 dest and 5 source slots.
1281     alignas(64) int slots[10 * SkRasterPipeline_kMaxStride_highp];
1282     const int N = SkOpts::raster_pipeline_highp_stride;
1283 
1284     struct CompareOp {
1285         SkRasterPipelineOp stage;
1286         std::function<bool(int, int)> verify;
1287     };
1288 
1289     static const CompareOp kCompareOps[] = {
1290         {SkRasterPipelineOp::cmpeq_n_ints,  [](int a, int b) { return a == b; }},
1291         {SkRasterPipelineOp::cmpne_n_ints,  [](int a, int b) { return a != b; }},
1292         {SkRasterPipelineOp::cmplt_n_ints,  [](int a, int b) { return a <  b; }},
1293         {SkRasterPipelineOp::cmple_n_ints,  [](int a, int b) { return a <= b; }},
1294         {SkRasterPipelineOp::cmplt_n_uints, compare_lt_uint},
1295         {SkRasterPipelineOp::cmple_n_uints, compare_lteq_uint},
1296     };
1297 
1298     for (const CompareOp& op : kCompareOps) {
1299         for (int numSlotsAffected = 1; numSlotsAffected <= 5; ++numSlotsAffected) {
1300             // Initialize the slot values to -1,0,1,-1,0,1,-1,0,1,-1...
1301             for (int index = 0; index < 10 * N; ++index) {
1302                 slots[index] = (index % 3) - 1;
1303             }
1304 
1305             int leftValue = slots[0];
1306             int rightValue = slots[numSlotsAffected * N];
1307 
1308             // Run the comparison op over our data.
1309             SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
1310             SkRasterPipeline p(&alloc);
1311             auto* ctx = alloc.make<SkRasterPipeline_BinaryOpCtx>();
1312             ctx->dst = (float*)&slots[0];
1313             ctx->src = (float*)&slots[numSlotsAffected * N];
1314             p.append(op.stage, ctx);
1315             p.run(0, 0, 1, 1);
1316 
1317             // Verify that the affected slots now contain "(-1,0,1,-1...) op (0,1,-1,0...)".
1318             int* destPtr = &slots[0];
1319             for (int checkSlot = 0; checkSlot < 10; ++checkSlot) {
1320                 for (int checkLane = 0; checkLane < N; ++checkLane) {
1321                     if (checkSlot < numSlotsAffected) {
1322                         bool compareIsTrue = op.verify(leftValue, rightValue);
1323                         REPORTER_ASSERT(r, *destPtr == (compareIsTrue ? ~0 : 0));
1324                     } else {
1325                         REPORTER_ASSERT(r, *destPtr == leftValue);
1326                     }
1327 
1328                     ++destPtr;
1329                     if (++leftValue == 2) {
1330                         leftValue = -1;
1331                     }
1332                     if (++rightValue == 2) {
1333                         rightValue = -1;
1334                     }
1335                 }
1336             }
1337         }
1338     }
1339 }
1340 
1341 DEF_TEST(SkRasterPipeline_CompareIntsWithHardcodedSlots, r) {
1342     // Allocate space for 5 dest and 5 source slots.
1343     alignas(64) int slots[10 * SkRasterPipeline_kMaxStride_highp];
1344     const int N = SkOpts::raster_pipeline_highp_stride;
1345 
1346     struct CompareOp {
1347         SkRasterPipelineOp stage;
1348         int numSlotsAffected;
1349         std::function<bool(int, int)> verify;
1350     };
1351 
1352     static const CompareOp kCompareOps[] = {
1353         {SkRasterPipelineOp::cmpeq_int,     1, [](int a, int b) { return a == b; }},
1354         {SkRasterPipelineOp::cmpne_int,     1, [](int a, int b) { return a != b; }},
1355         {SkRasterPipelineOp::cmplt_int,     1, [](int a, int b) { return a <  b; }},
1356         {SkRasterPipelineOp::cmple_int,     1, [](int a, int b) { return a <= b; }},
1357         {SkRasterPipelineOp::cmplt_uint,    1, compare_lt_uint},
1358         {SkRasterPipelineOp::cmple_uint,    1, compare_lteq_uint},
1359 
1360         {SkRasterPipelineOp::cmpeq_2_ints,  2, [](int a, int b) { return a == b; }},
1361         {SkRasterPipelineOp::cmpne_2_ints,  2, [](int a, int b) { return a != b; }},
1362         {SkRasterPipelineOp::cmplt_2_ints,  2, [](int a, int b) { return a <  b; }},
1363         {SkRasterPipelineOp::cmple_2_ints,  2, [](int a, int b) { return a <= b; }},
1364         {SkRasterPipelineOp::cmplt_2_uints, 2, compare_lt_uint},
1365         {SkRasterPipelineOp::cmple_2_uints, 2, compare_lteq_uint},
1366 
1367         {SkRasterPipelineOp::cmpeq_3_ints,  3, [](int a, int b) { return a == b; }},
1368         {SkRasterPipelineOp::cmpne_3_ints,  3, [](int a, int b) { return a != b; }},
1369         {SkRasterPipelineOp::cmplt_3_ints,  3, [](int a, int b) { return a <  b; }},
1370         {SkRasterPipelineOp::cmple_3_ints,  3, [](int a, int b) { return a <= b; }},
1371         {SkRasterPipelineOp::cmplt_3_uints, 3, compare_lt_uint},
1372         {SkRasterPipelineOp::cmple_3_uints, 3, compare_lteq_uint},
1373 
1374         {SkRasterPipelineOp::cmpeq_4_ints,  4, [](int a, int b) { return a == b; }},
1375         {SkRasterPipelineOp::cmpne_4_ints,  4, [](int a, int b) { return a != b; }},
1376         {SkRasterPipelineOp::cmplt_4_ints,  4, [](int a, int b) { return a <  b; }},
1377         {SkRasterPipelineOp::cmple_4_ints,  4, [](int a, int b) { return a <= b; }},
1378         {SkRasterPipelineOp::cmplt_4_uints, 4, compare_lt_uint},
1379         {SkRasterPipelineOp::cmple_4_uints, 4, compare_lteq_uint},
1380     };
1381 
1382     for (const CompareOp& op : kCompareOps) {
1383         // Initialize the slot values to -1,0,1,-1,0,1,-1,0,1,-1...
1384         for (int index = 0; index < 10 * N; ++index) {
1385             slots[index] = (index % 3) - 1;
1386         }
1387 
1388         int leftValue = slots[0];
1389         int rightValue = slots[op.numSlotsAffected * N];
1390 
1391         // Run the comparison op over our data.
1392         SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
1393         SkRasterPipeline p(&alloc);
1394         p.append(op.stage, &slots[0]);
1395         p.run(0, 0, 1, 1);
1396 
1397         // Verify that the affected slots now contain "(-1,0,1,-1...) op (0,1,-1,0...)".
1398         int* destPtr = &slots[0];
1399         for (int checkSlot = 0; checkSlot < 10; ++checkSlot) {
1400             for (int checkLane = 0; checkLane < N; ++checkLane) {
1401                 if (checkSlot < op.numSlotsAffected) {
1402                     bool compareIsTrue = op.verify(leftValue, rightValue);
1403                     REPORTER_ASSERT(r, *destPtr == (compareIsTrue ? ~0 : 0));
1404                 } else {
1405                     REPORTER_ASSERT(r, *destPtr == leftValue);
1406                 }
1407 
1408                 ++destPtr;
1409                 if (++leftValue == 2) {
1410                     leftValue = -1;
1411                 }
1412                 if (++rightValue == 2) {
1413                     rightValue = -1;
1414                 }
1415             }
1416         }
1417     }
1418 }
1419 
1420 static int to_float(int a) { return sk_bit_cast<int>((float)a); }
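// For example, to_float(1) converts 1 to 1.0f and returns its bit pattern, 0x3F800000, as an int.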
1421 
1422 DEF_TEST(SkRasterPipeline_UnaryIntOps, r) {
1423     // Allocate space for 5 slots.
1424     alignas(64) int slots[5 * SkRasterPipeline_kMaxStride_highp];
1425     const int N = SkOpts::raster_pipeline_highp_stride;
1426 
1427     struct UnaryOp {
1428         SkRasterPipelineOp stage;
1429         int numSlotsAffected;
1430         std::function<int(int)> verify;
1431     };
1432 
1433     static const UnaryOp kUnaryOps[] = {
1434         {SkRasterPipelineOp::bitwise_not_int,    1, [](int a) { return ~a; }},
1435         {SkRasterPipelineOp::bitwise_not_2_ints, 2, [](int a) { return ~a; }},
1436         {SkRasterPipelineOp::bitwise_not_3_ints, 3, [](int a) { return ~a; }},
1437         {SkRasterPipelineOp::bitwise_not_4_ints, 4, [](int a) { return ~a; }},
1438 
1439         {SkRasterPipelineOp::cast_to_float_from_int,    1, to_float},
1440         {SkRasterPipelineOp::cast_to_float_from_2_ints, 2, to_float},
1441         {SkRasterPipelineOp::cast_to_float_from_3_ints, 3, to_float},
1442         {SkRasterPipelineOp::cast_to_float_from_4_ints, 4, to_float},
1443 
1444         {SkRasterPipelineOp::abs_int,    1, [](int a) { return a < 0 ? -a : a; }},
1445         {SkRasterPipelineOp::abs_2_ints, 2, [](int a) { return a < 0 ? -a : a; }},
1446         {SkRasterPipelineOp::abs_3_ints, 3, [](int a) { return a < 0 ? -a : a; }},
1447         {SkRasterPipelineOp::abs_4_ints, 4, [](int a) { return a < 0 ? -a : a; }},
1448     };
1449 
1450     for (const UnaryOp& op : kUnaryOps) {
1451         // Initialize the slot values to -10,-9,-8...
1452         std::iota(&slots[0], &slots[5 * N], -10);
1453         int inputValue = slots[0];
1454 
1455         // Run the unary op over our data.
1456         SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
1457         SkRasterPipeline p(&alloc);
1458         p.append(op.stage, &slots[0]);
1459         p.run(0, 0, 1, 1);
1460 
1461         // Verify that the destination slots have been updated.
1462         int* destPtr = &slots[0];
1463         for (int checkSlot = 0; checkSlot < 5; ++checkSlot) {
1464             for (int checkLane = 0; checkLane < N; ++checkLane) {
1465                 if (checkSlot < op.numSlotsAffected) {
1466                     int expected = op.verify(inputValue);
1467                     REPORTER_ASSERT(r, *destPtr == expected);
1468                 } else {
1469                     REPORTER_ASSERT(r, *destPtr == inputValue);
1470                 }
1471 
1472                 ++destPtr;
1473                 ++inputValue;
1474             }
1475         }
1476     }
1477 }
1478 
1479 static float to_int(float a)  { return sk_bit_cast<float>((int)a); }
1480 static float to_uint(float a) { return sk_bit_cast<float>((unsigned int)a); }
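// For example, to_int(2.0f) truncates 2.0f to the integer 2 and returns the float whose bit
// pattern is 0x00000002 (a tiny denormal value).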
1481 
1482 DEF_TEST(SkRasterPipeline_UnaryFloatOps, r) {
1483     // Allocate space for 5 slots.
1484     alignas(64) float slots[5 * SkRasterPipeline_kMaxStride_highp];
1485     const int N = SkOpts::raster_pipeline_highp_stride;
1486 
1487     struct UnaryOp {
1488         SkRasterPipelineOp stage;
1489         int numSlotsAffected;
1490         std::function<float(float)> verify;
1491     };
1492 
1493     static const UnaryOp kUnaryOps[] = {
1494         {SkRasterPipelineOp::cast_to_int_from_float,    1, to_int},
1495         {SkRasterPipelineOp::cast_to_int_from_2_floats, 2, to_int},
1496         {SkRasterPipelineOp::cast_to_int_from_3_floats, 3, to_int},
1497         {SkRasterPipelineOp::cast_to_int_from_4_floats, 4, to_int},
1498 
1499         {SkRasterPipelineOp::cast_to_uint_from_float,    1, to_uint},
1500         {SkRasterPipelineOp::cast_to_uint_from_2_floats, 2, to_uint},
1501         {SkRasterPipelineOp::cast_to_uint_from_3_floats, 3, to_uint},
1502         {SkRasterPipelineOp::cast_to_uint_from_4_floats, 4, to_uint},
1503 
1504         {SkRasterPipelineOp::abs_float,    1, [](float a) { return a < 0 ? -a : a; }},
1505         {SkRasterPipelineOp::abs_2_floats, 2, [](float a) { return a < 0 ? -a : a; }},
1506         {SkRasterPipelineOp::abs_3_floats, 3, [](float a) { return a < 0 ? -a : a; }},
1507         {SkRasterPipelineOp::abs_4_floats, 4, [](float a) { return a < 0 ? -a : a; }},
1508 
1509         {SkRasterPipelineOp::floor_float,    1, [](float a) { return floorf(a); }},
1510         {SkRasterPipelineOp::floor_2_floats, 2, [](float a) { return floorf(a); }},
1511         {SkRasterPipelineOp::floor_3_floats, 3, [](float a) { return floorf(a); }},
1512         {SkRasterPipelineOp::floor_4_floats, 4, [](float a) { return floorf(a); }},
1513 
1514         {SkRasterPipelineOp::ceil_float,    1, [](float a) { return ceilf(a); }},
1515         {SkRasterPipelineOp::ceil_2_floats, 2, [](float a) { return ceilf(a); }},
1516         {SkRasterPipelineOp::ceil_3_floats, 3, [](float a) { return ceilf(a); }},
1517         {SkRasterPipelineOp::ceil_4_floats, 4, [](float a) { return ceilf(a); }},
1518     };
1519 
1520     for (const UnaryOp& op : kUnaryOps) {
1521         // The results of some ops are undefined with negative inputs, so only test positive values.
1522         bool positiveOnly = (op.stage == SkRasterPipelineOp::cast_to_uint_from_float ||
1523                              op.stage == SkRasterPipelineOp::cast_to_uint_from_2_floats ||
1524                              op.stage == SkRasterPipelineOp::cast_to_uint_from_3_floats ||
1525                              op.stage == SkRasterPipelineOp::cast_to_uint_from_4_floats);
1526 
1527         float iotaStart = positiveOnly ? 1.0f : -9.75f;
1528         std::iota(&slots[0], &slots[5 * N], iotaStart);
1529         float inputValue = slots[0];
1530 
1531         // Run the unary op over our data.
1532         SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
1533         SkRasterPipeline p(&alloc);
1534         p.append(op.stage, &slots[0]);
1535         p.run(0, 0, 1, 1);
1536 
1537         // Verify that the destination slots have been updated.
1538         float* destPtr = &slots[0];
1539         for (int checkSlot = 0; checkSlot < 5; ++checkSlot) {
1540             for (int checkLane = 0; checkLane < N; ++checkLane) {
1541                 if (checkSlot < op.numSlotsAffected) {
1542                     float expected = op.verify(inputValue);
1543                     // The casting tests can generate NaN, depending on the input value, so a value
1544                     // match (via ==) might not succeed.
1545                     // The ceil tests can generate negative zeros _sometimes_, depending on the
1546                     // exact implementation of ceil(), so a bitwise match might not succeed.
1547                     // Because of this, we allow either a value match or a bitwise match.
1548                     bool bitwiseMatch = (0 == memcmp(destPtr, &expected, sizeof(float)));
1549                     bool valueMatch   = (*destPtr == expected);
1550                     REPORTER_ASSERT(r, valueMatch || bitwiseMatch);
1551                 } else {
1552                     REPORTER_ASSERT(r, *destPtr == inputValue);
1553                 }
1554 
1555                 ++destPtr;
1556                 ++inputValue;
1557             }
1558         }
1559     }
1560 }
1561 
1562 static float to_mix_weight(float value) {
1563     // Convert a positive value to a mix-weight (a number between 0 and 1).
1564     value /= 16.0f;
1565     return value - std::floor(value);
1566 }
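// For example, to_mix_weight(1.0f) == 0.0625 and to_mix_weight(20.0f) == 0.25; every positive
// input lands in the half-open range [0, 1).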
1567 
1568 DEF_TEST(SkRasterPipeline_MixTest, r) {
1569     // Allocate space for 5 dest and 10 source slots.
1570     alignas(64) float slots[15 * SkRasterPipeline_kMaxStride_highp];
1571     const int N = SkOpts::raster_pipeline_highp_stride;
1572 
1573     struct MixOp {
1574         int numSlotsAffected;
1575         std::function<void(SkRasterPipeline*, SkArenaAlloc*)> append;
1576     };
1577 
1578     static const MixOp kMixOps[] = {
1579         {1, [&](SkRasterPipeline* p, SkArenaAlloc* alloc) {
1580                 p->append(SkRasterPipelineOp::mix_float, slots);
1581             }},
1582         {2, [&](SkRasterPipeline* p, SkArenaAlloc* alloc) {
1583                 p->append(SkRasterPipelineOp::mix_2_floats, slots);
1584             }},
1585         {3, [&](SkRasterPipeline* p, SkArenaAlloc* alloc) {
1586                 p->append(SkRasterPipelineOp::mix_3_floats, slots);
1587             }},
1588         {4, [&](SkRasterPipeline* p, SkArenaAlloc* alloc) {
1589                 p->append(SkRasterPipelineOp::mix_4_floats, slots);
1590             }},
1591         {5, [&](SkRasterPipeline* p, SkArenaAlloc* alloc) {
1592                 auto* ctx = alloc->make<SkRasterPipeline_TernaryOpCtx>();
1593                 ctx->dst = &slots[0];
1594                 ctx->src0 = &slots[5 * N];
1595                 ctx->src1 = &slots[10 * N];
1596                 p->append(SkRasterPipelineOp::mix_n_floats, ctx);
1597             }},
1598     };
1599 
1600     for (const MixOp& op : kMixOps) {
1601         // Initialize the values to 1,2,3...
1602         std::iota(&slots[0], &slots[15 * N], 1.0f);
1603 
1604         float weightValue = slots[0];
1605         float fromValue   = slots[1 * op.numSlotsAffected * N];
1606         float toValue     = slots[2 * op.numSlotsAffected * N];
1607 
1608         // The first group of values (the weights) must be between zero and one.
1609         for (int idx = 0; idx < 1 * op.numSlotsAffected * N; ++idx) {
1610             slots[idx] = to_mix_weight(slots[idx]);
1611         }
1612 
1613         // Run the mix op over our data.
1614         SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
1615         SkRasterPipeline p(&alloc);
1616         op.append(&p, &alloc);
1617         p.run(0,0,1,1);
1618 
1619         // Verify that the affected slots now equal mix({0.25, 0.3125...}, {3,4...}, {5,6...}).
1620         float* destPtr = &slots[0];
1621         for (int checkSlot = 0; checkSlot < op.numSlotsAffected; ++checkSlot) {
1622             for (int checkLane = 0; checkLane < N; ++checkLane) {
1623                 float checkValue = (toValue - fromValue) * to_mix_weight(weightValue) + fromValue;
1624                 REPORTER_ASSERT(r, *destPtr == checkValue);
1625 
1626                 ++destPtr;
1627                 fromValue += 1.0f;
1628                 toValue += 1.0f;
1629                 weightValue += 1.0f;
1630             }
1631         }
1632     }
1633 }
1634 
1635 DEF_TEST(SkRasterPipeline_Jump, r) {
1636     // Allocate space for 4 slots.
1637     alignas(64) float slots[4 * SkRasterPipeline_kMaxStride_highp] = {};
1638     const int N = SkOpts::raster_pipeline_highp_stride;
1639 
1640     alignas(64) static constexpr float kColorDarkRed[4] = {0.5f, 0.0f, 0.0f, 0.75f};
1641     alignas(64) static constexpr float kColorGreen[4]   = {0.0f, 1.0f, 0.0f, 1.0f};
1642     const int offset = 2;
1643 
1644     // Make a program which jumps over an append_constant_color op.
1645     SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
1646     SkRasterPipeline p(&alloc);
1647     p.append_constant_color(&alloc, kColorGreen);      // assign green
1648     p.append(SkRasterPipelineOp::jump, &offset);       // jump over the dark-red color assignment
1649     p.append_constant_color(&alloc, kColorDarkRed);    // (not executed)
1650     p.append(SkRasterPipelineOp::store_src, slots);    // store the result so we can check it
1651     p.run(0,0,1,1);
1652 
1653     // Verify that the slots contain green.
1654     float* destPtr = &slots[0];
1655     for (int checkSlot = 0; checkSlot < 4; ++checkSlot) {
1656         for (int checkLane = 0; checkLane < N; ++checkLane) {
1657             REPORTER_ASSERT(r, *destPtr == kColorGreen[checkSlot]);
1658             ++destPtr;
1659         }
1660     }
1661 }
1662 
1663 DEF_TEST(SkRasterPipeline_BranchIfAnyActiveLanes, r) {
1664     // Allocate space for 4 slots.
1665     alignas(64) float slots[4 * SkRasterPipeline_kMaxStride_highp] = {};
1666     const int N = SkOpts::raster_pipeline_highp_stride;
1667 
1668     alignas(64) static constexpr float kColorDarkRed[4] = {0.5f, 0.0f, 0.0f, 0.75f};
1669     alignas(64) static constexpr float kColorGreen[4]   = {0.0f, 1.0f, 0.0f, 1.0f};
1670     SkRasterPipeline_BranchCtx ctx;
1671     ctx.offset = 2;
1672 
1673     // An array of all zeros.
1674     alignas(64) static constexpr int32_t kNoLanesActive[4 * SkRasterPipeline_kMaxStride_highp] = {};
1675 
1676     // An array of all zeros, except for a single ~0 in the first dA slot.
1677     alignas(64) int32_t oneLaneActive[4 * SkRasterPipeline_kMaxStride_highp] = {};
1678     oneLaneActive[3*N] = ~0;
1679 
1680     // Make a program which conditionally branches past two append_constant_color ops.
1681     SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
1682     SkRasterPipeline p(&alloc);
1683     p.append_constant_color(&alloc, kColorDarkRed);                    // set the color to dark red
1684     p.append(SkRasterPipelineOp::load_dst, kNoLanesActive);            // make no lanes active
1685     p.append(SkRasterPipelineOp::branch_if_any_active_lanes, &ctx);    // do not skip past next line
1686     p.append_constant_color(&alloc, kColorGreen);                      // set the color to green
1687     p.append(SkRasterPipelineOp::load_dst, oneLaneActive);             // set one lane active
1688     p.append(SkRasterPipelineOp::branch_if_any_active_lanes, &ctx);    // skip past next line
1689     p.append_constant_color(&alloc, kColorDarkRed);                    // (not executed)
1690     p.append(SkRasterPipelineOp::init_lane_masks);                     // set all lanes active
1691     p.append(SkRasterPipelineOp::branch_if_any_active_lanes, &ctx);    // skip past next line
1692     p.append_constant_color(&alloc, kColorDarkRed);                    // (not executed)
1693     p.append(SkRasterPipelineOp::store_src, slots);                    // store final color
1694     p.run(0,0,1,1);
1695 
1696     // Verify that the slots contain green.
1697     float* destPtr = &slots[0];
1698     for (int checkSlot = 0; checkSlot < 4; ++checkSlot) {
1699         for (int checkLane = 0; checkLane < N; ++checkLane) {
1700             REPORTER_ASSERT(r, *destPtr == kColorGreen[checkSlot]);
1701             ++destPtr;
1702         }
1703     }
1704 }
1705 
1706 DEF_TEST(SkRasterPipeline_BranchIfNoActiveLanes, r) {
1707     // Allocate space for 4 slots.
1708     alignas(64) float slots[4 * SkRasterPipeline_kMaxStride_highp] = {};
1709     const int N = SkOpts::raster_pipeline_highp_stride;
1710 
1711     alignas(64) static constexpr float kColorBlack[4]   = {0.0f, 0.0f, 0.0f, 0.0f};
1712     alignas(64) static constexpr float kColorRed[4]     = {1.0f, 0.0f, 0.0f, 1.0f};
1713     alignas(64) static constexpr float kColorBlue[4]    = {0.0f, 0.0f, 1.0f, 1.0f};
1714     SkRasterPipeline_BranchCtx ctx;
1715     ctx.offset = 2;
1716 
1717     // An array of all zeros.
1718     alignas(64) static constexpr int32_t kNoLanesActive[4 * SkRasterPipeline_kMaxStride_highp] = {};
1719 
1720     // An array of all zeros, except for a single ~0 in the first dA slot.
1721     alignas(64) int32_t oneLaneActive[4 * SkRasterPipeline_kMaxStride_highp] = {};
1722     oneLaneActive[3*N] = ~0;
1723 
1724     // Make a program which conditionally branches past an append_constant_color op.
1725     SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
1726     SkRasterPipeline p(&alloc);
1727     p.append_constant_color(&alloc, kColorBlack);                      // set the color to black
1728     p.append(SkRasterPipelineOp::init_lane_masks);                     // set all lanes active
1729     p.append(SkRasterPipelineOp::branch_if_no_active_lanes, &ctx);     // do not skip past next line
1730     p.append_constant_color(&alloc, kColorRed);                        // sets the color to red
1731     p.append(SkRasterPipelineOp::load_dst, oneLaneActive);             // set one lane active
1732     p.append(SkRasterPipelineOp::branch_if_no_active_lanes, &ctx);     // do not skip past next line
1733     p.append(SkRasterPipelineOp::swap_rb);                             // swap R and B (making blue)
1734     p.append(SkRasterPipelineOp::load_dst, kNoLanesActive);            // make no lanes active
1735     p.append(SkRasterPipelineOp::branch_if_no_active_lanes, &ctx);     // skip past next line
1736     p.append_constant_color(&alloc, kColorBlack);                      // (not executed)
1737     p.append(SkRasterPipelineOp::store_src, slots);                    // store final blue color
1738     p.run(0,0,1,1);
1739 
1740     // Verify that the slots contain blue.
1741     float* destPtr = &slots[0];
1742     for (int checkSlot = 0; checkSlot < 4; ++checkSlot) {
1743         for (int checkLane = 0; checkLane < N; ++checkLane) {
1744             REPORTER_ASSERT(r, *destPtr == kColorBlue[checkSlot]);
1745             ++destPtr;
1746         }
1747     }
1748 }
1749 
1750 DEF_TEST(SkRasterPipeline_BranchIfActiveLanesEqual, r) {
1751     // Allocate space for 4 slots.
1752     alignas(64) float slots[4 * SkRasterPipeline_kMaxStride_highp] = {};
1753     const int N = SkOpts::raster_pipeline_highp_stride;
1754 
1755     alignas(64) static constexpr float kColorBlack[4]   = {0.0f, 0.0f, 0.0f, 0.0f};
1756     alignas(64) static constexpr float kColorRed[4]     = {1.0f, 0.0f, 0.0f, 1.0f};
1757 
1758     // An array of all 6s.
1759     alignas(64) int allSixes[SkRasterPipeline_kMaxStride_highp] = {};
1760     std::fill(std::begin(allSixes), std::end(allSixes), 6);
1761 
1762     // An array of all 6s, except for a single 5 in one lane.
1763     alignas(64) int mostlySixesWithOneFive[SkRasterPipeline_kMaxStride_highp] = {};
1764     std::fill(std::begin(mostlySixesWithOneFive), std::end(mostlySixesWithOneFive), 6);
1765     mostlySixesWithOneFive[N - 1] = 5;
1766 
1767     // A condition mask with all lanes on except for the six-lane.
1768     alignas(64) int mask[SkRasterPipeline_kMaxStride_highp] = {};
1769     std::fill(std::begin(mask), std::end(mask), ~0);
1770     mask[N - 1] = 0;
1771 
1772     SkRasterPipeline_BranchIfEqualCtx matching; // comparing all-six vs five will match
1773     matching.offset = 2;
1774     matching.value = 5;
1775     matching.ptr = allSixes;
1776 
1777     SkRasterPipeline_BranchIfEqualCtx nonmatching;  // comparing mostly-six vs five won't match
1778     nonmatching.offset = 2;
1779     nonmatching.value = 5;
1780     nonmatching.ptr = mostlySixesWithOneFive;
1781 
1782     // Make a program which conditionally branches past a swap_rb op.
1783     SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
1784     SkRasterPipeline p(&alloc);
1785     p.append_constant_color(&alloc, kColorBlack);                          // set the color to black
1786     p.append(SkRasterPipelineOp::init_lane_masks);                         // set all lanes active
1787     p.append(SkRasterPipelineOp::branch_if_no_active_lanes_eq, &nonmatching);// don't skip next line
1788     p.append_constant_color(&alloc, kColorRed);                            // set the color to red
1789     p.append(SkRasterPipelineOp::branch_if_no_active_lanes_eq, &matching); // do skip next line
1790     p.append(SkRasterPipelineOp::swap_rb);                                 // swap R and B (= blue)
1791     p.append(SkRasterPipelineOp::load_condition_mask, mask);               // mask off the six
1792     p.append(SkRasterPipelineOp::branch_if_no_active_lanes_eq, &nonmatching);// do skip next line
1793     p.append(SkRasterPipelineOp::white_color);                             // (not executed)
1794     p.append(SkRasterPipelineOp::store_src, slots);                        // store final red color
1795     p.run(0,0,SkOpts::raster_pipeline_highp_stride,1);
1796 
1797     // Verify that the slots contain red.
1798     float* destPtr = &slots[0];
1799     for (int checkSlot = 0; checkSlot < 4; ++checkSlot) {
1800         for (int checkLane = 0; checkLane < N; ++checkLane) {
1801             REPORTER_ASSERT(r, *destPtr == kColorRed[checkSlot]);
1802             ++destPtr;
1803         }
1804     }
1805 }
1806 
1807 DEF_TEST(SkRasterPipeline_empty, r) {
1808     // No asserts... just a test that this is safe to run.
1809     SkRasterPipeline_<256> p;
1810     p.run(0,0,20,1);
1811 }
1812 
1813 DEF_TEST(SkRasterPipeline_nonsense, r) {
1814     // No asserts... just a test that this is safe to run and terminates.
1815     // srcover() calls st->next(); this makes sure we've always got something there to call.
1816     SkRasterPipeline_<256> p;
1817     p.append(SkRasterPipelineOp::srcover);
1818     p.run(0,0,20,1);
1819 }
1820 
1821 DEF_TEST(SkRasterPipeline_JIT, r) {
1822     // This tests a couple odd corners that a JIT backend can stumble over.
1823 
1824     uint32_t buf[72] = {
1825          0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
1826          1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12,
1827         13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
1828          0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
1829          0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
1830          0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
1831     };
1832 
1833     SkRasterPipeline_MemoryCtx src = { buf +  0, 0 },
1834                                dst = { buf + 36, 0 };
1835 
1836     // Copy buf[x] to buf[x+36] for x in [15,35).
1837     SkRasterPipeline_<256> p;
1838     p.append(SkRasterPipelineOp::load_8888,  &src);
1839     p.append(SkRasterPipelineOp::store_8888, &dst);
1840     p.run(15,0, 20,1);
1841 
1842     for (int i = 0; i < 36; i++) {
1843         if (i < 15 || i == 35) {
1844             REPORTER_ASSERT(r, buf[i+36] == 0);
1845         } else {
1846             REPORTER_ASSERT(r, buf[i+36] == (uint32_t)(i - 11));
1847         }
1848     }
1849 }
1850 
1851 static uint16_t h(float f) {
1852     // Remember, a float is 1-8-23 (sign-exponent-mantissa) with 127 exponent bias.
1853     uint32_t sem;
1854     memcpy(&sem, &f, sizeof(sem));
1855     uint32_t s  = sem & 0x80000000,
1856              em = sem ^ s;
1857 
1858     // Convert to 1-5-10 half with 15 bias, flushing denorm halfs (including zero) to zero.
1859     auto denorm = (int32_t)em < 0x38800000;  // I32 comparison is often quicker, and always safe
1860                                              // here.
1861     return denorm ? SkTo<uint16_t>(0)
1862                   : SkTo<uint16_t>((s>>16) + (em>>13) - ((127-15)<<10));
1863 }
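// For example, h(1.0f): 1.0f is 0x3F800000, so s == 0, em == 0x3F800000, and the result is
// (0x3F800000 >> 13) - ((127 - 15) << 10) == 0x1FC00 - 0x1C000 == 0x3C00, the half-float
// encoding of 1.0.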
1864 
1865 DEF_TEST(SkRasterPipeline_tail, r) {
1866     {
1867         float data[][4] = {
1868             {00, 01, 02, 03},
1869             {10, 11, 12, 13},
1870             {20, 21, 22, 23},
1871             {30, 31, 32, 33},
1872         };
1873 
1874         float buffer[4][4];
1875 
1876         SkRasterPipeline_MemoryCtx src = { &data[0][0], 0 },
1877                            dst = { &buffer[0][0], 0 };
1878 
1879         for (unsigned i = 1; i <= 4; i++) {
1880             memset(buffer, 0xff, sizeof(buffer));
1881             SkRasterPipeline_<256> p;
1882             p.append(SkRasterPipelineOp::load_f32, &src);
1883             p.append(SkRasterPipelineOp::store_f32, &dst);
1884             p.run(0,0, i,1);
1885             for (unsigned j = 0; j < i; j++) {
1886                 for (unsigned k = 0; k < 4; k++) {
1887                     if (buffer[j][k] != data[j][k]) {
1888                         ERRORF(r, "(%u, %u) - a: %g r: %g\n", j, k, data[j][k], buffer[j][k]);
1889                     }
1890                 }
1891             }
1892             for (int j = i; j < 4; j++) {
1893                 for (auto f : buffer[j]) {
1894                     REPORTER_ASSERT(r, SkScalarIsNaN(f));
1895                 }
1896             }
1897         }
1898     }
1899 
1900     {
1901         float data[][2] = {
1902             {00, 01},
1903             {10, 11},
1904             {20, 21},
1905             {30, 31},
1906         };
1907 
1908         float buffer[4][4];
1909 
1910         SkRasterPipeline_MemoryCtx src = { &data[0][0], 0 },
1911                 dst = { &buffer[0][0], 0 };
1912 
1913         for (unsigned i = 1; i <= 4; i++) {
1914             memset(buffer, 0xff, sizeof(buffer));
1915             SkRasterPipeline_<256> p;
1916             p.append(SkRasterPipelineOp::load_rgf32, &src);
1917             p.append(SkRasterPipelineOp::store_f32, &dst);
1918             p.run(0,0, i,1);
1919             for (unsigned j = 0; j < i; j++) {
1920                 for (unsigned k = 0; k < 2; k++) {
1921                     if (buffer[j][k] != data[j][k]) {
1922                         ERRORF(r, "(%u, %u) - a: %g r: %g\n", j, k, data[j][k], buffer[j][k]);
1923                     }
1924                 }
1925                 if (buffer[j][2] != 0) {
1926                     ERRORF(r, "(%u, 2) - a: 0 r: %g\n", j, buffer[j][2]);
1927                 }
1928                 if (buffer[j][3] != 1) {
1929                     ERRORF(r, "(%u, 3) - a: 1 r: %g\n", j, buffer[j][3]);
1930                 }
1931             }
1932             for (int j = i; j < 4; j++) {
1933                 for (auto f : buffer[j]) {
1934                     REPORTER_ASSERT(r, SkScalarIsNaN(f));
1935                 }
1936             }
1937         }
1938     }
1939 
1940     {
1941         float data[][4] = {
1942             {00, 01, 02, 03},
1943             {10, 11, 12, 13},
1944             {20, 21, 22, 23},
1945             {30, 31, 32, 33},
1946         };
1947 
1948         float buffer[4][2];
1949 
1950         SkRasterPipeline_MemoryCtx src = { &data[0][0], 0 },
1951                 dst = { &buffer[0][0], 0 };
1952 
1953         for (unsigned i = 1; i <= 4; i++) {
1954             memset(buffer, 0xff, sizeof(buffer));
1955             SkRasterPipeline_<256> p;
1956             p.append(SkRasterPipelineOp::load_f32, &src);
1957             p.append(SkRasterPipelineOp::store_rgf32, &dst);
1958             p.run(0,0, i,1);
1959             for (unsigned j = 0; j < i; j++) {
1960                 for (unsigned k = 0; k < 2; k++) {
1961                     if (buffer[j][k] != data[j][k]) {
1962                         ERRORF(r, "(%u, %u) - a: %g r: %g\n", j, k, data[j][k], buffer[j][k]);
1963                     }
1964                 }
1965             }
1966             for (int j = i; j < 4; j++) {
1967                 for (auto f : buffer[j]) {
1968                     REPORTER_ASSERT(r, SkScalarIsNaN(f));
1969                 }
1970             }
1971         }
1972     }
1973 
1974     {
1975         alignas(8) uint16_t data[][4] = {
1976             {h(00), h(01), h(02), h(03)},
1977             {h(10), h(11), h(12), h(13)},
1978             {h(20), h(21), h(22), h(23)},
1979             {h(30), h(31), h(32), h(33)},
1980         };
1981         alignas(8) uint16_t buffer[4][4];
1982         SkRasterPipeline_MemoryCtx src = { &data[0][0], 0 },
1983                            dst = { &buffer[0][0], 0 };
1984 
1985         for (unsigned i = 1; i <= 4; i++) {
1986             memset(buffer, 0xff, sizeof(buffer));
1987             SkRasterPipeline_<256> p;
1988             p.append(SkRasterPipelineOp::load_f16, &src);
1989             p.append(SkRasterPipelineOp::store_f16, &dst);
1990             p.run(0,0, i,1);
1991             for (unsigned j = 0; j < i; j++) {
1992                 for (int k = 0; k < 4; k++) {
1993                     REPORTER_ASSERT(r, buffer[j][k] == data[j][k]);
1994                 }
1995             }
1996             for (int j = i; j < 4; j++) {
1997                 for (auto f : buffer[j]) {
1998                     REPORTER_ASSERT(r, f == 0xffff);
1999                 }
2000             }
2001         }
2002     }
2003 
2004     {
2005         alignas(8) uint16_t data[]= {
2006             h(00),
2007             h(10),
2008             h(20),
2009             h(30),
2010         };
2011         alignas(8) uint16_t buffer[4][4];
2012         SkRasterPipeline_MemoryCtx src = { &data[0], 0 },
2013                 dst = { &buffer[0][0], 0 };
2014 
2015         for (unsigned i = 1; i <= 4; i++) {
2016             memset(buffer, 0xff, sizeof(buffer));
2017             SkRasterPipeline_<256> p;
2018             p.append(SkRasterPipelineOp::load_af16, &src);
2019             p.append(SkRasterPipelineOp::store_f16, &dst);
2020             p.run(0,0, i,1);
2021             for (unsigned j = 0; j < i; j++) {
2022                 uint16_t expected[] = {0, 0, 0, data[j]};
2023                 REPORTER_ASSERT(r, !memcmp(expected, &buffer[j][0], sizeof(buffer[j])));
2024             }
2025             for (int j = i; j < 4; j++) {
2026                 for (auto f : buffer[j]) {
2027                     REPORTER_ASSERT(r, f == 0xffff);
2028                 }
2029             }
2030         }
2031     }
2032 
2033     {
2034         alignas(8) uint16_t data[][4] = {
2035             {h(00), h(01), h(02), h(03)},
2036             {h(10), h(11), h(12), h(13)},
2037             {h(20), h(21), h(22), h(23)},
2038             {h(30), h(31), h(32), h(33)},
2039         };
2040         alignas(8) uint16_t buffer[4];
2041         SkRasterPipeline_MemoryCtx src = { &data[0][0], 0 },
2042                 dst = { &buffer[0], 0 };
2043 
2044         for (unsigned i = 1; i <= 4; i++) {
2045             memset(buffer, 0xff, sizeof(buffer));
2046             SkRasterPipeline_<256> p;
2047             p.append(SkRasterPipelineOp::load_f16, &src);
2048             p.append(SkRasterPipelineOp::store_af16, &dst);
2049             p.run(0,0, i,1);
2050             for (unsigned j = 0; j < i; j++) {
2051                 REPORTER_ASSERT(r, !memcmp(&data[j][3], &buffer[j], sizeof(buffer[j])));
2052             }
2053             for (int j = i; j < 4; j++) {
2054                 REPORTER_ASSERT(r, buffer[j] == 0xffff);
2055             }
2056         }
2057     }
2058 
2059     {
2060         alignas(8) uint16_t data[][4] = {
2061             {h(00), h(01), h(02), h(03)},
2062             {h(10), h(11), h(12), h(13)},
2063             {h(20), h(21), h(22), h(23)},
2064             {h(30), h(31), h(32), h(33)},
2065         };
2066         alignas(8) uint16_t buffer[4][2];
2067         SkRasterPipeline_MemoryCtx src = { &data[0][0], 0 },
2068                 dst = { &buffer[0][0], 0 };
2069 
2070         for (unsigned i = 1; i <= 4; i++) {
2071             memset(buffer, 0xff, sizeof(buffer));
2072             SkRasterPipeline_<256> p;
2073             p.append(SkRasterPipelineOp::load_f16, &src);
2074             p.append(SkRasterPipelineOp::store_rgf16, &dst);
2075             p.run(0,0, i,1);
2076             for (unsigned j = 0; j < i; j++) {
2077                 REPORTER_ASSERT(r, !memcmp(&buffer[j], &data[j], 2 * sizeof(uint16_t)));
2078             }
2079             for (int j = i; j < 4; j++) {
2080                 for (auto h : buffer[j]) {
2081                     REPORTER_ASSERT(r, h == 0xffff);
2082                 }
2083             }
2084         }
2085     }
2086 
2087     {
2088         alignas(8) uint16_t data[][2] = {
2089             {h(00), h(01)},
2090             {h(10), h(11)},
2091             {h(20), h(21)},
2092             {h(30), h(31)},
2093         };
2094         alignas(8) uint16_t buffer[4][4];
2095         SkRasterPipeline_MemoryCtx src = { &data[0][0], 0 },
2096                 dst = { &buffer[0][0], 0 };
2097 
2098         for (unsigned i = 1; i <= 4; i++) {
2099             memset(buffer, 0xff, sizeof(buffer));
2100             SkRasterPipeline_<256> p;
2101             p.append(SkRasterPipelineOp::load_rgf16, &src);
2102             p.append(SkRasterPipelineOp::store_f16, &dst);
2103             p.run(0,0, i,1);
2104             for (unsigned j = 0; j < i; j++) {
2105                 uint16_t expected[] = {data[j][0], data[j][1], h(0), h(1)};
2106                 REPORTER_ASSERT(r, !memcmp(&buffer[j], expected, sizeof(expected)));
2107             }
2108             for (int j = i; j < 4; j++) {
2109                 for (auto h : buffer[j]) {
2110                     REPORTER_ASSERT(r, h == 0xffff);
2111                 }
2112             }
2113         }
2114     }
2115 }
2116 
2117 DEF_TEST(SkRasterPipeline_u16, r) {
2118     {
2119         alignas(8) uint16_t data[][2] = {
2120             {0x0000, 0x0111},
2121             {0x1010, 0x1111},
2122             {0x2020, 0x2121},
2123             {0x3030, 0x3131},
2124         };
2125         uint8_t buffer[4][4];
2126         SkRasterPipeline_MemoryCtx src = { &data[0][0], 0 },
2127                 dst = { &buffer[0][0], 0 };
2128 
2129         for (unsigned i = 1; i <= 4; i++) {
2130             memset(buffer, 0xab, sizeof(buffer));
2131             SkRasterPipeline_<256> p;
2132             p.append(SkRasterPipelineOp::load_rg1616, &src);
2133             p.append(SkRasterPipelineOp::store_8888, &dst);
2134             p.run(0,0, i,1);
2135             for (unsigned j = 0; j < i; j++) {
2136                 uint8_t expected[] = {
2137                     SkToU8(data[j][0] >> 8),
2138                     SkToU8(data[j][1] >> 8),
2139                     000,
2140                     0xff
2141                 };
2142                 REPORTER_ASSERT(r, !memcmp(&buffer[j], expected, sizeof(expected)));
2143             }
2144             for (int j = i; j < 4; j++) {
2145                 for (auto b : buffer[j]) {
2146                     REPORTER_ASSERT(r, b == 0xab);
2147                 }
2148             }
2149         }
2150     }
2151 
2152     {
2153         alignas(8) uint16_t data[] = {
2154                 0x0000,
2155                 0x1010,
2156                 0x2020,
2157                 0x3030,
2158         };
2159         uint8_t buffer[4][4];
2160         SkRasterPipeline_MemoryCtx src = { &data[0], 0 },
2161                 dst = { &buffer[0][0], 0 };
2162 
2163         for (unsigned i = 1; i <= 4; i++) {
2164             memset(buffer, 0xff, sizeof(buffer));
2165             SkRasterPipeline_<256> p;
2166             p.append(SkRasterPipelineOp::load_a16, &src);
2167             p.append(SkRasterPipelineOp::store_8888, &dst);
2168             p.run(0,0, i,1);
2169             for (unsigned j = 0; j < i; j++) {
2170                 uint8_t expected[] = {0x00, 0x00, 0x00, SkToU8(data[j] >> 8)};
2171                 REPORTER_ASSERT(r, !memcmp(&buffer[j], expected, sizeof(expected)));
2172             }
2173             for (int j = i; j < 4; j++) {
2174                 for (auto b : buffer[j]) {
2175                     REPORTER_ASSERT(r, b == 0xff);
2176                 }
2177             }
2178         }
2179     }
2180 
2181     {
2182         uint8_t data[][4] = {
2183             {0x00, 0x01, 0x02, 0x03},
2184             {0x10, 0x11, 0x12, 0x13},
2185             {0x20, 0x21, 0x22, 0x23},
2186             {0x30, 0x31, 0x32, 0x33},
2187         };
2188         alignas(8) uint16_t buffer[4];
2189         SkRasterPipeline_MemoryCtx src = { &data[0][0], 0 },
2190                 dst = { &buffer[0], 0 };
2191 
2192         for (unsigned i = 1; i <= 4; i++) {
2193             memset(buffer, 0xff, sizeof(buffer));
2194             SkRasterPipeline_<256> p;
2195             p.append(SkRasterPipelineOp::load_8888, &src);
2196             p.append(SkRasterPipelineOp::store_a16, &dst);
2197             p.run(0,0, i,1);
2198             for (unsigned j = 0; j < i; j++) {
2199                 uint16_t expected = (data[j][3] << 8) | data[j][3];
2200                 REPORTER_ASSERT(r, buffer[j] == expected);
2201             }
2202             for (int j = i; j < 4; j++) {
2203                 REPORTER_ASSERT(r, buffer[j] == 0xffff);
2204             }
2205         }
2206     }
2207 
2208     {
2209         alignas(8) uint16_t data[][4] = {
2210             {0x0000, 0x1000, 0x2000, 0x3000},
2211             {0x0001, 0x1001, 0x2001, 0x3001},
2212             {0x0002, 0x1002, 0x2002, 0x3002},
2213             {0x0003, 0x1003, 0x2003, 0x3003},
2214         };
2215         alignas(8) uint16_t buffer[4][4];
2216         SkRasterPipeline_MemoryCtx src = { &data[0][0], 0 },
2217                 dst = { &buffer[0], 0 };
2218 
2219         for (unsigned i = 1; i <= 4; i++) {
2220             memset(buffer, 0xff, sizeof(buffer));
2221             SkRasterPipeline_<256> p;
2222             p.append(SkRasterPipelineOp::load_16161616, &src);
2223             p.append(SkRasterPipelineOp::swap_rb);
2224             p.append(SkRasterPipelineOp::store_16161616, &dst);
2225             p.run(0,0, i,1);
2226             for (unsigned j = 0; j < i; j++) {
2227                 uint16_t expected[4] = {data[j][2], data[j][1], data[j][0], data[j][3]};
2228                 REPORTER_ASSERT(r, !memcmp(&expected[0], &buffer[j], sizeof(expected)));
2229             }
2230             for (int j = i; j < 4; j++) {
2231                 for (uint16_t u16 : buffer[j])
2232                     REPORTER_ASSERT(r, u16 == 0xffff);
2233             }
2234         }
2235     }
2236 }
2237 
2238 DEF_TEST(SkRasterPipeline_lowp, r) {
2239     uint32_t rgba[64];
2240     for (int i = 0; i < 64; i++) {
2241         rgba[i] = (4*i+0) << 0
2242                 | (4*i+1) << 8
2243                 | (4*i+2) << 16
2244                 | (4*i+3) << 24;
2245     }
2246 
2247     SkRasterPipeline_MemoryCtx ptr = { rgba, 0 };
2248 
2249     SkRasterPipeline_<256> p;
2250     p.append(SkRasterPipelineOp::load_8888,  &ptr);
2251     p.append(SkRasterPipelineOp::swap_rb);
2252     p.append(SkRasterPipelineOp::store_8888, &ptr);
2253     p.run(0,0,64,1);
2254 
2255     for (int i = 0; i < 64; i++) {
2256         uint32_t want = (4*i+0) << 16
2257                       | (4*i+1) << 8
2258                       | (4*i+2) << 0
2259                       | (4*i+3) << 24;
2260         if (rgba[i] != want) {
2261             ERRORF(r, "got %08x, want %08x\n", rgba[i], want);
2262         }
2263     }
2264 }
2265 
2266 DEF_TEST(SkRasterPipeline_swizzle, r) {
2267     // This takes the lowp code path
2268     {
2269         uint16_t rg[64];
2270         for (int i = 0; i < 64; i++) {
2271             rg[i] = (4*i+0) << 0
2272                   | (4*i+1) << 8;
2273         }
2274 
2275         skgpu::Swizzle swizzle("g1b1");
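        // "g1b1" picks the source for each output channel in r,g,b,a order ('0'/'1' are
        // constants), so the result is (r=g, g=1, b=b, a=1), matching the bytes checked below.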
2276 
2277         SkRasterPipeline_MemoryCtx ptr = { rg, 0 };
2278         SkRasterPipeline_<256> p;
2279         p.append(SkRasterPipelineOp::load_rg88,  &ptr);
2280         swizzle.apply(&p);
2281         p.append(SkRasterPipelineOp::store_rg88, &ptr);
2282         p.run(0,0,64,1);
2283 
2284         for (int i = 0; i < 64; i++) {
2285             uint32_t want = 0xff    << 8
2286                           | (4*i+1) << 0;
2287             if (rg[i] != want) {
2288                 ERRORF(r, "got %08x, want %08x\n", rg[i], want);
2289             }
2290         }
2291     }
2292     // This takes the highp code path
2293     {
2294         float rg[64][2];
2295         for (int i = 0; i < 64; i++) {
2296             rg[i][0] = i + 1;
2297             rg[i][1] = 2 * i + 1;
2298         }
2299 
2300         skgpu::Swizzle swizzle("0gra");
2301 
2302         uint16_t buffer[64][4];
2303         SkRasterPipeline_MemoryCtx src = { rg,     0 },
2304                                    dst = { buffer, 0};
2305         SkRasterPipeline_<256> p;
2306         p.append(SkRasterPipelineOp::load_rgf32,  &src);
2307         swizzle.apply(&p);
2308         p.append(SkRasterPipelineOp::store_f16, &dst);
2309         p.run(0,0,64,1);
2310 
2311         for (int i = 0; i < 64; i++) {
2312             uint16_t want[4] {
2313                 h(0),
2314                 h(2 * i + 1),
2315                 h(i + 1),
2316                 h(1),
2317             };
2318             REPORTER_ASSERT(r, !memcmp(want, buffer[i], sizeof(buffer[i])));
2319         }
2320     }
2321 }
2322 
2323 DEF_TEST(SkRasterPipeline_lowp_clamp01, r) {
2324     // This may seem like a funny pipeline to create,
2325     // but it certainly shouldn't crash when you run it.
2326 
2327     uint32_t rgba = 0xff00ff00;
2328 
2329     SkRasterPipeline_MemoryCtx ptr = { &rgba, 0 };
2330 
2331     SkRasterPipeline_<256> p;
2332     p.append(SkRasterPipelineOp::load_8888,  &ptr);
2333     p.append(SkRasterPipelineOp::swap_rb);
2334     p.append(SkRasterPipelineOp::clamp_01);
2335     p.append(SkRasterPipelineOp::store_8888, &ptr);
2336     p.run(0,0,1,1);
2337 }
2338 
2339 // Helper struct that can be used to scrape stack addresses at different points in a pipeline
2340 class StackCheckerCtx : SkRasterPipeline_CallbackCtx {
2341 public:
2342     StackCheckerCtx() {
2343         this->fn = [](SkRasterPipeline_CallbackCtx* self, int active_pixels) {
2344             auto ctx = (StackCheckerCtx*)self;
2345             ctx->fStackAddrs.push_back(&active_pixels);
2346         };
2347     }
2348 
2349     enum class Behavior {
2350         kGrowth,
2351         kBaseline,
2352         kUnknown,
2353     };
2354 
2355     static Behavior GrowthBehavior() {
2356         // Only some stages use the musttail attribute, so we have no way of knowing what's going to
2357         // happen. In release builds, it's likely that the compiler will apply tail-call
2358         // optimization. Even in some debug builds (on Windows), we don't see stack growth.
2359         return Behavior::kUnknown;
2360     }
2361 
2362     // Call one of these two each time the checker callback is added:
2363     StackCheckerCtx* expectGrowth() {
2364         fExpectedBehavior.push_back(GrowthBehavior());
2365         return this;
2366     }
2367 
2368     StackCheckerCtx* expectBaseline() {
2369         fExpectedBehavior.push_back(Behavior::kBaseline);
2370         return this;
2371     }
2372 
2373     void validate(skiatest::Reporter* r) {
2374         REPORTER_ASSERT(r, fStackAddrs.size() == fExpectedBehavior.size());
2375 
2376         // This test is storing and comparing stack pointers (to dead stack frames) as a way of
2377         // measuring stack usage. Unsurprisingly, ASAN doesn't like that. HWASAN actually inserts
2378         // tag bytes in the pointers, causing them not to match. Newer versions of vanilla ASAN
2379         // also appear to salt the stack slightly, causing repeated calls to scrape different
2380         // addresses, even though $rsp is identical on each invocation of the lambda.
2381 #if !defined(SK_SANITIZE_ADDRESS)
2382         void* baseline = fStackAddrs[0];
2383         for (size_t i = 1; i < fStackAddrs.size(); i++) {
2384             if (fExpectedBehavior[i] == Behavior::kGrowth) {
2385                 REPORTER_ASSERT(r, fStackAddrs[i] != baseline);
2386             } else if (fExpectedBehavior[i] == Behavior::kBaseline) {
2387                 REPORTER_ASSERT(r, fStackAddrs[i] == baseline);
2388             } else {
2389                 // Unknown behavior, nothing we can assert here
2390             }
2391         }
2392 #endif
2393     }
2394 
2395 private:
2396     std::vector<void*>    fStackAddrs;
2397     std::vector<Behavior> fExpectedBehavior;
2398 };
2399 
2400 DEF_TEST(SkRasterPipeline_stack_rewind, r) {
2401     // This test verifies that we can control stack usage with stack_rewind
2402 
2403     // Without stack_rewind, we should (maybe) see stack growth
2404     {
2405         StackCheckerCtx stack;
2406         uint32_t rgba = 0xff0000ff;
2407         SkRasterPipeline_MemoryCtx ptr = { &rgba, 0 };
2408 
2409         SkRasterPipeline_<256> p;
2410         p.append(SkRasterPipelineOp::callback, stack.expectBaseline());
2411         p.append(SkRasterPipelineOp::load_8888,  &ptr);
2412         p.append(SkRasterPipelineOp::callback, stack.expectGrowth());
2413         p.append(SkRasterPipelineOp::swap_rb);
2414         p.append(SkRasterPipelineOp::callback, stack.expectGrowth());
2415         p.append(SkRasterPipelineOp::store_8888, &ptr);
2416         p.run(0,0,1,1);
2417 
2418         REPORTER_ASSERT(r, rgba == 0xffff0000); // Ensure the pipeline worked
2419         stack.validate(r);
2420     }
2421 
2422     // With stack_rewind, we should (always) be able to get back to baseline
2423     {
2424         StackCheckerCtx stack;
2425         uint32_t rgba = 0xff0000ff;
2426         SkRasterPipeline_MemoryCtx ptr = { &rgba, 0 };
2427 
2428         SkRasterPipeline_<256> p;
2429         p.append(SkRasterPipelineOp::callback, stack.expectBaseline());
2430         p.append(SkRasterPipelineOp::load_8888,  &ptr);
2431         p.append(SkRasterPipelineOp::callback, stack.expectGrowth());
2432         p.append_stack_rewind();
2433         p.append(SkRasterPipelineOp::callback, stack.expectBaseline());
2434         p.append(SkRasterPipelineOp::swap_rb);
2435         p.append(SkRasterPipelineOp::callback, stack.expectGrowth());
2436         p.append_stack_rewind();
2437         p.append(SkRasterPipelineOp::callback, stack.expectBaseline());
2438         p.append(SkRasterPipelineOp::store_8888, &ptr);
2439         p.run(0,0,1,1);
2440 
2441         REPORTER_ASSERT(r, rgba == 0xffff0000); // Ensure the pipeline worked
2442         stack.validate(r);
2443     }
2444 }
2445