/*
 * Copyright 2016 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "include/private/base/SkTo.h"
#include "src/base/SkHalf.h"
#include "src/base/SkUtils.h"
#include "src/core/SkOpts.h"
#include "src/core/SkRasterPipeline.h"
#include "src/gpu/Swizzle.h"
#include "tests/Test.h"

#include <algorithm>
#include <cmath>
#include <numeric>
18
// Builds and runs a minimal pipeline (load src, load dst, srcover, store) and
// verifies the blend result: 50% transparent blue over opaque red.
DEF_TEST(SkRasterPipeline, r) {
    // Each uint64_t packs four half-floats as R|G|B|A from low to high bits.
    uint64_t red  = 0x3c00000000003c00ull,  // R=1.0, G=0.0, B=0.0, A=1.0
             blue = 0x3800380000000000ull,  // R=0.0, G=0.0, B=0.5, A=0.5
             result;

    SkRasterPipeline_MemoryCtx load_s_ctx = { &blue, 0 },
                               load_d_ctx = { &red, 0 },
                               store_ctx  = { &result, 0 };

    SkRasterPipeline_<256> p;
    p.append(SkRasterPipelineOp::load_f16,     &load_s_ctx);
    p.append(SkRasterPipelineOp::load_f16_dst, &load_d_ctx);
    p.append(SkRasterPipelineOp::srcover);
    p.append(SkRasterPipelineOp::store_f16,    &store_ctx);
    p.run(0,0,1,1);

    // We should see half-intensity magenta (R=0.5, G=0, B=0.5, A=1.0).
    REPORTER_ASSERT(r, ((result >>  0) & 0xffff) == 0x3800);
    REPORTER_ASSERT(r, ((result >> 16) & 0xffff) == 0x0000);
    REPORTER_ASSERT(r, ((result >> 32) & 0xffff) == 0x3800);
    REPORTER_ASSERT(r, ((result >> 48) & 0xffff) == 0x3c00);
}
43
// Verifies that load/store_condition_mask round-trip a lane mask, and that the
// condition mask is mirrored into the dr/da channels by store_dst.
DEF_TEST(SkRasterPipeline_LoadStoreConditionMask, r) {
    alignas(64) int32_t mask[]  = {~0, 0, ~0, 0, ~0, ~0, ~0, 0};
    alignas(64) int32_t maskCopy[SkRasterPipeline_kMaxStride_highp] = {};
    alignas(64) int32_t dst[4 * SkRasterPipeline_kMaxStride_highp] = {};

    static_assert(std::size(mask) == SkRasterPipeline_kMaxStride_highp);

    SkRasterPipeline_<256> p;
    p.append(SkRasterPipelineOp::init_lane_masks);
    p.append(SkRasterPipelineOp::load_condition_mask, mask);
    p.append(SkRasterPipelineOp::store_condition_mask, maskCopy);
    p.append(SkRasterPipelineOp::store_dst, dst);
    p.run(0,0,SkOpts::raster_pipeline_highp_stride,1);

    {
        // `maskCopy` should be populated with `mask` in the frontmost positions
        // (depending on the architecture that SkRasterPipeline is targeting).
        size_t index = 0;
        for (; index < SkOpts::raster_pipeline_highp_stride; ++index) {
            REPORTER_ASSERT(r, maskCopy[index] == mask[index]);
        }

        // The remaining slots should have been left alone.
        for (; index < std::size(maskCopy); ++index) {
            REPORTER_ASSERT(r, maskCopy[index] == 0);
        }
    }
    {
        // `dr` and `da` should be populated with `mask`.
        // `dg` and `db` should remain initialized to true.
        const int dr = 0 * SkOpts::raster_pipeline_highp_stride;
        const int dg = 1 * SkOpts::raster_pipeline_highp_stride;
        const int db = 2 * SkOpts::raster_pipeline_highp_stride;
        const int da = 3 * SkOpts::raster_pipeline_highp_stride;
        for (size_t index = 0; index < SkOpts::raster_pipeline_highp_stride; ++index) {
            REPORTER_ASSERT(r, dst[dr + index] == mask[index]);
            REPORTER_ASSERT(r, dst[dg + index] == ~0);
            REPORTER_ASSERT(r, dst[db + index] == ~0);
            REPORTER_ASSERT(r, dst[da + index] == mask[index]);
        }
    }
}
86
// Verifies that load/store_loop_mask round-trip a lane mask, and that the
// loop mask is mirrored into the dg/da channels by store_dst.
DEF_TEST(SkRasterPipeline_LoadStoreLoopMask, r) {
    alignas(64) int32_t mask[]  = {~0, 0, ~0, 0, ~0, ~0, ~0, 0};
    alignas(64) int32_t maskCopy[SkRasterPipeline_kMaxStride_highp] = {};
    alignas(64) int32_t dst[4 * SkRasterPipeline_kMaxStride_highp] = {};

    static_assert(std::size(mask) == SkRasterPipeline_kMaxStride_highp);

    SkRasterPipeline_<256> p;
    p.append(SkRasterPipelineOp::init_lane_masks);
    p.append(SkRasterPipelineOp::load_loop_mask, mask);
    p.append(SkRasterPipelineOp::store_loop_mask, maskCopy);
    p.append(SkRasterPipelineOp::store_dst, dst);
    p.run(0,0,SkOpts::raster_pipeline_highp_stride,1);

    {
        // `maskCopy` should be populated with `mask` in the frontmost positions
        // (depending on the architecture that SkRasterPipeline is targeting).
        size_t index = 0;
        for (; index < SkOpts::raster_pipeline_highp_stride; ++index) {
            REPORTER_ASSERT(r, maskCopy[index] == mask[index]);
        }

        // The remaining slots should have been left alone.
        for (; index < std::size(maskCopy); ++index) {
            REPORTER_ASSERT(r, maskCopy[index] == 0);
        }
    }
    {
        // `dg` and `da` should be populated with `mask`.
        // `dr` and `db` should remain initialized to true.
        const int dr = 0 * SkOpts::raster_pipeline_highp_stride;
        const int dg = 1 * SkOpts::raster_pipeline_highp_stride;
        const int db = 2 * SkOpts::raster_pipeline_highp_stride;
        const int da = 3 * SkOpts::raster_pipeline_highp_stride;
        for (size_t index = 0; index < SkOpts::raster_pipeline_highp_stride; ++index) {
            REPORTER_ASSERT(r, dst[dr + index] == ~0);
            REPORTER_ASSERT(r, dst[dg + index] == mask[index]);
            REPORTER_ASSERT(r, dst[db + index] == ~0);
            REPORTER_ASSERT(r, dst[da + index] == mask[index]);
        }
    }
}
129
// Verifies that load/store_return_mask round-trip a lane mask, and that the
// return mask is mirrored into the db/da channels by store_dst.
DEF_TEST(SkRasterPipeline_LoadStoreReturnMask, r) {
    alignas(64) int32_t mask[]  = {~0, 0, ~0, 0, ~0, ~0, ~0, 0};
    alignas(64) int32_t maskCopy[SkRasterPipeline_kMaxStride_highp] = {};
    alignas(64) int32_t dst[4 * SkRasterPipeline_kMaxStride_highp] = {};

    static_assert(std::size(mask) == SkRasterPipeline_kMaxStride_highp);

    SkRasterPipeline_<256> p;
    p.append(SkRasterPipelineOp::init_lane_masks);
    p.append(SkRasterPipelineOp::load_return_mask, mask);
    p.append(SkRasterPipelineOp::store_return_mask, maskCopy);
    p.append(SkRasterPipelineOp::store_dst, dst);
    p.run(0,0,SkOpts::raster_pipeline_highp_stride,1);

    {
        // `maskCopy` should be populated with `mask` in the frontmost positions
        // (depending on the architecture that SkRasterPipeline is targeting).
        size_t index = 0;
        for (; index < SkOpts::raster_pipeline_highp_stride; ++index) {
            REPORTER_ASSERT(r, maskCopy[index] == mask[index]);
        }

        // The remaining slots should have been left alone.
        for (; index < std::size(maskCopy); ++index) {
            REPORTER_ASSERT(r, maskCopy[index] == 0);
        }
    }
    {
        // `db` and `da` should be populated with `mask`.
        // `dr` and `dg` should remain initialized to true.
        const int dr = 0 * SkOpts::raster_pipeline_highp_stride;
        const int dg = 1 * SkOpts::raster_pipeline_highp_stride;
        const int db = 2 * SkOpts::raster_pipeline_highp_stride;
        const int da = 3 * SkOpts::raster_pipeline_highp_stride;
        for (size_t index = 0; index < SkOpts::raster_pipeline_highp_stride; ++index) {
            REPORTER_ASSERT(r, dst[dr + index] == ~0);
            REPORTER_ASSERT(r, dst[dg + index] == ~0);
            REPORTER_ASSERT(r, dst[db + index] == mask[index]);
            REPORTER_ASSERT(r, dst[da + index] == mask[index]);
        }
    }
}
172
// Verifies merge_condition_mask: the condition mask becomes the AND of the two
// mask vectors supplied in its context (mask[x] & mask[x + stride]).
DEF_TEST(SkRasterPipeline_MergeConditionMask, r) {
    alignas(64) int32_t mask[]  = { 0,  0, ~0, ~0, 0, ~0, 0, ~0,
                                   ~0, ~0, ~0, ~0, 0,  0, 0,  0};
    alignas(64) int32_t dst[4 * SkRasterPipeline_kMaxStride_highp] = {};
    static_assert(std::size(mask) == (2 * SkRasterPipeline_kMaxStride_highp));

    SkRasterPipeline_<256> p;
    p.append(SkRasterPipelineOp::init_lane_masks);
    p.append(SkRasterPipelineOp::merge_condition_mask, mask);
    p.append(SkRasterPipelineOp::store_dst, dst);
    p.run(0,0,SkOpts::raster_pipeline_highp_stride,1);

    // `dr` and `da` should be populated with `mask[x] & mask[y]` in the frontmost positions.
    // `dg` and `db` should remain initialized to true.
    const int dr = 0 * SkOpts::raster_pipeline_highp_stride;
    const int dg = 1 * SkOpts::raster_pipeline_highp_stride;
    const int db = 2 * SkOpts::raster_pipeline_highp_stride;
    const int da = 3 * SkOpts::raster_pipeline_highp_stride;
    for (size_t index = 0; index < SkOpts::raster_pipeline_highp_stride; ++index) {
        int32_t expected = mask[index] & mask[index + SkOpts::raster_pipeline_highp_stride];
        REPORTER_ASSERT(r, dst[dr + index] == expected);
        REPORTER_ASSERT(r, dst[dg + index] == ~0);
        REPORTER_ASSERT(r, dst[db + index] == ~0);
        REPORTER_ASSERT(r, dst[da + index] == expected);
    }
}
199
// Verifies merge_loop_mask: dg is ANDed with the supplied mask, dr/db are left
// untouched, and da is recomputed as the combination of all three masks.
DEF_TEST(SkRasterPipeline_MergeLoopMask, r) {
    alignas(64) int32_t initial[] = {~0, ~0, ~0, ~0, ~0,  0, ~0, ~0,  // dr (condition)
                                     ~0,  0, ~0,  0, ~0, ~0, ~0, ~0,  // dg (loop)
                                     ~0, ~0, ~0, ~0, ~0, ~0,  0, ~0,  // db (return)
                                     ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0}; // da (combined)
    alignas(64) int32_t mask[]    = { 0, ~0, ~0,  0, ~0, ~0, ~0, ~0};
    alignas(64) int32_t dst[4 * SkRasterPipeline_kMaxStride_highp] = {};
    static_assert(std::size(initial) == (4 * SkRasterPipeline_kMaxStride_highp));

    SkRasterPipeline_<256> p;
    p.append(SkRasterPipelineOp::load_dst, initial);
    p.append(SkRasterPipelineOp::merge_loop_mask, mask);
    p.append(SkRasterPipelineOp::store_dst, dst);
    p.run(0,0,SkOpts::raster_pipeline_highp_stride,1);

    const int dr = 0 * SkOpts::raster_pipeline_highp_stride;
    const int dg = 1 * SkOpts::raster_pipeline_highp_stride;
    const int db = 2 * SkOpts::raster_pipeline_highp_stride;
    const int da = 3 * SkOpts::raster_pipeline_highp_stride;
    for (size_t index = 0; index < SkOpts::raster_pipeline_highp_stride; ++index) {
        // `dg` should contain `dg & mask` in each lane.
        REPORTER_ASSERT(r, dst[dg + index] == (initial[dg + index] & mask[index]));

        // `dr` and `db` should be unchanged.
        REPORTER_ASSERT(r, dst[dr + index] == initial[dr + index]);
        REPORTER_ASSERT(r, dst[db + index] == initial[db + index]);

        // `da` should contain `dr & dg & db`.
        REPORTER_ASSERT(r, dst[da + index] == (dst[dr+index] & dst[dg+index] & dst[db+index]));
    }
}
231
// Verifies reenable_loop_mask: dg is ORed with the supplied mask, dr/db are
// left untouched, and da is recomputed as the combination of all three masks.
DEF_TEST(SkRasterPipeline_ReenableLoopMask, r) {
    alignas(64) int32_t initial[] = {~0, ~0, ~0, ~0, ~0,  0, ~0, ~0,  // dr (condition)
                                     ~0,  0, ~0,  0, ~0, ~0,  0, ~0,  // dg (loop)
                                      0, ~0, ~0, ~0,  0,  0,  0, ~0,  // db (return)
                                      0,  0, ~0,  0,  0,  0,  0, ~0}; // da (combined)
    alignas(64) int32_t mask[]    = { 0, ~0,  0,  0,  0,  0, ~0,  0};
    alignas(64) int32_t dst[4 * SkRasterPipeline_kMaxStride_highp] = {};
    static_assert(std::size(initial) == (4 * SkRasterPipeline_kMaxStride_highp));

    SkRasterPipeline_<256> p;
    p.append(SkRasterPipelineOp::load_dst, initial);
    p.append(SkRasterPipelineOp::reenable_loop_mask, mask);
    p.append(SkRasterPipelineOp::store_dst, dst);
    p.run(0,0,SkOpts::raster_pipeline_highp_stride,1);

    const int dr = 0 * SkOpts::raster_pipeline_highp_stride;
    const int dg = 1 * SkOpts::raster_pipeline_highp_stride;
    const int db = 2 * SkOpts::raster_pipeline_highp_stride;
    const int da = 3 * SkOpts::raster_pipeline_highp_stride;
    for (size_t index = 0; index < SkOpts::raster_pipeline_highp_stride; ++index) {
        // `dg` should contain `dg | mask` in each lane.
        REPORTER_ASSERT(r, dst[dg + index] == (initial[dg + index] | mask[index]));

        // `dr` and `db` should be unchanged.
        REPORTER_ASSERT(r, dst[dr + index] == initial[dr + index]);
        REPORTER_ASSERT(r, dst[db + index] == initial[db + index]);

        // `da` should contain `dr & dg & db`.
        REPORTER_ASSERT(r, dst[da + index] == (dst[dr+index] & dst[dg+index] & dst[db+index]));
    }
}
263
// Verifies case_op: lanes whose actual value matches `expectedValue` get their
// loop mask (dg) enabled and their default-mask entry zeroed; other lanes and
// the actual-value buffer are left untouched.
DEF_TEST(SkRasterPipeline_CaseOp, r) {
    alignas(64) int32_t initial[] = {~0, ~0, ~0, ~0, ~0,  0, ~0, ~0,  // dr (condition)
                                      0, ~0, ~0,  0, ~0, ~0,  0, ~0,  // dg (loop)
                                     ~0,  0, ~0, ~0,  0,  0,  0, ~0,  // db (return)
                                      0,  0, ~0,  0,  0,  0,  0, ~0}; // da (combined)
    alignas(64) int32_t dst[4 * SkRasterPipeline_kMaxStride_highp] = {};
    static_assert(std::size(initial) == (4 * SkRasterPipeline_kMaxStride_highp));

    constexpr int32_t actualValues[] = { 2, 1, 2, 4, 5, 2, 2, 8};
    static_assert(std::size(actualValues) == SkRasterPipeline_kMaxStride_highp);

    // caseOpData holds the per-lane actual values followed by the default mask.
    alignas(64) int32_t caseOpData[2 * SkRasterPipeline_kMaxStride_highp];
    for (size_t index = 0; index < SkOpts::raster_pipeline_highp_stride; ++index) {
        caseOpData[0 * SkOpts::raster_pipeline_highp_stride + index] = actualValues[index];
        caseOpData[1 * SkOpts::raster_pipeline_highp_stride + index] = ~0;
    }

    SkRasterPipeline_CaseOpCtx ctx;
    ctx.ptr = caseOpData;
    ctx.expectedValue = 2;

    SkRasterPipeline_<256> p;
    p.append(SkRasterPipelineOp::load_dst, initial);
    p.append(SkRasterPipelineOp::case_op, &ctx);
    p.append(SkRasterPipelineOp::store_dst, dst);
    p.run(0,0,SkOpts::raster_pipeline_highp_stride,1);

    const int dr = 0 * SkOpts::raster_pipeline_highp_stride;
    const int dg = 1 * SkOpts::raster_pipeline_highp_stride;
    const int db = 2 * SkOpts::raster_pipeline_highp_stride;
    const int da = 3 * SkOpts::raster_pipeline_highp_stride;
    const int actualValueIdx = 0 * SkOpts::raster_pipeline_highp_stride;
    const int defaultMaskIdx = 1 * SkOpts::raster_pipeline_highp_stride;

    for (size_t index = 0; index < SkOpts::raster_pipeline_highp_stride; ++index) {
        // `dg` should have been set to true for each lane containing 2.
        int32_t expected = (actualValues[index] == 2) ? ~0 : initial[dg + index];
        REPORTER_ASSERT(r, dst[dg + index] == expected);

        // `dr` and `db` should be unchanged.
        REPORTER_ASSERT(r, dst[dr + index] == initial[dr + index]);
        REPORTER_ASSERT(r, dst[db + index] == initial[db + index]);

        // `da` should contain `dr & dg & db`.
        REPORTER_ASSERT(r, dst[da + index] == (dst[dr+index] & dst[dg+index] & dst[db+index]));

        // The actual-value part of `caseOpData` should be unchanged from the inputs.
        REPORTER_ASSERT(r, caseOpData[actualValueIdx + index] == actualValues[index]);

        // The default-mask part of `caseOpData` should have been zeroed where the values matched.
        expected = (actualValues[index] == 2) ? 0 : ~0;
        REPORTER_ASSERT(r, caseOpData[defaultMaskIdx + index] == expected);
    }
}
318
// Verifies mask_off_loop_mask: dg loses every lane that was executing (da),
// and da is recomputed from the updated masks.
DEF_TEST(SkRasterPipeline_MaskOffLoopMask, r) {
    alignas(64) int32_t initial[] = {~0, ~0, ~0, ~0, ~0,  0, ~0, ~0,  // dr (condition)
                                     ~0,  0, ~0, ~0,  0,  0,  0, ~0,  // dg (loop)
                                     ~0, ~0,  0, ~0,  0,  0, ~0, ~0,  // db (return)
                                     ~0,  0,  0, ~0,  0,  0,  0, ~0}; // da (combined)
    alignas(64) int32_t dst[4 * SkRasterPipeline_kMaxStride_highp] = {};
    static_assert(std::size(initial) == (4 * SkRasterPipeline_kMaxStride_highp));

    SkRasterPipeline_<256> p;
    p.append(SkRasterPipelineOp::load_dst, initial);
    p.append(SkRasterPipelineOp::mask_off_loop_mask);
    p.append(SkRasterPipelineOp::store_dst, dst);
    p.run(0,0,SkOpts::raster_pipeline_highp_stride,1);

    const int dr = 0 * SkOpts::raster_pipeline_highp_stride;
    const int dg = 1 * SkOpts::raster_pipeline_highp_stride;
    const int db = 2 * SkOpts::raster_pipeline_highp_stride;
    const int da = 3 * SkOpts::raster_pipeline_highp_stride;
    for (size_t index = 0; index < SkOpts::raster_pipeline_highp_stride; ++index) {
        // `dg` should have masked off any lanes that are currently executing.
        int32_t expected = initial[dg + index] & ~initial[da + index];
        REPORTER_ASSERT(r, dst[dg + index] == expected);

        // `da` should contain `dr & dg & db`.
        expected = dst[dr + index] & dst[dg + index] & dst[db + index];
        REPORTER_ASSERT(r, dst[da + index] == expected);
    }
}
347
// Verifies mask_off_return_mask: db loses every lane that was executing (da),
// and da is recomputed from the updated masks.
DEF_TEST(SkRasterPipeline_MaskOffReturnMask, r) {
    alignas(64) int32_t initial[] = {~0, ~0, ~0, ~0, ~0,  0, ~0, ~0,  // dr (condition)
                                     ~0,  0, ~0, ~0,  0,  0,  0, ~0,  // dg (loop)
                                     ~0, ~0,  0, ~0,  0,  0, ~0, ~0,  // db (return)
                                     ~0,  0,  0, ~0,  0,  0,  0, ~0}; // da (combined)
    alignas(64) int32_t dst[4 * SkRasterPipeline_kMaxStride_highp] = {};
    static_assert(std::size(initial) == (4 * SkRasterPipeline_kMaxStride_highp));

    SkRasterPipeline_<256> p;
    p.append(SkRasterPipelineOp::load_dst, initial);
    p.append(SkRasterPipelineOp::mask_off_return_mask);
    p.append(SkRasterPipelineOp::store_dst, dst);
    p.run(0,0,SkOpts::raster_pipeline_highp_stride,1);

    const int dr = 0 * SkOpts::raster_pipeline_highp_stride;
    const int dg = 1 * SkOpts::raster_pipeline_highp_stride;
    const int db = 2 * SkOpts::raster_pipeline_highp_stride;
    const int da = 3 * SkOpts::raster_pipeline_highp_stride;
    for (size_t index = 0; index < SkOpts::raster_pipeline_highp_stride; ++index) {
        // `db` should have masked off any lanes that are currently executing.
        int32_t expected = initial[db + index] & ~initial[da + index];
        REPORTER_ASSERT(r, dst[db + index] == expected);

        // `da` should contain `dr & dg & db`.
        expected = dst[dr + index] & dst[dg + index] & dst[db + index];
        REPORTER_ASSERT(r, dst[da + index] == expected);
    }
}
376
// Verifies init_lane_masks at every tail width: all four dst channels become
// all-on for active lanes, while lanes beyond the width are never written.
DEF_TEST(SkRasterPipeline_InitLaneMasks, r) {
    for (size_t width = 1; width <= SkOpts::raster_pipeline_highp_stride; ++width) {
        SkRasterPipeline_<256> p;

        // Initialize dRGBA to unrelated values.
        SkRasterPipeline_UniformColorCtx uniformCtx;
        uniformCtx.a = 0.0f;
        uniformCtx.r = 0.25f;
        uniformCtx.g = 0.50f;
        uniformCtx.b = 0.75f;
        p.append(SkRasterPipelineOp::uniform_color_dst, &uniformCtx);

        // Overwrite dRGB with lane masks up to the tail width.
        p.append(SkRasterPipelineOp::init_lane_masks);

        // Use the store_dst command to write out dRGBA for inspection.
        alignas(64) int32_t dRGBA[4 * SkRasterPipeline_kMaxStride_highp] = {};
        p.append(SkRasterPipelineOp::store_dst, dRGBA);

        // Execute our program.
        p.run(0,0,width,1);

        // Initialized data should look like on/on/on/on (RGBA are all set) and is
        // striped by the raster pipeline stride because we wrote it using store_dst.
        size_t index = 0;
        int32_t* channelR = dRGBA;
        int32_t* channelG = channelR + SkOpts::raster_pipeline_highp_stride;
        int32_t* channelB = channelG + SkOpts::raster_pipeline_highp_stride;
        int32_t* channelA = channelB + SkOpts::raster_pipeline_highp_stride;
        for (; index < width; ++index) {
            REPORTER_ASSERT(r, *channelR++ == ~0);
            REPORTER_ASSERT(r, *channelG++ == ~0);
            REPORTER_ASSERT(r, *channelB++ == ~0);
            REPORTER_ASSERT(r, *channelA++ == ~0);
        }

        // The rest of the output array should be untouched (all zero).
        for (; index < SkOpts::raster_pipeline_highp_stride; ++index) {
            REPORTER_ASSERT(r, *channelR++ == 0);
            REPORTER_ASSERT(r, *channelG++ == 0);
            REPORTER_ASSERT(r, *channelB++ == 0);
            REPORTER_ASSERT(r, *channelA++ == 0);
        }
    }
}
422
// Verifies copy_from_indirect_masked over a matrix of indirect offsets, lane
// masks, and copy sizes: masked-on lanes read from src at their per-lane
// offset; masked-off lanes keep their previous values.
DEF_TEST(SkRasterPipeline_CopyFromIndirectMasked, r) {
    // Allocate space for 5 source slots, and 5 dest slots.
    alignas(64) float src[5 * SkRasterPipeline_kMaxStride_highp];
    alignas(64) float dst[5 * SkRasterPipeline_kMaxStride_highp];

    // Test with various mixes of indirect offsets.
    static_assert(SkRasterPipeline_kMaxStride_highp == 8);
    alignas(64) const uint32_t kOffsets1[8] = {0, 0, 0, 0, 0, 0, 0, 0};
    alignas(64) const uint32_t kOffsets2[8] = {2, 2, 2, 2, 2, 2, 2, 2};
    alignas(64) const uint32_t kOffsets3[8] = {0, 2, 0, 2, 0, 2, 0, 2};
    alignas(64) const uint32_t kOffsets4[8] = {99, 99, 0, 0, 99, 99, 0, 0};

    alignas(64) const int32_t kMask1[8] = {~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0};
    alignas(64) const int32_t kMask2[8] = { 0,  0,  0,  0,  0,  0,  0,  0};
    alignas(64) const int32_t kMask3[8] = {~0,  0, ~0, ~0, ~0, ~0,  0, ~0};
    alignas(64) const int32_t kMask4[8] = { 0, ~0,  0,  0,  0, ~0, ~0,  0};

    const int N = SkOpts::raster_pipeline_highp_stride;

    for (const uint32_t* offsets : {kOffsets1, kOffsets2, kOffsets3, kOffsets4}) {
        for (const int32_t* mask : {kMask1, kMask2, kMask3, kMask4}) {
            for (int copySize = 1; copySize <= 5; ++copySize) {
                // Initialize the destination slots to 0,1,2.. and the source slots
                // to 1000,1001,1002...
                std::iota(&dst[0], &dst[5 * N], 0.0f);
                std::iota(&src[0], &src[5 * N], 1000.0f);

                // Run `copy_from_indirect_masked` over our data.
                SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
                SkRasterPipeline p(&alloc);
                auto* ctx = alloc.make<SkRasterPipeline_CopyIndirectCtx>();
                ctx->dst = &dst[0];
                ctx->src = &src[0];
                ctx->indirectOffset = offsets;
                ctx->indirectLimit = 5 - copySize;
                ctx->slots = copySize;

                p.append(SkRasterPipelineOp::init_lane_masks);
                p.append(SkRasterPipelineOp::load_condition_mask, mask);
                p.append(SkRasterPipelineOp::copy_from_indirect_masked, ctx);
                p.run(0,0,N,1);

                // If the offset plus copy-size would overflow the source data, the results don't
                // matter; indexing off the end of the buffer is UB, and we don't make any promises
                // about the values you get. If we didn't crash, that's success. (In practice, we
                // will have clamped the source pointer so that we don't read past the end.)
                int maxOffset = *std::max_element(offsets, offsets + N);
                if (copySize + maxOffset > 5) {
                    continue;
                }

                // Verify that the destination has been overwritten in the mask-on fields, and has
                // not been overwritten in the mask-off fields, for each destination slot.
                float expectedUnchanged = 0.0f;
                float expectedFromZero = src[0 * N], expectedFromTwo = src[2 * N];
                float* destPtr = dst;
                for (int checkSlot = 0; checkSlot < 5; ++checkSlot) {
                    for (int checkLane = 0; checkLane < N; ++checkLane) {
                        if (checkSlot < copySize && mask[checkLane]) {
                            if (offsets[checkLane] == 0) {
                                REPORTER_ASSERT(r, *destPtr == expectedFromZero);
                            } else if (offsets[checkLane] == 2) {
                                REPORTER_ASSERT(r, *destPtr == expectedFromTwo);
                            } else {
                                ERRORF(r, "unexpected offset value");
                            }
                        } else {
                            REPORTER_ASSERT(r, *destPtr == expectedUnchanged);
                        }

                        ++destPtr;
                        expectedUnchanged += 1.0f;
                        expectedFromZero += 1.0f;
                        expectedFromTwo += 1.0f;
                    }
                }
            }
        }
    }
}
503
// Verifies the copy_N_slots_masked family: masked-on lanes are copied from src
// to dst for the affected slots; masked-off lanes and extra slots are untouched.
DEF_TEST(SkRasterPipeline_CopySlotsMasked, r) {
    // Allocate space for 5 source slots and 5 dest slots.
    alignas(64) float slots[10 * SkRasterPipeline_kMaxStride_highp];
    const int srcIndex = 0, dstIndex = 5;

    struct CopySlotsOp {
        SkRasterPipelineOp stage;
        int numSlotsAffected;
    };

    static const CopySlotsOp kCopyOps[] = {
        {SkRasterPipelineOp::copy_slot_masked,    1},
        {SkRasterPipelineOp::copy_2_slots_masked, 2},
        {SkRasterPipelineOp::copy_3_slots_masked, 3},
        {SkRasterPipelineOp::copy_4_slots_masked, 4},
    };

    static_assert(SkRasterPipeline_kMaxStride_highp == 8);
    alignas(64) const int32_t kMask1[8] = {~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0};
    alignas(64) const int32_t kMask2[8] = { 0,  0,  0,  0,  0,  0,  0,  0};
    alignas(64) const int32_t kMask3[8] = {~0,  0, ~0, ~0, ~0, ~0,  0, ~0};
    alignas(64) const int32_t kMask4[8] = { 0, ~0,  0,  0,  0, ~0, ~0,  0};

    const int N = SkOpts::raster_pipeline_highp_stride;

    for (const CopySlotsOp& op : kCopyOps) {
        for (const int32_t* mask : {kMask1, kMask2, kMask3, kMask4}) {
            // Initialize the destination slots to 0,1,2.. and the source slots to 1000,1001,1002...
            std::iota(&slots[N * dstIndex], &slots[N * (dstIndex + 5)], 0.0f);
            std::iota(&slots[N * srcIndex], &slots[N * (srcIndex + 5)], 1000.0f);

            // Run `copy_slots_masked` over our data.
            SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
            SkRasterPipeline p(&alloc);
            auto* ctx = alloc.make<SkRasterPipeline_BinaryOpCtx>();
            ctx->dst = &slots[N * dstIndex];
            ctx->src = &slots[N * srcIndex];

            p.append(SkRasterPipelineOp::init_lane_masks);
            p.append(SkRasterPipelineOp::load_condition_mask, mask);
            p.append(op.stage, ctx);
            p.run(0,0,N,1);

            // Verify that the destination has been overwritten in the mask-on fields, and has not
            // been overwritten in the mask-off fields, for each destination slot.
            float expectedUnchanged = 0.0f, expectedChanged = 1000.0f;
            float* destPtr = &slots[N * dstIndex];
            for (int checkSlot = 0; checkSlot < 5; ++checkSlot) {
                for (int checkMask = 0; checkMask < N; ++checkMask) {
                    if (checkSlot < op.numSlotsAffected && mask[checkMask]) {
                        REPORTER_ASSERT(r, *destPtr == expectedChanged);
                    } else {
                        REPORTER_ASSERT(r, *destPtr == expectedUnchanged);
                    }

                    ++destPtr;
                    expectedUnchanged += 1.0f;
                    expectedChanged += 1.0f;
                }
            }
        }
    }
}
567
// Verifies the copy_N_slots_unmasked family: the affected slots are copied
// wholesale from src to dst; remaining slots are untouched.
DEF_TEST(SkRasterPipeline_CopySlotsUnmasked, r) {
    // Allocate space for 5 source slots and 5 dest slots.
    alignas(64) float slots[10 * SkRasterPipeline_kMaxStride_highp];
    const int srcIndex = 0, dstIndex = 5;
    const int N = SkOpts::raster_pipeline_highp_stride;

    struct CopySlotsOp {
        SkRasterPipelineOp stage;
        int numSlotsAffected;
    };

    static const CopySlotsOp kCopyOps[] = {
        {SkRasterPipelineOp::copy_slot_unmasked,    1},
        {SkRasterPipelineOp::copy_2_slots_unmasked, 2},
        {SkRasterPipelineOp::copy_3_slots_unmasked, 3},
        {SkRasterPipelineOp::copy_4_slots_unmasked, 4},
    };

    for (const CopySlotsOp& op : kCopyOps) {
        // Initialize the destination slots to 0,1,2.. and the source slots to 1000,1001,1002...
        std::iota(&slots[N * dstIndex], &slots[N * (dstIndex + 5)], 0.0f);
        std::iota(&slots[N * srcIndex], &slots[N * (srcIndex + 5)], 1000.0f);

        // Run `copy_slots_unmasked` over our data.
        SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
        SkRasterPipeline p(&alloc);
        auto* ctx = alloc.make<SkRasterPipeline_BinaryOpCtx>();
        ctx->dst = &slots[N * dstIndex];
        ctx->src = &slots[N * srcIndex];
        p.append(op.stage, ctx);
        p.run(0,0,1,1);

        // Verify that the destination has been overwritten in each slot.
        float expectedUnchanged = 0.0f, expectedChanged = 1000.0f;
        float* destPtr = &slots[N * dstIndex];
        for (int checkSlot = 0; checkSlot < 5; ++checkSlot) {
            for (int checkLane = 0; checkLane < N; ++checkLane) {
                if (checkSlot < op.numSlotsAffected) {
                    REPORTER_ASSERT(r, *destPtr == expectedChanged);
                } else {
                    REPORTER_ASSERT(r, *destPtr == expectedUnchanged);
                }

                ++destPtr;
                expectedUnchanged += 1.0f;
                expectedChanged += 1.0f;
            }
        }
    }
}
618
// Verifies the zero_N_slots_unmasked family: the affected slots are cleared to
// 0.0, and the remaining slots keep their original values.
DEF_TEST(SkRasterPipeline_ZeroSlotsUnmasked, r) {
    // Allocate space for 5 dest slots.
    alignas(64) float slots[5 * SkRasterPipeline_kMaxStride_highp];
    const int N = SkOpts::raster_pipeline_highp_stride;

    struct ZeroSlotsOp {
        SkRasterPipelineOp stage;
        int numSlotsAffected;
    };

    static const ZeroSlotsOp kZeroOps[] = {
        {SkRasterPipelineOp::zero_slot_unmasked,    1},
        {SkRasterPipelineOp::zero_2_slots_unmasked, 2},
        {SkRasterPipelineOp::zero_3_slots_unmasked, 3},
        {SkRasterPipelineOp::zero_4_slots_unmasked, 4},
    };

    for (const ZeroSlotsOp& op : kZeroOps) {
        // Initialize the destination slots to 1,2,3...
        std::iota(&slots[0], &slots[5 * N], 1.0f);

        // Run `zero_slots_unmasked` over our data.
        SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
        SkRasterPipeline p(&alloc);
        p.append(op.stage, &slots[0]);
        p.run(0,0,1,1);

        // Verify that the destination has been zeroed out in each slot.
        float expectedUnchanged = 1.0f;
        float* destPtr = &slots[0];
        for (int checkSlot = 0; checkSlot < 5; ++checkSlot) {
            for (int checkLane = 0; checkLane < N; ++checkLane) {
                if (checkSlot < op.numSlotsAffected) {
                    REPORTER_ASSERT(r, *destPtr == 0.0f);
                } else {
                    REPORTER_ASSERT(r, *destPtr == expectedUnchanged);
                }

                ++destPtr;
                expectedUnchanged += 1.0f;
            }
        }
    }
}
663
// Verifies the copy_N_constants family: each scalar constant is broadcast
// across every lane of its destination slot; remaining slots are untouched.
DEF_TEST(SkRasterPipeline_CopyConstants, r) {
    // Allocate space for 5 dest slots.
    alignas(64) float slots[5 * SkRasterPipeline_kMaxStride_highp];
    float constants[5];
    const int N = SkOpts::raster_pipeline_highp_stride;

    struct CopySlotsOp {
        SkRasterPipelineOp stage;
        int numSlotsAffected;
    };

    static const CopySlotsOp kCopyOps[] = {
        {SkRasterPipelineOp::copy_constant,    1},
        {SkRasterPipelineOp::copy_2_constants, 2},
        {SkRasterPipelineOp::copy_3_constants, 3},
        {SkRasterPipelineOp::copy_4_constants, 4},
    };

    for (const CopySlotsOp& op : kCopyOps) {
        // Initialize the destination slots to 1,2,3...
        std::iota(&slots[0], &slots[5 * N], 1.0f);
        // Initialize the constant buffer to 1000,1001,1002...
        std::iota(&constants[0], &constants[5], 1000.0f);

        // Run `copy_constants` over our data.
        SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
        SkRasterPipeline p(&alloc);
        auto* ctx = alloc.make<SkRasterPipeline_BinaryOpCtx>();
        ctx->dst = slots;
        ctx->src = constants;
        p.append(op.stage, ctx);
        p.run(0,0,1,1);

        // Verify that our constants have been broadcast into each slot.
        float expectedUnchanged = 1.0f;
        float expectedChanged = 1000.0f;
        float* destPtr = &slots[0];
        for (int checkSlot = 0; checkSlot < 5; ++checkSlot) {
            for (int checkLane = 0; checkLane < N; ++checkLane) {
                if (checkSlot < op.numSlotsAffected) {
                    REPORTER_ASSERT(r, *destPtr == expectedChanged);
                } else {
                    REPORTER_ASSERT(r, *destPtr == expectedUnchanged);
                }

                ++destPtr;
                expectedUnchanged += 1.0f;
            }
            // Each slot holds one broadcast constant, so bump only per slot.
            expectedChanged += 1.0f;
        }
    }
}
716
// Verifies the swizzle_N ops: slots are rearranged in place according to the
// byte offsets in SkRasterPipeline_SwizzleCtx.
DEF_TEST(SkRasterPipeline_Swizzle, r) {
    // Allocate space for 4 dest slots.
    alignas(64) float slots[4 * SkRasterPipeline_kMaxStride_highp];
    const int N = SkOpts::raster_pipeline_highp_stride;

    struct TestPattern {
        SkRasterPipelineOp stage;
        uint16_t swizzle[4];
        uint16_t expectation[4];
    };
    static const TestPattern kPatterns[] = {
        {SkRasterPipelineOp::swizzle_1, {3},          {3, 1, 2, 3}}, // (1,2,3,4).w    = (4)
        {SkRasterPipelineOp::swizzle_2, {1, 0},       {1, 0, 2, 3}}, // (1,2,3,4).yx   = (2,1)
        {SkRasterPipelineOp::swizzle_3, {2, 2, 2},    {2, 2, 2, 3}}, // (1,2,3,4).zzz  = (3,3,3)
        {SkRasterPipelineOp::swizzle_4, {0, 0, 1, 2}, {0, 0, 1, 2}}, // (1,2,3,4).xxyz = (1,1,2,3)
    };
    static_assert(sizeof(TestPattern::swizzle) == sizeof(SkRasterPipeline_SwizzleCtx::offsets));

    for (const TestPattern& pattern : kPatterns) {
        // Initialize the destination slots to 0,1,2,3...
        std::iota(&slots[0], &slots[4 * N], 0.0f);

        // Apply the test-pattern swizzle.
        SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
        SkRasterPipeline p(&alloc);
        SkRasterPipeline_SwizzleCtx ctx;
        ctx.ptr = slots;
        for (size_t index = 0; index < std::size(ctx.offsets); ++index) {
            // Offsets are expressed in bytes from the start of the slot array.
            ctx.offsets[index] = pattern.swizzle[index] * N * sizeof(float);
        }
        p.append(pattern.stage, &ctx);
        p.run(0,0,1,1);

        // Verify that the swizzle has been applied in each slot.
        float* destPtr = &slots[0];
        for (int checkSlot = 0; checkSlot < 4; ++checkSlot) {
            float expected = pattern.expectation[checkSlot] * N;
            for (int checkLane = 0; checkLane < N; ++checkLane) {
                REPORTER_ASSERT(r, *destPtr == expected);

                ++destPtr;
                expected += 1.0f;
            }
        }
    }
}
763
// Verifies the swizzle_copy_N_slots_masked ops: source slots are written into
// swizzled destination slots; destination slots not named by the swizzle stay
// untouched (zero).
DEF_TEST(SkRasterPipeline_SwizzleCopy, r) {
    const int N = SkOpts::raster_pipeline_highp_stride;

    struct TestPattern {
        SkRasterPipelineOp op;
        uint16_t swizzle[4];
        uint16_t expectation[4];
    };
    // `_` marks swizzle lanes that are unused / destination slots left untouched.
    constexpr uint16_t _ = ~0;
    static const TestPattern kPatterns[] = {
        {SkRasterPipelineOp::swizzle_copy_slot_masked,    {3,_,_,_}, {_,_,_,0}},//v.w    = (1)
        {SkRasterPipelineOp::swizzle_copy_2_slots_masked, {1,0,_,_}, {1,0,_,_}},//v.yx   = (1,2)
        {SkRasterPipelineOp::swizzle_copy_3_slots_masked, {2,3,0,_}, {2,_,0,1}},//v.zwy  = (1,2,3)
        {SkRasterPipelineOp::swizzle_copy_4_slots_masked, {3,0,1,2}, {1,2,3,0}},//v.wxyz = (1,2,3,4)
    };
    static_assert(sizeof(TestPattern::swizzle) == sizeof(SkRasterPipeline_SwizzleCopyCtx::offsets));

    for (const TestPattern& pattern : kPatterns) {
        // Allocate space for 4 dest slots, and initialize them to zero.
        alignas(64) float dest[4 * SkRasterPipeline_kMaxStride_highp] = {};

        // Allocate 4 source slots and initialize them to 1, 2, 3, 4...
        alignas(64) float source[4 * SkRasterPipeline_kMaxStride_highp] = {};
        std::iota(&source[0 * N], &source[4 * N], 1.0f);

        // Apply the dest-swizzle pattern.
        SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
        SkRasterPipeline p(&alloc);
        SkRasterPipeline_SwizzleCopyCtx ctx = {};
        ctx.src = source;
        ctx.dst = dest;
        for (size_t index = 0; index < std::size(ctx.offsets); ++index) {
            if (pattern.swizzle[index] != _) {
                // Offsets are expressed in bytes from the start of the dest array.
                ctx.offsets[index] = pattern.swizzle[index] * N * sizeof(float);
            }
        }
        p.append(SkRasterPipelineOp::init_lane_masks);
        p.append(pattern.op, &ctx);
        p.run(0,0,N,1);

        // Verify that the swizzle has been applied in each slot.
        float* destPtr = &dest[0];
        for (int checkSlot = 0; checkSlot < 4; ++checkSlot) {
            for (int checkLane = 0; checkLane < N; ++checkLane) {
                if (pattern.expectation[checkSlot] == _) {
                    REPORTER_ASSERT(r, *destPtr == 0);
                } else {
                    int expectedIdx = pattern.expectation[checkSlot] * N + checkLane;
                    REPORTER_ASSERT(r, *destPtr == source[expectedIdx]);
                }

                ++destPtr;
            }
        }
    }
}
820
DEF_TEST(SkRasterPipeline_Shuffle, r) {
    // Verifies the `shuffle` op, which rearranges the first `count` slots in place
    // according to a table of slot offsets; slots past `count` must be left unchanged.
    // Allocate space for 16 dest slots.
    alignas(64) float slots[16 * SkRasterPipeline_kMaxStride_highp];
    const int N = SkOpts::raster_pipeline_highp_stride;

    struct TestPattern {
        int count;                 // number of slots the shuffle writes
        uint16_t shuffle[16];      // source slot for each of the first `count` dest slots
        uint16_t expectation[16];  // slot whose original value we expect to find in each slot
    };
    static const TestPattern kPatterns[] = {
        // 3x3 transpose, leaving slots 9-15 alone.
        {9,  { 0, 3, 6,
               1, 4, 7,
               2, 5, 8, /* past end: */ 0, 0, 0, 0, 0, 0, 0},
             { 0, 3, 6,
               1, 4, 7,
               2, 5, 8, /* unchanged: */ 9, 10, 11, 12, 13, 14, 15}},
        // 4x4 transpose covering every slot.
        {16, { 0, 4,  8, 12,
               1, 5,  9, 13,
               2, 6, 10, 14,
               3, 7, 11, 15},
             { 0, 4,  8, 12,
               1, 5,  9, 13,
               2, 6, 10, 14,
               3, 7, 11, 15}},
    };
    static_assert(sizeof(TestPattern::shuffle) == sizeof(SkRasterPipeline_ShuffleCtx::offsets));

    for (const TestPattern& pattern : kPatterns) {
        // Initialize the destination slots to 1,2,3...
        std::iota(&slots[0], &slots[16 * N], 1.0f);

        // Apply the shuffle.
        SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
        SkRasterPipeline p(&alloc);
        SkRasterPipeline_ShuffleCtx ctx;
        ctx.ptr = slots;
        ctx.count = pattern.count;
        for (size_t index = 0; index < std::size(ctx.offsets); ++index) {
            // Offsets are byte offsets; each slot spans N lanes of floats.
            ctx.offsets[index] = pattern.shuffle[index] * N * sizeof(float);
        }
        p.append(SkRasterPipelineOp::shuffle, &ctx);
        p.run(0,0,1,1);

        // Verify that the shuffle has been applied in each slot.
        float* destPtr = &slots[0];
        for (int checkSlot = 0; checkSlot < 16; ++checkSlot) {
            float expected = pattern.expectation[checkSlot] * N + 1;
            for (int checkLane = 0; checkLane < N; ++checkLane) {
                REPORTER_ASSERT(r, *destPtr == expected);

                ++destPtr;
                expected += 1.0f;
            }
        }
    }
}
878
DEF_TEST(SkRasterPipeline_FloatArithmeticWithNSlots, r) {
    // Verifies the add/sub/mul/div_n_floats ops for 1 through 5 slots, checking both that
    // the affected dest slots hold the expected result and that trailing slots are untouched.
    // Allocate space for 5 dest and 5 source slots.
    alignas(64) float slots[10 * SkRasterPipeline_kMaxStride_highp];
    const int N = SkOpts::raster_pipeline_highp_stride;

    struct ArithmeticOp {
        SkRasterPipelineOp stage;
        std::function<float(float, float)> verify;  // reference implementation of the op
    };

    static const ArithmeticOp kArithmeticOps[] = {
        {SkRasterPipelineOp::add_n_floats, [](float a, float b) { return a + b; }},
        {SkRasterPipelineOp::sub_n_floats, [](float a, float b) { return a - b; }},
        {SkRasterPipelineOp::mul_n_floats, [](float a, float b) { return a * b; }},
        {SkRasterPipelineOp::div_n_floats, [](float a, float b) { return a / b; }},
    };

    for (const ArithmeticOp& op : kArithmeticOps) {
        for (int numSlotsAffected = 1; numSlotsAffected <= 5; ++numSlotsAffected) {
            // Initialize the slot values to 1,2,3...
            std::iota(&slots[0], &slots[10 * N], 1.0f);

            // Run the arithmetic op over our data.
            SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
            SkRasterPipeline p(&alloc);
            auto* ctx = alloc.make<SkRasterPipeline_BinaryOpCtx>();
            ctx->dst = &slots[0];
            ctx->src = &slots[numSlotsAffected * N];
            p.append(op.stage, ctx);
            p.run(0,0,1,1);

            // Verify that the affected slots now equal (1,2,3...) op (4,5,6...).
            float leftValue = 1.0f;
            float rightValue = float(numSlotsAffected * N) + 1.0f;
            float* destPtr = &slots[0];
            for (int checkSlot = 0; checkSlot < 10; ++checkSlot) {
                for (int checkLane = 0; checkLane < N; ++checkLane) {
                    if (checkSlot < numSlotsAffected) {
                        REPORTER_ASSERT(r, *destPtr == op.verify(leftValue, rightValue));
                    } else {
                        // Slots past numSlotsAffected must keep their iota values.
                        REPORTER_ASSERT(r, *destPtr == leftValue);
                    }

                    ++destPtr;
                    leftValue += 1.0f;
                    rightValue += 1.0f;
                }
            }
        }
    }
}
930
DEF_TEST(SkRasterPipeline_FloatArithmeticWithHardcodedSlots, r) {
    // Verifies the hardcoded-width float arithmetic ops (add_float ... div_4_floats).
    // These stages take a single pointer; the verification below confirms that the source
    // operands are read from the slots immediately following the dest slots.
    // Allocate space for 5 dest and 5 source slots.
    alignas(64) float slots[10 * SkRasterPipeline_kMaxStride_highp];
    const int N = SkOpts::raster_pipeline_highp_stride;

    struct ArithmeticOp {
        SkRasterPipelineOp stage;
        int numSlotsAffected;                       // slot width baked into the stage name
        std::function<float(float, float)> verify;  // reference implementation of the op
    };

    static const ArithmeticOp kArithmeticOps[] = {
        {SkRasterPipelineOp::add_float,    1, [](float a, float b) { return a + b; }},
        {SkRasterPipelineOp::sub_float,    1, [](float a, float b) { return a - b; }},
        {SkRasterPipelineOp::mul_float,    1, [](float a, float b) { return a * b; }},
        {SkRasterPipelineOp::div_float,    1, [](float a, float b) { return a / b; }},

        {SkRasterPipelineOp::add_2_floats, 2, [](float a, float b) { return a + b; }},
        {SkRasterPipelineOp::sub_2_floats, 2, [](float a, float b) { return a - b; }},
        {SkRasterPipelineOp::mul_2_floats, 2, [](float a, float b) { return a * b; }},
        {SkRasterPipelineOp::div_2_floats, 2, [](float a, float b) { return a / b; }},

        {SkRasterPipelineOp::add_3_floats, 3, [](float a, float b) { return a + b; }},
        {SkRasterPipelineOp::sub_3_floats, 3, [](float a, float b) { return a - b; }},
        {SkRasterPipelineOp::mul_3_floats, 3, [](float a, float b) { return a * b; }},
        {SkRasterPipelineOp::div_3_floats, 3, [](float a, float b) { return a / b; }},

        {SkRasterPipelineOp::add_4_floats, 4, [](float a, float b) { return a + b; }},
        {SkRasterPipelineOp::sub_4_floats, 4, [](float a, float b) { return a - b; }},
        {SkRasterPipelineOp::mul_4_floats, 4, [](float a, float b) { return a * b; }},
        {SkRasterPipelineOp::div_4_floats, 4, [](float a, float b) { return a / b; }},
    };

    for (const ArithmeticOp& op : kArithmeticOps) {
        // Initialize the slot values to 1,2,3...
        std::iota(&slots[0], &slots[10 * N], 1.0f);

        // Run the arithmetic op over our data.
        SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
        SkRasterPipeline p(&alloc);
        p.append(op.stage, &slots[0]);
        p.run(0,0,1,1);

        // Verify that the affected slots now equal (1,2,3...) op (4,5,6...).
        float leftValue = 1.0f;
        float rightValue = float(op.numSlotsAffected * N) + 1.0f;
        float* destPtr = &slots[0];
        for (int checkSlot = 0; checkSlot < 10; ++checkSlot) {
            for (int checkLane = 0; checkLane < N; ++checkLane) {
                if (checkSlot < op.numSlotsAffected) {
                    REPORTER_ASSERT(r, *destPtr == op.verify(leftValue, rightValue));
                } else {
                    // Slots past numSlotsAffected must keep their iota values.
                    REPORTER_ASSERT(r, *destPtr == leftValue);
                }

                ++destPtr;
                leftValue += 1.0f;
                rightValue += 1.0f;
            }
        }
    }
}
993
// Reference implementation of unsigned division: divides a by b with both
// operands reinterpreted as unsigned 32-bit values.
static int divide_unsigned(int a, int b) {
    const uint32_t quotient = uint32_t(a) / uint32_t(b);
    return int(quotient);
}
// Reference implementation of unsigned min: picks the smaller operand when
// both are viewed as unsigned 32-bit values.
static int min_unsigned(int a, int b) {
    return uint32_t(b) < uint32_t(a) ? b : a;
}
// Reference implementation of unsigned max: picks the larger operand when
// both are viewed as unsigned 32-bit values.
static int max_unsigned(int a, int b) {
    return uint32_t(b) > uint32_t(a) ? b : a;
}
997
DEF_TEST(SkRasterPipeline_IntArithmeticWithNSlots, r) {
    // Verifies the variable-width integer ops (add/sub/mul/div/bitwise/min/max over N slots),
    // including the unsigned variants, for 1 through 5 slots.
    // Allocate space for 5 dest and 5 source slots.
    alignas(64) int slots[10 * SkRasterPipeline_kMaxStride_highp];
    const int N = SkOpts::raster_pipeline_highp_stride;

    struct ArithmeticOp {
        SkRasterPipelineOp stage;
        std::function<int(int, int)> verify;  // reference implementation of the op
    };

    static const ArithmeticOp kArithmeticOps[] = {
        {SkRasterPipelineOp::add_n_ints,         [](int a, int b) { return a + b; }},
        {SkRasterPipelineOp::sub_n_ints,         [](int a, int b) { return a - b; }},
        {SkRasterPipelineOp::mul_n_ints,         [](int a, int b) { return a * b; }},
        {SkRasterPipelineOp::div_n_ints,         [](int a, int b) { return a / b; }},
        {SkRasterPipelineOp::div_n_uints,        divide_unsigned},
        {SkRasterPipelineOp::bitwise_and_n_ints, [](int a, int b) { return a & b; }},
        {SkRasterPipelineOp::bitwise_or_n_ints,  [](int a, int b) { return a | b; }},
        {SkRasterPipelineOp::bitwise_xor_n_ints, [](int a, int b) { return a ^ b; }},
        {SkRasterPipelineOp::min_n_ints,         [](int a, int b) { return a < b ? a : b; }},
        {SkRasterPipelineOp::min_n_uints,        min_unsigned},
        {SkRasterPipelineOp::max_n_ints,         [](int a, int b) { return a > b ? a : b; }},
        {SkRasterPipelineOp::max_n_uints,        max_unsigned},
    };

    for (const ArithmeticOp& op : kArithmeticOps) {
        for (int numSlotsAffected = 1; numSlotsAffected <= 5; ++numSlotsAffected) {
            // Initialize the slot values to 1,2,3... (all positive, so / is safe).
            std::iota(&slots[0], &slots[10 * N], 1);
            int leftValue = slots[0];
            int rightValue = slots[numSlotsAffected * N];

            // Run the op (e.g. `add_n_ints`) over our data.
            SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
            SkRasterPipeline p(&alloc);
            auto* ctx = alloc.make<SkRasterPipeline_BinaryOpCtx>();
            // BinaryOpCtx holds float pointers; the int data is reinterpreted by the stage.
            ctx->dst = (float*)&slots[0];
            ctx->src = (float*)&slots[numSlotsAffected * N];
            p.append(op.stage, ctx);
            p.run(0,0,1,1);

            // Verify that the affected slots now equal (1,2,3...) op (4,5,6...).
            int* destPtr = &slots[0];
            for (int checkSlot = 0; checkSlot < 10; ++checkSlot) {
                for (int checkLane = 0; checkLane < N; ++checkLane) {
                    if (checkSlot < numSlotsAffected) {
                        REPORTER_ASSERT(r, *destPtr == op.verify(leftValue, rightValue));
                    } else {
                        // Slots past numSlotsAffected must keep their iota values.
                        REPORTER_ASSERT(r, *destPtr == leftValue);
                    }

                    ++destPtr;
                    leftValue += 1;
                    rightValue += 1;
                }
            }
        }
    }
}
1057
DEF_TEST(SkRasterPipeline_IntArithmeticWithHardcodedSlots, r) {
    // Verifies the hardcoded-width integer ops (add_int ... max_4_uints). These stages take a
    // single pointer; verification confirms the source operands follow the dest slots.
    // Allocate space for 5 dest and 5 source slots.
    alignas(64) int slots[10 * SkRasterPipeline_kMaxStride_highp];
    const int N = SkOpts::raster_pipeline_highp_stride;

    struct ArithmeticOp {
        SkRasterPipelineOp stage;
        int numSlotsAffected;                 // slot width baked into the stage name
        std::function<int(int, int)> verify;  // reference implementation of the op
    };

    static const ArithmeticOp kArithmeticOps[] = {
        {SkRasterPipelineOp::add_int,            1, [](int a, int b) { return a + b; }},
        {SkRasterPipelineOp::sub_int,            1, [](int a, int b) { return a - b; }},
        {SkRasterPipelineOp::mul_int,            1, [](int a, int b) { return a * b; }},
        {SkRasterPipelineOp::div_int,            1, [](int a, int b) { return a / b; }},
        {SkRasterPipelineOp::div_uint,           1, divide_unsigned},
        {SkRasterPipelineOp::bitwise_and_int,    1, [](int a, int b) { return a & b; }},
        {SkRasterPipelineOp::bitwise_or_int,     1, [](int a, int b) { return a | b; }},
        {SkRasterPipelineOp::bitwise_xor_int,    1, [](int a, int b) { return a ^ b; }},
        {SkRasterPipelineOp::min_int,            1, [](int a, int b) { return a < b ? a: b; }},
        {SkRasterPipelineOp::min_uint,           1, min_unsigned},
        {SkRasterPipelineOp::max_int,            1, [](int a, int b) { return a > b ? a: b; }},
        {SkRasterPipelineOp::max_uint,           1, max_unsigned},

        {SkRasterPipelineOp::add_2_ints,         2, [](int a, int b) { return a + b; }},
        {SkRasterPipelineOp::sub_2_ints,         2, [](int a, int b) { return a - b; }},
        {SkRasterPipelineOp::mul_2_ints,         2, [](int a, int b) { return a * b; }},
        {SkRasterPipelineOp::div_2_ints,         2, [](int a, int b) { return a / b; }},
        {SkRasterPipelineOp::div_2_uints,        2, divide_unsigned},
        {SkRasterPipelineOp::bitwise_and_2_ints, 2, [](int a, int b) { return a & b; }},
        {SkRasterPipelineOp::bitwise_or_2_ints,  2, [](int a, int b) { return a | b; }},
        {SkRasterPipelineOp::bitwise_xor_2_ints, 2, [](int a, int b) { return a ^ b; }},
        {SkRasterPipelineOp::min_2_ints,         2, [](int a, int b) { return a < b ? a: b; }},
        {SkRasterPipelineOp::min_2_uints,        2, min_unsigned},
        {SkRasterPipelineOp::max_2_ints,         2, [](int a, int b) { return a > b ? a: b; }},
        {SkRasterPipelineOp::max_2_uints,        2, max_unsigned},

        {SkRasterPipelineOp::add_3_ints,         3, [](int a, int b) { return a + b; }},
        {SkRasterPipelineOp::sub_3_ints,         3, [](int a, int b) { return a - b; }},
        {SkRasterPipelineOp::mul_3_ints,         3, [](int a, int b) { return a * b; }},
        {SkRasterPipelineOp::div_3_ints,         3, [](int a, int b) { return a / b; }},
        {SkRasterPipelineOp::div_3_uints,        3, divide_unsigned},
        {SkRasterPipelineOp::bitwise_and_3_ints, 3, [](int a, int b) { return a & b; }},
        {SkRasterPipelineOp::bitwise_or_3_ints,  3, [](int a, int b) { return a | b; }},
        {SkRasterPipelineOp::bitwise_xor_3_ints, 3, [](int a, int b) { return a ^ b; }},
        {SkRasterPipelineOp::min_3_ints,         3, [](int a, int b) { return a < b ? a: b; }},
        {SkRasterPipelineOp::min_3_uints,        3, min_unsigned},
        {SkRasterPipelineOp::max_3_ints,         3, [](int a, int b) { return a > b ? a: b; }},
        {SkRasterPipelineOp::max_3_uints,        3, max_unsigned},

        {SkRasterPipelineOp::add_4_ints,         4, [](int a, int b) { return a + b; }},
        {SkRasterPipelineOp::sub_4_ints,         4, [](int a, int b) { return a - b; }},
        {SkRasterPipelineOp::mul_4_ints,         4, [](int a, int b) { return a * b; }},
        {SkRasterPipelineOp::div_4_ints,         4, [](int a, int b) { return a / b; }},
        {SkRasterPipelineOp::div_4_uints,        4, divide_unsigned},
        {SkRasterPipelineOp::bitwise_and_4_ints, 4, [](int a, int b) { return a & b; }},
        {SkRasterPipelineOp::bitwise_or_4_ints,  4, [](int a, int b) { return a | b; }},
        {SkRasterPipelineOp::bitwise_xor_4_ints, 4, [](int a, int b) { return a ^ b; }},
        {SkRasterPipelineOp::min_4_ints,         4, [](int a, int b) { return a < b ? a: b; }},
        {SkRasterPipelineOp::min_4_uints,        4, min_unsigned},
        {SkRasterPipelineOp::max_4_ints,         4, [](int a, int b) { return a > b ? a: b; }},
        {SkRasterPipelineOp::max_4_uints,        4, max_unsigned},
    };

    for (const ArithmeticOp& op : kArithmeticOps) {
        // Initialize the slot values to 1,2,3... (all positive, so / is safe).
        std::iota(&slots[0], &slots[10 * N], 1);
        int leftValue = slots[0];
        int rightValue = slots[op.numSlotsAffected * N];

        // Run the op (e.g. `add_2_ints`) over our data.
        SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
        SkRasterPipeline p(&alloc);
        p.append(op.stage, &slots[0]);
        p.run(0,0,1,1);

        // Verify that the affected slots now equal (1,2,3...) op (4,5,6...).
        int* destPtr = &slots[0];
        for (int checkSlot = 0; checkSlot < 10; ++checkSlot) {
            for (int checkLane = 0; checkLane < N; ++checkLane) {
                if (checkSlot < op.numSlotsAffected) {
                    REPORTER_ASSERT(r, *destPtr == op.verify(leftValue, rightValue));
                } else {
                    // Slots past numSlotsAffected must keep their iota values.
                    REPORTER_ASSERT(r, *destPtr == leftValue);
                }

                ++destPtr;
                leftValue += 1;
                rightValue += 1;
            }
        }
    }
}
1152
DEF_TEST(SkRasterPipeline_CompareFloatsWithNSlots, r) {
    // Verifies the cmp*_n_floats ops, which compare dest/source slots pairwise and replace
    // each dest slot with an all-ones (~0, true) or all-zero (false) lane mask.
    // Allocate space for 5 dest and 5 source slots.
    alignas(64) float slots[10 * SkRasterPipeline_kMaxStride_highp];
    const int N = SkOpts::raster_pipeline_highp_stride;

    struct CompareOp {
        SkRasterPipelineOp stage;
        std::function<bool(float, float)> verify;  // reference implementation of the comparison
    };

    static const CompareOp kCompareOps[] = {
        {SkRasterPipelineOp::cmpeq_n_floats, [](float a, float b) { return a == b; }},
        {SkRasterPipelineOp::cmpne_n_floats, [](float a, float b) { return a != b; }},
        {SkRasterPipelineOp::cmplt_n_floats, [](float a, float b) { return a < b; }},
        {SkRasterPipelineOp::cmple_n_floats, [](float a, float b) { return a <= b; }},
    };

    for (const CompareOp& op : kCompareOps) {
        for (int numSlotsAffected = 1; numSlotsAffected <= 5; ++numSlotsAffected) {
            // Initialize the slot values to 0,1,2,0,1,2,0,1,2... so that every comparison
            // outcome (less, equal, greater) occurs somewhere in the data.
            for (int index = 0; index < 10 * N; ++index) {
                slots[index] = std::fmod(index, 3.0f);
            }

            float leftValue = slots[0];
            float rightValue = slots[numSlotsAffected * N];

            // Run the comparison op over our data.
            SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
            SkRasterPipeline p(&alloc);
            auto* ctx = alloc.make<SkRasterPipeline_BinaryOpCtx>();
            ctx->dst = &slots[0];
            ctx->src = &slots[numSlotsAffected * N];
            p.append(op.stage, ctx);
            p.run(0, 0, 1, 1);

            // Verify that the affected slots now contain "(0,1,2,0...) op (1,2,0,1...)".
            float* destPtr = &slots[0];
            for (int checkSlot = 0; checkSlot < 10; ++checkSlot) {
                for (int checkLane = 0; checkLane < N; ++checkLane) {
                    if (checkSlot < numSlotsAffected) {
                        bool compareIsTrue = op.verify(leftValue, rightValue);
                        // Results are bitmasks, so read them as ints: ~0 = true, 0 = false.
                        REPORTER_ASSERT(r, *(int*)destPtr == (compareIsTrue ? ~0 : 0));
                    } else {
                        // Slots past numSlotsAffected must keep their original values.
                        REPORTER_ASSERT(r, *destPtr == leftValue);
                    }

                    ++destPtr;
                    leftValue = std::fmod(leftValue + 1.0f, 3.0f);
                    rightValue = std::fmod(rightValue + 1.0f, 3.0f);
                }
            }
        }
    }
}
1208
DEF_TEST(SkRasterPipeline_CompareFloatsWithHardcodedSlots, r) {
    // Verifies the hardcoded-width float comparison ops (cmpeq_float ... cmple_4_floats).
    // Each dest slot is replaced with an all-ones (~0, true) or all-zero (false) lane mask.
    // Allocate space for 5 dest and 5 source slots.
    alignas(64) float slots[10 * SkRasterPipeline_kMaxStride_highp];
    const int N = SkOpts::raster_pipeline_highp_stride;

    struct CompareOp {
        SkRasterPipelineOp stage;
        int numSlotsAffected;                      // slot width baked into the stage name
        std::function<bool(float, float)> verify;  // reference implementation of the comparison
    };

    static const CompareOp kCompareOps[] = {
        {SkRasterPipelineOp::cmpeq_float,    1, [](float a, float b) { return a == b; }},
        {SkRasterPipelineOp::cmpne_float,    1, [](float a, float b) { return a != b; }},
        {SkRasterPipelineOp::cmplt_float,    1, [](float a, float b) { return a < b; }},
        {SkRasterPipelineOp::cmple_float,    1, [](float a, float b) { return a <= b; }},

        {SkRasterPipelineOp::cmpeq_2_floats, 2, [](float a, float b) { return a == b; }},
        {SkRasterPipelineOp::cmpne_2_floats, 2, [](float a, float b) { return a != b; }},
        {SkRasterPipelineOp::cmplt_2_floats, 2, [](float a, float b) { return a < b; }},
        {SkRasterPipelineOp::cmple_2_floats, 2, [](float a, float b) { return a <= b; }},

        {SkRasterPipelineOp::cmpeq_3_floats, 3, [](float a, float b) { return a == b; }},
        {SkRasterPipelineOp::cmpne_3_floats, 3, [](float a, float b) { return a != b; }},
        {SkRasterPipelineOp::cmplt_3_floats, 3, [](float a, float b) { return a < b; }},
        {SkRasterPipelineOp::cmple_3_floats, 3, [](float a, float b) { return a <= b; }},

        {SkRasterPipelineOp::cmpeq_4_floats, 4, [](float a, float b) { return a == b; }},
        {SkRasterPipelineOp::cmpne_4_floats, 4, [](float a, float b) { return a != b; }},
        {SkRasterPipelineOp::cmplt_4_floats, 4, [](float a, float b) { return a < b; }},
        {SkRasterPipelineOp::cmple_4_floats, 4, [](float a, float b) { return a <= b; }},
    };

    for (const CompareOp& op : kCompareOps) {
        // Initialize the slot values to 0,1,2,0,1,2,0,1,2... so that every comparison
        // outcome (less, equal, greater) occurs somewhere in the data.
        for (int index = 0; index < 10 * N; ++index) {
            slots[index] = std::fmod(index, 3.0f);
        }

        float leftValue = slots[0];
        float rightValue = slots[op.numSlotsAffected * N];

        // Run the comparison op over our data.
        SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
        SkRasterPipeline p(&alloc);
        p.append(op.stage, &slots[0]);
        p.run(0, 0, 1, 1);

        // Verify that the affected slots now contain "(0,1,2,0...) op (1,2,0,1...)".
        float* destPtr = &slots[0];
        for (int checkSlot = 0; checkSlot < 10; ++checkSlot) {
            for (int checkLane = 0; checkLane < N; ++checkLane) {
                if (checkSlot < op.numSlotsAffected) {
                    bool compareIsTrue = op.verify(leftValue, rightValue);
                    // Results are bitmasks, so read them as ints: ~0 = true, 0 = false.
                    REPORTER_ASSERT(r, *(int*)destPtr == (compareIsTrue ? ~0 : 0));
                } else {
                    // Slots past numSlotsAffected must keep their original values.
                    REPORTER_ASSERT(r, *destPtr == leftValue);
                }

                ++destPtr;
                leftValue = std::fmod(leftValue + 1.0f, 3.0f);
                rightValue = std::fmod(rightValue + 1.0f, 3.0f);
            }
        }
    }
}
1275
// Reference implementation of unsigned less-than over int bit patterns.
static bool compare_lt_uint(int a, int b) {
    const uint32_t ua = uint32_t(a);
    const uint32_t ub = uint32_t(b);
    return ua < ub;
}
// Reference implementation of unsigned less-than-or-equal over int bit patterns.
static bool compare_lteq_uint(int a, int b) {
    const uint32_t ua = uint32_t(a);
    const uint32_t ub = uint32_t(b);
    return ua <= ub;
}
1278
DEF_TEST(SkRasterPipeline_CompareIntsWithNSlots, r) {
    // Verifies the cmp*_n_ints/uints ops, which compare dest/source slots pairwise and
    // replace each dest slot with ~0 (true) or 0 (false).
    // Allocate space for 5 dest and 5 source slots.
    alignas(64) int slots[10 * SkRasterPipeline_kMaxStride_highp];
    const int N = SkOpts::raster_pipeline_highp_stride;

    struct CompareOp {
        SkRasterPipelineOp stage;
        std::function<bool(int, int)> verify;  // reference implementation of the comparison
    };

    static const CompareOp kCompareOps[] = {
        {SkRasterPipelineOp::cmpeq_n_ints,  [](int a, int b) { return a == b; }},
        {SkRasterPipelineOp::cmpne_n_ints,  [](int a, int b) { return a != b; }},
        {SkRasterPipelineOp::cmplt_n_ints,  [](int a, int b) { return a < b; }},
        {SkRasterPipelineOp::cmple_n_ints,  [](int a, int b) { return a <= b; }},
        {SkRasterPipelineOp::cmplt_n_uints, compare_lt_uint},
        {SkRasterPipelineOp::cmple_n_uints, compare_lteq_uint},
    };

    for (const CompareOp& op : kCompareOps) {
        for (int numSlotsAffected = 1; numSlotsAffected <= 5; ++numSlotsAffected) {
            // Initialize the slot values to -1,0,1,-1,0,1,-1,0,1,-1... The negative values
            // exercise the difference between the signed and unsigned comparisons.
            for (int index = 0; index < 10 * N; ++index) {
                slots[index] = (index % 3) - 1;
            }

            int leftValue = slots[0];
            int rightValue = slots[numSlotsAffected * N];

            // Run the comparison op over our data.
            SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
            SkRasterPipeline p(&alloc);
            auto* ctx = alloc.make<SkRasterPipeline_BinaryOpCtx>();
            // BinaryOpCtx holds float pointers; the int data is reinterpreted by the stage.
            ctx->dst = (float*)&slots[0];
            ctx->src = (float*)&slots[numSlotsAffected * N];
            p.append(op.stage, ctx);
            p.run(0, 0, 1, 1);

            // Verify that the affected slots now contain "(-1,0,1,-1...) op (0,1,-1,0...)".
            int* destPtr = &slots[0];
            for (int checkSlot = 0; checkSlot < 10; ++checkSlot) {
                for (int checkLane = 0; checkLane < N; ++checkLane) {
                    if (checkSlot < numSlotsAffected) {
                        bool compareIsTrue = op.verify(leftValue, rightValue);
                        REPORTER_ASSERT(r, *destPtr == (compareIsTrue ? ~0 : 0));
                    } else {
                        // Slots past numSlotsAffected must keep their original values.
                        REPORTER_ASSERT(r, *destPtr == leftValue);
                    }

                    ++destPtr;
                    // Step the expected values through the repeating -1,0,1 cycle.
                    if (++leftValue == 2) {
                        leftValue = -1;
                    }
                    if (++rightValue == 2) {
                        rightValue = -1;
                    }
                }
            }
        }
    }
}
1340
DEF_TEST(SkRasterPipeline_CompareIntsWithHardcodedSlots, r) {
    // Verifies the hardcoded-width integer comparison ops (cmpeq_int ... cmple_4_uints).
    // Each dest slot is replaced with ~0 (true) or 0 (false).
    // Allocate space for 5 dest and 5 source slots.
    alignas(64) int slots[10 * SkRasterPipeline_kMaxStride_highp];
    const int N = SkOpts::raster_pipeline_highp_stride;

    struct CompareOp {
        SkRasterPipelineOp stage;
        int numSlotsAffected;                  // slot width baked into the stage name
        std::function<bool(int, int)> verify;  // reference implementation of the comparison
    };

    static const CompareOp kCompareOps[] = {
        {SkRasterPipelineOp::cmpeq_int,     1, [](int a, int b) { return a == b; }},
        {SkRasterPipelineOp::cmpne_int,     1, [](int a, int b) { return a != b; }},
        {SkRasterPipelineOp::cmplt_int,     1, [](int a, int b) { return a < b; }},
        {SkRasterPipelineOp::cmple_int,     1, [](int a, int b) { return a <= b; }},
        {SkRasterPipelineOp::cmplt_uint,    1, compare_lt_uint},
        {SkRasterPipelineOp::cmple_uint,    1, compare_lteq_uint},

        {SkRasterPipelineOp::cmpeq_2_ints,  2, [](int a, int b) { return a == b; }},
        {SkRasterPipelineOp::cmpne_2_ints,  2, [](int a, int b) { return a != b; }},
        {SkRasterPipelineOp::cmplt_2_ints,  2, [](int a, int b) { return a < b; }},
        {SkRasterPipelineOp::cmple_2_ints,  2, [](int a, int b) { return a <= b; }},
        {SkRasterPipelineOp::cmplt_2_uints, 2, compare_lt_uint},
        {SkRasterPipelineOp::cmple_2_uints, 2, compare_lteq_uint},

        {SkRasterPipelineOp::cmpeq_3_ints,  3, [](int a, int b) { return a == b; }},
        {SkRasterPipelineOp::cmpne_3_ints,  3, [](int a, int b) { return a != b; }},
        {SkRasterPipelineOp::cmplt_3_ints,  3, [](int a, int b) { return a < b; }},
        {SkRasterPipelineOp::cmple_3_ints,  3, [](int a, int b) { return a <= b; }},
        {SkRasterPipelineOp::cmplt_3_uints, 3, compare_lt_uint},
        {SkRasterPipelineOp::cmple_3_uints, 3, compare_lteq_uint},

        {SkRasterPipelineOp::cmpeq_4_ints,  4, [](int a, int b) { return a == b; }},
        {SkRasterPipelineOp::cmpne_4_ints,  4, [](int a, int b) { return a != b; }},
        {SkRasterPipelineOp::cmplt_4_ints,  4, [](int a, int b) { return a < b; }},
        {SkRasterPipelineOp::cmple_4_ints,  4, [](int a, int b) { return a <= b; }},
        {SkRasterPipelineOp::cmplt_4_uints, 4, compare_lt_uint},
        {SkRasterPipelineOp::cmple_4_uints, 4, compare_lteq_uint},
    };

    for (const CompareOp& op : kCompareOps) {
        // Initialize the slot values to -1,0,1,-1,0,1,-1,0,1,-1... The negative values
        // exercise the difference between the signed and unsigned comparisons.
        for (int index = 0; index < 10 * N; ++index) {
            slots[index] = (index % 3) - 1;
        }

        int leftValue = slots[0];
        int rightValue = slots[op.numSlotsAffected * N];

        // Run the comparison op over our data.
        SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
        SkRasterPipeline p(&alloc);
        p.append(op.stage, &slots[0]);
        p.run(0, 0, 1, 1);

        // Verify that the affected slots now contain "(-1,0,1,-1...) op (0,1,-1,0...)".
        int* destPtr = &slots[0];
        for (int checkSlot = 0; checkSlot < 10; ++checkSlot) {
            for (int checkLane = 0; checkLane < N; ++checkLane) {
                if (checkSlot < op.numSlotsAffected) {
                    bool compareIsTrue = op.verify(leftValue, rightValue);
                    REPORTER_ASSERT(r, *destPtr == (compareIsTrue ? ~0 : 0));
                } else {
                    // Slots past numSlotsAffected must keep their original values.
                    REPORTER_ASSERT(r, *destPtr == leftValue);
                }

                ++destPtr;
                // Step the expected values through the repeating -1,0,1 cycle.
                if (++leftValue == 2) {
                    leftValue = -1;
                }
                if (++rightValue == 2) {
                    rightValue = -1;
                }
            }
        }
    }
}
1419
to_float(int a)1420 static int to_float(int a) { return sk_bit_cast<int>((float)a); }
1421
DEF_TEST(SkRasterPipeline_UnaryIntOps, r) {
    // Verifies the unary integer ops (bitwise-not, int-to-float cast, abs) over 1-4 slots.
    // Allocate space for 5 slots.
    alignas(64) int slots[5 * SkRasterPipeline_kMaxStride_highp];
    const int N = SkOpts::raster_pipeline_highp_stride;

    struct UnaryOp {
        SkRasterPipelineOp stage;
        int numSlotsAffected;            // slot width baked into the stage name
        std::function<int(int)> verify;  // reference implementation of the op
    };

    static const UnaryOp kUnaryOps[] = {
        {SkRasterPipelineOp::bitwise_not_int,          1, [](int a) { return ~a; }},
        {SkRasterPipelineOp::bitwise_not_2_ints,       2, [](int a) { return ~a; }},
        {SkRasterPipelineOp::bitwise_not_3_ints,       3, [](int a) { return ~a; }},
        {SkRasterPipelineOp::bitwise_not_4_ints,       4, [](int a) { return ~a; }},

        // Cast results are float bit patterns, compared as ints via to_float.
        {SkRasterPipelineOp::cast_to_float_from_int,     1, to_float},
        {SkRasterPipelineOp::cast_to_float_from_2_ints,  2, to_float},
        {SkRasterPipelineOp::cast_to_float_from_3_ints,  3, to_float},
        {SkRasterPipelineOp::cast_to_float_from_4_ints,  4, to_float},

        {SkRasterPipelineOp::abs_int,                  1, [](int a) { return a < 0 ? -a : a; }},
        {SkRasterPipelineOp::abs_2_ints,               2, [](int a) { return a < 0 ? -a : a; }},
        {SkRasterPipelineOp::abs_3_ints,               3, [](int a) { return a < 0 ? -a : a; }},
        {SkRasterPipelineOp::abs_4_ints,               4, [](int a) { return a < 0 ? -a : a; }},
    };

    for (const UnaryOp& op : kUnaryOps) {
        // Initialize the slot values to -10,-9,-8... (negatives exercise abs and casts).
        std::iota(&slots[0], &slots[5 * N], -10);
        int inputValue = slots[0];

        // Run the unary op over our data.
        SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
        SkRasterPipeline p(&alloc);
        p.append(op.stage, &slots[0]);
        p.run(0, 0, 1, 1);

        // Verify that the destination slots have been updated.
        int* destPtr = &slots[0];
        for (int checkSlot = 0; checkSlot < 5; ++checkSlot) {
            for (int checkLane = 0; checkLane < N; ++checkLane) {
                if (checkSlot < op.numSlotsAffected) {
                    int expected = op.verify(inputValue);
                    REPORTER_ASSERT(r, *destPtr == expected);
                } else {
                    // Slots past numSlotsAffected must keep their iota values.
                    REPORTER_ASSERT(r, *destPtr == inputValue);
                }

                ++destPtr;
                ++inputValue;
            }
        }
    }
}
1478
to_int(float a)1479 static float to_int(float a) { return sk_bit_cast<float>((int)a); }
to_uint(float a)1480 static float to_uint(float a) { return sk_bit_cast<float>((unsigned int)a); }
1481
DEF_TEST(SkRasterPipeline_UnaryFloatOps, r) {
    // Verifies the unary float ops (casts to int/uint, abs, floor, ceil) over 1-4 slots.
    // Allocate space for 5 slots.
    alignas(64) float slots[5 * SkRasterPipeline_kMaxStride_highp];
    const int N = SkOpts::raster_pipeline_highp_stride;

    struct UnaryOp {
        SkRasterPipelineOp stage;
        int numSlotsAffected;                // slot width baked into the stage name
        std::function<float(float)> verify;  // reference implementation of the op
    };

    static const UnaryOp kUnaryOps[] = {
        {SkRasterPipelineOp::cast_to_int_from_float,     1, to_int},
        {SkRasterPipelineOp::cast_to_int_from_2_floats,  2, to_int},
        {SkRasterPipelineOp::cast_to_int_from_3_floats,  3, to_int},
        {SkRasterPipelineOp::cast_to_int_from_4_floats,  4, to_int},

        {SkRasterPipelineOp::cast_to_uint_from_float,    1, to_uint},
        {SkRasterPipelineOp::cast_to_uint_from_2_floats, 2, to_uint},
        {SkRasterPipelineOp::cast_to_uint_from_3_floats, 3, to_uint},
        {SkRasterPipelineOp::cast_to_uint_from_4_floats, 4, to_uint},

        {SkRasterPipelineOp::abs_float,      1, [](float a) { return a < 0 ? -a : a; }},
        {SkRasterPipelineOp::abs_2_floats,   2, [](float a) { return a < 0 ? -a : a; }},
        {SkRasterPipelineOp::abs_3_floats,   3, [](float a) { return a < 0 ? -a : a; }},
        {SkRasterPipelineOp::abs_4_floats,   4, [](float a) { return a < 0 ? -a : a; }},

        {SkRasterPipelineOp::floor_float,    1, [](float a) { return floorf(a); }},
        {SkRasterPipelineOp::floor_2_floats, 2, [](float a) { return floorf(a); }},
        {SkRasterPipelineOp::floor_3_floats, 3, [](float a) { return floorf(a); }},
        {SkRasterPipelineOp::floor_4_floats, 4, [](float a) { return floorf(a); }},

        {SkRasterPipelineOp::ceil_float,     1, [](float a) { return ceilf(a); }},
        {SkRasterPipelineOp::ceil_2_floats,  2, [](float a) { return ceilf(a); }},
        {SkRasterPipelineOp::ceil_3_floats,  3, [](float a) { return ceilf(a); }},
        {SkRasterPipelineOp::ceil_4_floats,  4, [](float a) { return ceilf(a); }},
    };

    for (const UnaryOp& op : kUnaryOps) {
        // The result of some ops are undefined with negative inputs, so only test positive values.
        bool positiveOnly = (op.stage == SkRasterPipelineOp::cast_to_uint_from_float ||
                             op.stage == SkRasterPipelineOp::cast_to_uint_from_2_floats ||
                             op.stage == SkRasterPipelineOp::cast_to_uint_from_3_floats ||
                             op.stage == SkRasterPipelineOp::cast_to_uint_from_4_floats);

        // Use a fractional start so floor/ceil have non-trivial work to do.
        float iotaStart = positiveOnly ? 1.0f : -9.75f;
        std::iota(&slots[0], &slots[5 * N], iotaStart);
        float inputValue = slots[0];

        // Run the unary op over our data.
        SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
        SkRasterPipeline p(&alloc);
        p.append(op.stage, &slots[0]);
        p.run(0, 0, 1, 1);

        // Verify that the destination slots have been updated.
        float* destPtr = &slots[0];
        for (int checkSlot = 0; checkSlot < 5; ++checkSlot) {
            for (int checkLane = 0; checkLane < N; ++checkLane) {
                if (checkSlot < op.numSlotsAffected) {
                    float expected = op.verify(inputValue);
                    // The casting tests can generate NaN, depending on the input value, so a value
                    // match (via ==) might not succeed.
                    // The ceil tests can generate negative zeros _sometimes_, depending on the
                    // exact implementation of ceil(), so a bitwise match might not succeed.
                    // Because of this, we allow either a value match or a bitwise match.
                    bool bitwiseMatch = (0 == memcmp(destPtr, &expected, sizeof(float)));
                    bool valueMatch   = (*destPtr == expected);
                    REPORTER_ASSERT(r, valueMatch || bitwiseMatch);
                } else {
                    // Slots past numSlotsAffected must keep their iota values.
                    REPORTER_ASSERT(r, *destPtr == inputValue);
                }

                ++destPtr;
                ++inputValue;
            }
        }
    }
}
1561
// Maps a positive value onto a mix-weight in [0, 1) by taking the fractional
// part of (value / 16).
static float to_mix_weight(float value) {
    const float scaled = value / 16.0f;
    return scaled - std::floor(scaled);
}
1567
DEF_TEST(SkRasterPipeline_MixTest, r) {
    // Allocate space for 5 dest and 10 source slots.
    alignas(64) float slots[15 * SkRasterPipeline_kMaxStride_highp];
    const int N = SkOpts::raster_pipeline_highp_stride;

    // One entry per mix-op flavor. `numSlotsAffected` is how many dest slots the
    // flavor blends; `append` adds that op to the pipeline.
    struct MixOp {
        int numSlotsAffected;
        std::function<void(SkRasterPipeline*, SkArenaAlloc*)> append;
    };

    static const MixOp kMixOps[] = {
        {1, [&](SkRasterPipeline* p, SkArenaAlloc* alloc) {
                p->append(SkRasterPipelineOp::mix_float, slots);
            }},
        {2, [&](SkRasterPipeline* p, SkArenaAlloc* alloc) {
                p->append(SkRasterPipelineOp::mix_2_floats, slots);
            }},
        {3, [&](SkRasterPipeline* p, SkArenaAlloc* alloc) {
                p->append(SkRasterPipelineOp::mix_3_floats, slots);
            }},
        {4, [&](SkRasterPipeline* p, SkArenaAlloc* alloc) {
                p->append(SkRasterPipelineOp::mix_4_floats, slots);
            }},
        {5, [&](SkRasterPipeline* p, SkArenaAlloc* alloc) {
                // The n-ary flavor takes an explicit context describing the three
                // operand groups, instead of one packed slot block.
                auto* ctx = alloc->make<SkRasterPipeline_TernaryOpCtx>();
                ctx->dst = &slots[0];
                ctx->src0 = &slots[5 * N];
                ctx->src1 = &slots[10 * N];
                p->append(SkRasterPipelineOp::mix_n_floats, ctx);
            }},
    };

    for (const MixOp& op : kMixOps) {
        // Initialize the values to 1,2,3...
        std::iota(&slots[0], &slots[15 * N], 1.0f);

        // Remember the starting weight/from/to values before the weights get
        // squashed into [0,1) below. Operand groups are laid out back-to-back:
        // weights first, then from-values, then to-values.
        float weightValue = slots[0];
        float fromValue = slots[1 * op.numSlotsAffected * N];
        float toValue = slots[2 * op.numSlotsAffected * N];

        // The first group of values (the weights) must be between zero and one.
        for (int idx = 0; idx < 1 * op.numSlotsAffected * N; ++idx) {
            slots[idx] = to_mix_weight(slots[idx]);
        }

        // Run the mix op over our data.
        SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
        SkRasterPipeline p(&alloc);
        op.append(&p, &alloc);
        p.run(0,0,1,1);

        // Verify that each affected slot now equals mix(weight, from, to),
        // i.e. (to - from) * weight + from.
        float* destPtr = &slots[0];
        for (int checkSlot = 0; checkSlot < op.numSlotsAffected; ++checkSlot) {
            for (int checkLane = 0; checkLane < N; ++checkLane) {
                float checkValue = (toValue - fromValue) * to_mix_weight(weightValue) + fromValue;
                REPORTER_ASSERT(r, *destPtr == checkValue);

                ++destPtr;
                fromValue += 1.0f;
                toValue += 1.0f;
                weightValue += 1.0f;
            }
        }
    }
}
1634
DEF_TEST(SkRasterPipeline_Jump, r) {
    // Allocate space for 4 slots.
    alignas(64) float slots[4 * SkRasterPipeline_kMaxStride_highp] = {};
    const int N = SkOpts::raster_pipeline_highp_stride;

    alignas(64) static constexpr float kColorDarkRed[4] = {0.5f, 0.0f, 0.0f, 0.75f};
    alignas(64) static constexpr float kColorGreen[4] = {0.0f, 1.0f, 0.0f, 1.0f};
    const int offset = 2;

    // Build a program whose jump hops over the dark-red color assignment.
    SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
    SkRasterPipeline p(&alloc);
    p.append_constant_color(&alloc, kColorGreen);    // assign green
    p.append(SkRasterPipelineOp::jump, &offset);     // jump over the dark-red assignment
    p.append_constant_color(&alloc, kColorDarkRed);  // (not executed)
    p.append(SkRasterPipelineOp::store_src, slots);  // store the result so we can check it
    p.run(0,0,1,1);

    // Every lane of every slot should still hold green.
    for (int slot = 0; slot < 4; ++slot) {
        for (int lane = 0; lane < N; ++lane) {
            REPORTER_ASSERT(r, slots[slot * N + lane] == kColorGreen[slot]);
        }
    }
}
1662
DEF_TEST(SkRasterPipeline_BranchIfAnyActiveLanes, r) {
    // Allocate space for 4 slots.
    alignas(64) float slots[4 * SkRasterPipeline_kMaxStride_highp] = {};
    const int N = SkOpts::raster_pipeline_highp_stride;

    alignas(64) static constexpr float kColorDarkRed[4] = {0.5f, 0.0f, 0.0f, 0.75f};
    alignas(64) static constexpr float kColorGreen[4] = {0.0f, 1.0f, 0.0f, 1.0f};
    // An offset of 2 means a taken branch skips the single stage that follows it
    // (same convention as the jump op in SkRasterPipeline_Jump).
    SkRasterPipeline_BranchCtx ctx;
    ctx.offset = 2;

    // An array of all zeros.
    alignas(64) static constexpr int32_t kNoLanesActive[4 * SkRasterPipeline_kMaxStride_highp] = {};

    // An array of all zeros, except for a single ~0 in the first dA slot.
    alignas(64) int32_t oneLaneActive[4 * SkRasterPipeline_kMaxStride_highp] = {};
    oneLaneActive[3*N] = ~0;

    // Make a program which conditionally branches past two append_constant_color ops.
    SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
    SkRasterPipeline p(&alloc);
    p.append_constant_color(&alloc, kColorDarkRed);                 // set the color to dark red
    p.append(SkRasterPipelineOp::load_dst, kNoLanesActive);         // make no lanes active
    p.append(SkRasterPipelineOp::branch_if_any_active_lanes, &ctx); // do not skip past next line
    p.append_constant_color(&alloc, kColorGreen);                   // set the color to green
    p.append(SkRasterPipelineOp::load_dst, oneLaneActive);          // set one lane active
    p.append(SkRasterPipelineOp::branch_if_any_active_lanes, &ctx); // skip past next line
    p.append_constant_color(&alloc, kColorDarkRed);                 // (not executed)
    p.append(SkRasterPipelineOp::init_lane_masks);                  // set all lanes active
    p.append(SkRasterPipelineOp::branch_if_any_active_lanes, &ctx); // skip past next line
    p.append_constant_color(&alloc, kColorDarkRed);                 // (not executed)
    p.append(SkRasterPipelineOp::store_src, slots);                 // store final color
    p.run(0,0,1,1);

    // Verify that the slots contain green: only the branch that found zero
    // active lanes fell through, so the dark-red assignments never ran.
    float* destPtr = &slots[0];
    for (int checkSlot = 0; checkSlot < 4; ++checkSlot) {
        for (int checkLane = 0; checkLane < N; ++checkLane) {
            REPORTER_ASSERT(r, *destPtr == kColorGreen[checkSlot]);
            ++destPtr;
        }
    }
}
1705
DEF_TEST(SkRasterPipeline_BranchIfNoActiveLanes, r) {
    // Allocate space for 4 slots.
    alignas(64) float slots[4 * SkRasterPipeline_kMaxStride_highp] = {};
    const int N = SkOpts::raster_pipeline_highp_stride;

    alignas(64) static constexpr float kColorBlack[4] = {0.0f, 0.0f, 0.0f, 0.0f};
    alignas(64) static constexpr float kColorRed[4] = {1.0f, 0.0f, 0.0f, 1.0f};
    alignas(64) static constexpr float kColorBlue[4] = {0.0f, 0.0f, 1.0f, 1.0f};
    // An offset of 2 means a taken branch skips the single stage that follows it.
    SkRasterPipeline_BranchCtx ctx;
    ctx.offset = 2;

    // An array of all zeros.
    alignas(64) static constexpr int32_t kNoLanesActive[4 * SkRasterPipeline_kMaxStride_highp] = {};

    // An array of all zeros, except for a single ~0 in the first dA slot.
    alignas(64) int32_t oneLaneActive[4 * SkRasterPipeline_kMaxStride_highp] = {};
    oneLaneActive[3*N] = ~0;

    // Make a program which conditionally branches past a append_constant_color op.
    SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
    SkRasterPipeline p(&alloc);
    p.append_constant_color(&alloc, kColorBlack);                  // set the color to black
    p.append(SkRasterPipelineOp::init_lane_masks);                 // set all lanes active
    p.append(SkRasterPipelineOp::branch_if_no_active_lanes, &ctx); // do not skip past next line
    p.append_constant_color(&alloc, kColorRed);                    // sets the color to red
    p.append(SkRasterPipelineOp::load_dst, oneLaneActive);         // set one lane active
    p.append(SkRasterPipelineOp::branch_if_no_active_lanes, &ctx); // do not skip past next line
    p.append(SkRasterPipelineOp::swap_rb);                         // swap R and B (making blue)
    p.append(SkRasterPipelineOp::load_dst, kNoLanesActive);        // make no lanes active
    p.append(SkRasterPipelineOp::branch_if_no_active_lanes, &ctx); // skip past next line
    p.append_constant_color(&alloc, kColorBlack);                  // (not executed)
    p.append(SkRasterPipelineOp::store_src, slots);                // store final blue color
    p.run(0,0,1,1);

    // Verify that the slots contain blue: the branch only fired once all lanes
    // were inactive, so the final black assignment was skipped.
    float* destPtr = &slots[0];
    for (int checkSlot = 0; checkSlot < 4; ++checkSlot) {
        for (int checkLane = 0; checkLane < N; ++checkLane) {
            REPORTER_ASSERT(r, *destPtr == kColorBlue[checkSlot]);
            ++destPtr;
        }
    }
}
1749
DEF_TEST(SkRasterPipeline_BranchIfActiveLanesEqual, r) {
    // Allocate space for 4 slots.
    alignas(64) float slots[4 * SkRasterPipeline_kMaxStride_highp] = {};
    const int N = SkOpts::raster_pipeline_highp_stride;

    alignas(64) static constexpr float kColorBlack[4] = {0.0f, 0.0f, 0.0f, 0.0f};
    alignas(64) static constexpr float kColorRed[4] = {1.0f, 0.0f, 0.0f, 1.0f};

    // An array of all 6s.
    alignas(64) int allSixes[SkRasterPipeline_kMaxStride_highp] = {};
    std::fill(std::begin(allSixes), std::end(allSixes), 6);

    // An array of all 6s, except for a single 5 in one lane.
    alignas(64) int mostlySixesWithOneFive[SkRasterPipeline_kMaxStride_highp] = {};
    std::fill(std::begin(mostlySixesWithOneFive), std::end(mostlySixesWithOneFive), 6);
    mostlySixesWithOneFive[N - 1] = 5;

    // A condition mask with all lanes on except the last one -- the lane which
    // holds the 5 in `mostlySixesWithOneFive`.
    alignas(64) int mask[SkRasterPipeline_kMaxStride_highp] = {};
    std::fill(std::begin(mask), std::end(mask), ~0);
    mask[N - 1] = 0;

    // branch_if_no_active_lanes_eq branches when NO active lane equals `value`.
    SkRasterPipeline_BranchIfEqualCtx matching; // all-six vs five: no lane equals 5, branch taken
    matching.offset = 2;
    matching.value = 5;
    matching.ptr = allSixes;

    SkRasterPipeline_BranchIfEqualCtx nonmatching; // mostly-six vs five: one lane equals 5, no branch
    nonmatching.offset = 2;
    nonmatching.value = 5;
    nonmatching.ptr = mostlySixesWithOneFive;

    // Make a program which conditionally branches past a swap_rb op.
    SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
    SkRasterPipeline p(&alloc);
    p.append_constant_color(&alloc, kColorBlack);                            // set the color to black
    p.append(SkRasterPipelineOp::init_lane_masks);                           // set all lanes active
    p.append(SkRasterPipelineOp::branch_if_no_active_lanes_eq, &nonmatching);// don't skip next line
    p.append_constant_color(&alloc, kColorRed);                              // set the color to red
    p.append(SkRasterPipelineOp::branch_if_no_active_lanes_eq, &matching);   // do skip next line
    p.append(SkRasterPipelineOp::swap_rb);                                   // swap R and B (= blue)
    p.append(SkRasterPipelineOp::load_condition_mask, mask);                 // mask off the lane holding the 5
    p.append(SkRasterPipelineOp::branch_if_no_active_lanes_eq, &nonmatching);// do skip next line
    p.append(SkRasterPipelineOp::white_color);                               // (not executed)
    p.append(SkRasterPipelineOp::store_src, slots);                          // store final red color
    p.run(0,0,SkOpts::raster_pipeline_highp_stride,1);

    // Verify that the slots contain red.
    float* destPtr = &slots[0];
    for (int checkSlot = 0; checkSlot < 4; ++checkSlot) {
        for (int checkLane = 0; checkLane < N; ++checkLane) {
            REPORTER_ASSERT(r, *destPtr == kColorRed[checkSlot]);
            ++destPtr;
        }
    }
}
1806
DEF_TEST(SkRasterPipeline_empty, r) {
    // No asserts... just checks that running a pipeline with no stages is safe.
    SkRasterPipeline_<256> pipeline;
    pipeline.run(0, 0, 20, 1);
}
1812
DEF_TEST(SkRasterPipeline_nonsense, r) {
    // No asserts... just checks that this runs safely and terminates.
    // srcover() calls st->next(); this makes sure we've always got something there to call.
    SkRasterPipeline_<256> pipeline;
    pipeline.append(SkRasterPipelineOp::srcover);
    pipeline.run(0, 0, 20, 1);
}
1820
DEF_TEST(SkRasterPipeline_JIT, r) {
    // This tests a couple odd corners that a JIT backend can stumble over.

    uint32_t buf[72] = {
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12,
        13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    };

    SkRasterPipeline_MemoryCtx src = { buf + 0, 0 },
                               dst = { buf + 36, 0 };

    // Copy buf[x] to buf[x+36] for x in [15,35).
    SkRasterPipeline_<256> p;
    p.append(SkRasterPipelineOp::load_8888, &src);
    p.append(SkRasterPipelineOp::store_8888, &dst);
    p.run(15,0, 20,1);

    // Inside the copied window buf[x] held x-11; everything else stays zero.
    for (int i = 0; i < 36; i++) {
        const uint32_t expected = (i < 15 || i == 35) ? 0u : static_cast<uint32_t>(i - 11);
        REPORTER_ASSERT(r, buf[i + 36] == expected);
    }
}
1850
h(float f)1851 static uint16_t h(float f) {
1852 // Remember, a float is 1-8-23 (sign-exponent-mantissa) with 127 exponent bias.
1853 uint32_t sem;
1854 memcpy(&sem, &f, sizeof(sem));
1855 uint32_t s = sem & 0x80000000,
1856 em = sem ^ s;
1857
1858 // Convert to 1-5-10 half with 15 bias, flushing denorm halfs (including zero) to zero.
1859 auto denorm = (int32_t)em < 0x38800000; // I32 comparison is often quicker, and always safe
1860 // here.
1861 return denorm ? SkTo<uint16_t>(0)
1862 : SkTo<uint16_t>((s>>16) + (em>>13) - ((127-15)<<10));
1863 }
1864
DEF_TEST(SkRasterPipeline_tail, r) {
    // Each scope below runs a tiny load->store pipeline over i = 1..4 pixels and
    // then verifies that pixels beyond the tail were left untouched -- they must
    // still hold the memset fill (0xff bytes; interpreted as NaN for floats).

    // f32 load -> f32 store (4 channels round-trip exactly).
    {
        float data[][4] = {
            {00, 01, 02, 03},
            {10, 11, 12, 13},
            {20, 21, 22, 23},
            {30, 31, 32, 33},
        };

        float buffer[4][4];

        SkRasterPipeline_MemoryCtx src = { &data[0][0], 0 },
                                   dst = { &buffer[0][0], 0 };

        for (unsigned i = 1; i <= 4; i++) {
            memset(buffer, 0xff, sizeof(buffer));
            SkRasterPipeline_<256> p;
            p.append(SkRasterPipelineOp::load_f32, &src);
            p.append(SkRasterPipelineOp::store_f32, &dst);
            p.run(0,0, i,1);
            for (unsigned j = 0; j < i; j++) {
                for (unsigned k = 0; k < 4; k++) {
                    if (buffer[j][k] != data[j][k]) {
                        ERRORF(r, "(%u, %u) - a: %g r: %g\n", j, k, data[j][k], buffer[j][k]);
                    }
                }
            }
            // Bytes of 0xff decode as NaN floats, proving the tail was not written.
            for (int j = i; j < 4; j++) {
                for (auto f : buffer[j]) {
                    REPORTER_ASSERT(r, SkScalarIsNaN(f));
                }
            }
        }
    }

    // rgf32 load -> f32 store: the loader fills b with 0 and a with 1.
    {
        float data[][2] = {
            {00, 01},
            {10, 11},
            {20, 21},
            {30, 31},
        };

        float buffer[4][4];

        SkRasterPipeline_MemoryCtx src = { &data[0][0], 0 },
                                   dst = { &buffer[0][0], 0 };

        for (unsigned i = 1; i <= 4; i++) {
            memset(buffer, 0xff, sizeof(buffer));
            SkRasterPipeline_<256> p;
            p.append(SkRasterPipelineOp::load_rgf32, &src);
            p.append(SkRasterPipelineOp::store_f32, &dst);
            p.run(0,0, i,1);
            for (unsigned j = 0; j < i; j++) {
                for (unsigned k = 0; k < 2; k++) {
                    if (buffer[j][k] != data[j][k]) {
                        ERRORF(r, "(%u, %u) - a: %g r: %g\n", j, k, data[j][k], buffer[j][k]);
                    }
                }
                if (buffer[j][2] != 0) {
                    ERRORF(r, "(%u, 2) - a: 0 r: %g\n", j, buffer[j][2]);
                }
                if (buffer[j][3] != 1) {
                    ERRORF(r, "(%u, 3) - a: 1 r: %g\n", j, buffer[j][3]);
                }
            }
            for (int j = i; j < 4; j++) {
                for (auto f : buffer[j]) {
                    REPORTER_ASSERT(r, SkScalarIsNaN(f));
                }
            }
        }
    }

    // f32 load -> rgf32 store: only r and g are written out.
    {
        float data[][4] = {
            {00, 01, 02, 03},
            {10, 11, 12, 13},
            {20, 21, 22, 23},
            {30, 31, 32, 33},
        };

        float buffer[4][2];

        SkRasterPipeline_MemoryCtx src = { &data[0][0], 0 },
                                   dst = { &buffer[0][0], 0 };

        for (unsigned i = 1; i <= 4; i++) {
            memset(buffer, 0xff, sizeof(buffer));
            SkRasterPipeline_<256> p;
            p.append(SkRasterPipelineOp::load_f32, &src);
            p.append(SkRasterPipelineOp::store_rgf32, &dst);
            p.run(0,0, i,1);
            for (unsigned j = 0; j < i; j++) {
                for (unsigned k = 0; k < 2; k++) {
                    if (buffer[j][k] != data[j][k]) {
                        ERRORF(r, "(%u, %u) - a: %g r: %g\n", j, k, data[j][k], buffer[j][k]);
                    }
                }
            }
            for (int j = i; j < 4; j++) {
                for (auto f : buffer[j]) {
                    REPORTER_ASSERT(r, SkScalarIsNaN(f));
                }
            }
        }
    }

    // f16 load -> f16 store (4 half channels round-trip bit-exactly).
    {
        alignas(8) uint16_t data[][4] = {
            {h(00), h(01), h(02), h(03)},
            {h(10), h(11), h(12), h(13)},
            {h(20), h(21), h(22), h(23)},
            {h(30), h(31), h(32), h(33)},
        };
        alignas(8) uint16_t buffer[4][4];
        SkRasterPipeline_MemoryCtx src = { &data[0][0], 0 },
                                   dst = { &buffer[0][0], 0 };

        for (unsigned i = 1; i <= 4; i++) {
            memset(buffer, 0xff, sizeof(buffer));
            SkRasterPipeline_<256> p;
            p.append(SkRasterPipelineOp::load_f16, &src);
            p.append(SkRasterPipelineOp::store_f16, &dst);
            p.run(0,0, i,1);
            for (unsigned j = 0; j < i; j++) {
                for (int k = 0; k < 4; k++) {
                    REPORTER_ASSERT(r, buffer[j][k] == data[j][k]);
                }
            }
            for (int j = i; j < 4; j++) {
                for (auto f : buffer[j]) {
                    REPORTER_ASSERT(r, f == 0xffff);
                }
            }
        }
    }

    // af16 load -> f16 store: the alpha-only loader zeroes r, g, and b.
    {
        alignas(8) uint16_t data[]= {
            h(00),
            h(10),
            h(20),
            h(30),
        };
        alignas(8) uint16_t buffer[4][4];
        SkRasterPipeline_MemoryCtx src = { &data[0], 0 },
                                   dst = { &buffer[0][0], 0 };

        for (unsigned i = 1; i <= 4; i++) {
            memset(buffer, 0xff, sizeof(buffer));
            SkRasterPipeline_<256> p;
            p.append(SkRasterPipelineOp::load_af16, &src);
            p.append(SkRasterPipelineOp::store_f16, &dst);
            p.run(0,0, i,1);
            for (unsigned j = 0; j < i; j++) {
                uint16_t expected[] = {0, 0, 0, data[j]};
                REPORTER_ASSERT(r, !memcmp(expected, &buffer[j][0], sizeof(buffer[j])));
            }
            for (int j = i; j < 4; j++) {
                for (auto f : buffer[j]) {
                    REPORTER_ASSERT(r, f == 0xffff);
                }
            }
        }
    }

    // f16 load -> af16 store: only the alpha channel is written out.
    {
        alignas(8) uint16_t data[][4] = {
            {h(00), h(01), h(02), h(03)},
            {h(10), h(11), h(12), h(13)},
            {h(20), h(21), h(22), h(23)},
            {h(30), h(31), h(32), h(33)},
        };
        alignas(8) uint16_t buffer[4];
        SkRasterPipeline_MemoryCtx src = { &data[0][0], 0 },
                                   dst = { &buffer[0], 0 };

        for (unsigned i = 1; i <= 4; i++) {
            memset(buffer, 0xff, sizeof(buffer));
            SkRasterPipeline_<256> p;
            p.append(SkRasterPipelineOp::load_f16, &src);
            p.append(SkRasterPipelineOp::store_af16, &dst);
            p.run(0,0, i,1);
            for (unsigned j = 0; j < i; j++) {
                REPORTER_ASSERT(r, !memcmp(&data[j][3], &buffer[j], sizeof(buffer[j])));
            }
            for (int j = i; j < 4; j++) {
                REPORTER_ASSERT(r, buffer[j] == 0xffff);
            }
        }
    }

    // f16 load -> rgf16 store: only r and g are written out.
    {
        alignas(8) uint16_t data[][4] = {
            {h(00), h(01), h(02), h(03)},
            {h(10), h(11), h(12), h(13)},
            {h(20), h(21), h(22), h(23)},
            {h(30), h(31), h(32), h(33)},
        };
        alignas(8) uint16_t buffer[4][2];
        SkRasterPipeline_MemoryCtx src = { &data[0][0], 0 },
                                   dst = { &buffer[0][0], 0 };

        for (unsigned i = 1; i <= 4; i++) {
            memset(buffer, 0xff, sizeof(buffer));
            SkRasterPipeline_<256> p;
            p.append(SkRasterPipelineOp::load_f16, &src);
            p.append(SkRasterPipelineOp::store_rgf16, &dst);
            p.run(0,0, i,1);
            for (unsigned j = 0; j < i; j++) {
                REPORTER_ASSERT(r, !memcmp(&buffer[j], &data[j], 2 * sizeof(uint16_t)));
            }
            for (int j = i; j < 4; j++) {
                for (auto h : buffer[j]) {
                    REPORTER_ASSERT(r, h == 0xffff);
                }
            }
        }
    }

    // rgf16 load -> f16 store: the loader fills b with 0 and a with 1.
    {
        alignas(8) uint16_t data[][2] = {
            {h(00), h(01)},
            {h(10), h(11)},
            {h(20), h(21)},
            {h(30), h(31)},
        };
        alignas(8) uint16_t buffer[4][4];
        SkRasterPipeline_MemoryCtx src = { &data[0][0], 0 },
                                   dst = { &buffer[0][0], 0 };

        for (unsigned i = 1; i <= 4; i++) {
            memset(buffer, 0xff, sizeof(buffer));
            SkRasterPipeline_<256> p;
            p.append(SkRasterPipelineOp::load_rgf16, &src);
            p.append(SkRasterPipelineOp::store_f16, &dst);
            p.run(0,0, i,1);
            for (unsigned j = 0; j < i; j++) {
                uint16_t expected[] = {data[j][0], data[j][1], h(0), h(1)};
                REPORTER_ASSERT(r, !memcmp(&buffer[j], expected, sizeof(expected)));
            }
            for (int j = i; j < 4; j++) {
                for (auto h : buffer[j]) {
                    REPORTER_ASSERT(r, h == 0xffff);
                }
            }
        }
    }
}
2116
DEF_TEST(SkRasterPipeline_u16, r) {
    // 16-bit channel formats, including tail handling (pixels past the run
    // length must keep their memset fill).

    // rg1616 -> 8888: each 16-bit channel narrows to its high byte; b is
    // filled with 0 and a with full coverage (0xff).
    {
        alignas(8) uint16_t data[][2] = {
            {0x0000, 0x0111},
            {0x1010, 0x1111},
            {0x2020, 0x2121},
            {0x3030, 0x3131},
        };
        uint8_t buffer[4][4];
        SkRasterPipeline_MemoryCtx src = { &data[0][0], 0 },
                                   dst = { &buffer[0][0], 0 };

        for (unsigned i = 1; i <= 4; i++) {
            memset(buffer, 0xab, sizeof(buffer));
            SkRasterPipeline_<256> p;
            p.append(SkRasterPipelineOp::load_rg1616, &src);
            p.append(SkRasterPipelineOp::store_8888, &dst);
            p.run(0,0, i,1);
            for (unsigned j = 0; j < i; j++) {
                uint8_t expected[] = {
                    SkToU8(data[j][0] >> 8),
                    SkToU8(data[j][1] >> 8),
                    000,
                    0xff
                };
                REPORTER_ASSERT(r, !memcmp(&buffer[j], expected, sizeof(expected)));
            }
            for (int j = i; j < 4; j++) {
                for (auto b : buffer[j]) {
                    REPORTER_ASSERT(r, b == 0xab);
                }
            }
        }
    }

    // a16 -> 8888: alpha-only load zeroes r, g, b; alpha narrows to its high byte.
    {
        alignas(8) uint16_t data[] = {
            0x0000,
            0x1010,
            0x2020,
            0x3030,
        };
        uint8_t buffer[4][4];
        SkRasterPipeline_MemoryCtx src = { &data[0], 0 },
                                   dst = { &buffer[0][0], 0 };

        for (unsigned i = 1; i <= 4; i++) {
            memset(buffer, 0xff, sizeof(buffer));
            SkRasterPipeline_<256> p;
            p.append(SkRasterPipelineOp::load_a16, &src);
            p.append(SkRasterPipelineOp::store_8888, &dst);
            p.run(0,0, i,1);
            for (unsigned j = 0; j < i; j++) {
                uint8_t expected[] = {0x00, 0x00, 0x00, SkToU8(data[j] >> 8)};
                REPORTER_ASSERT(r, !memcmp(&buffer[j], expected, sizeof(expected)));
            }
            for (int j = i; j < 4; j++) {
                for (auto b : buffer[j]) {
                    REPORTER_ASSERT(r, b == 0xff);
                }
            }
        }
    }

    // 8888 -> a16: only alpha is stored, widened 8->16 by replicating the byte.
    {
        uint8_t data[][4] = {
            {0x00, 0x01, 0x02, 0x03},
            {0x10, 0x11, 0x12, 0x13},
            {0x20, 0x21, 0x22, 0x23},
            {0x30, 0x31, 0x32, 0x33},
        };
        alignas(8) uint16_t buffer[4];
        SkRasterPipeline_MemoryCtx src = { &data[0][0], 0 },
                                   dst = { &buffer[0], 0 };

        for (unsigned i = 1; i <= 4; i++) {
            memset(buffer, 0xff, sizeof(buffer));
            SkRasterPipeline_<256> p;
            p.append(SkRasterPipelineOp::load_8888, &src);
            p.append(SkRasterPipelineOp::store_a16, &dst);
            p.run(0,0, i,1);
            for (unsigned j = 0; j < i; j++) {
                uint16_t expected = (data[j][3] << 8) | data[j][3];
                REPORTER_ASSERT(r, buffer[j] == expected);
            }
            for (int j = i; j < 4; j++) {
                REPORTER_ASSERT(r, buffer[j] == 0xffff);
            }
        }
    }

    // 16161616 round-trip through swap_rb: r and b channels trade places.
    {
        alignas(8) uint16_t data[][4] = {
            {0x0000, 0x1000, 0x2000, 0x3000},
            {0x0001, 0x1001, 0x2001, 0x3001},
            {0x0002, 0x1002, 0x2002, 0x3002},
            {0x0003, 0x1003, 0x2003, 0x3003},
        };
        alignas(8) uint16_t buffer[4][4];
        SkRasterPipeline_MemoryCtx src = { &data[0][0], 0 },
                                   dst = { &buffer[0], 0 };

        for (unsigned i = 1; i <= 4; i++) {
            memset(buffer, 0xff, sizeof(buffer));
            SkRasterPipeline_<256> p;
            p.append(SkRasterPipelineOp::load_16161616, &src);
            p.append(SkRasterPipelineOp::swap_rb);
            p.append(SkRasterPipelineOp::store_16161616, &dst);
            p.run(0,0, i,1);
            for (unsigned j = 0; j < i; j++) {
                uint16_t expected[4] = {data[j][2], data[j][1], data[j][0], data[j][3]};
                REPORTER_ASSERT(r, !memcmp(&expected[0], &buffer[j], sizeof(expected)));
            }
            for (int j = i; j < 4; j++) {
                for (uint16_t u16 : buffer[j])
                    REPORTER_ASSERT(r, u16 == 0xffff);
            }
        }
    }
}
2237
DEF_TEST(SkRasterPipeline_lowp, r) {
    // Fill 64 pixels with ascending byte values: pixel i holds
    // r=4i+0, g=4i+1, b=4i+2, a=4i+3.
    uint32_t rgba[64];
    for (int i = 0; i < 64; i++) {
        const uint32_t base = 4 * i;
        rgba[i] = base
                | (base + 1) << 8
                | (base + 2) << 16
                | (base + 3) << 24;
    }

    SkRasterPipeline_MemoryCtx ptr = { rgba, 0 };

    // Round-trip through swap_rb; r and b should trade places in every pixel.
    SkRasterPipeline_<256> p;
    p.append(SkRasterPipelineOp::load_8888, &ptr);
    p.append(SkRasterPipelineOp::swap_rb);
    p.append(SkRasterPipelineOp::store_8888, &ptr);
    p.run(0, 0, 64, 1);

    for (int i = 0; i < 64; i++) {
        const uint32_t base = 4 * i;
        const uint32_t want = base << 16
                            | (base + 1) << 8
                            | (base + 2)
                            | (base + 3) << 24;
        if (rgba[i] != want) {
            ERRORF(r, "got %08x, want %08x\n", rgba[i], want);
        }
    }
}
2265
DEF_TEST(SkRasterPipeline_swizzle, r) {
    // This takes the lowp code path
    {
        // Pixel i holds r=4i+0, g=4i+1 as bytes in a 16-bit rg pair.
        uint16_t rg[64];
        for (int i = 0; i < 64; i++) {
            rg[i] = (4*i+0) << 0
                  | (4*i+1) << 8;
        }

        // "g1b1" presumably maps r <- g and g <- 1; the expected bytes below
        // (r byte = old g value, g byte = 0xff) reflect that.
        skgpu::Swizzle swizzle("g1b1");

        SkRasterPipeline_MemoryCtx ptr = { rg, 0 };
        SkRasterPipeline_<256> p;
        p.append(SkRasterPipelineOp::load_rg88, &ptr);
        swizzle.apply(&p);
        p.append(SkRasterPipelineOp::store_rg88, &ptr);
        p.run(0,0,64,1);

        for (int i = 0; i < 64; i++) {
            uint32_t want = 0xff << 8
                          | (4*i+1) << 0;
            if (rg[i] != want) {
                ERRORF(r, "got %08x, want %08x\n", rg[i], want);
            }
        }
    }
    // This takes the highp code path
    {
        float rg[64][2];
        for (int i = 0; i < 64; i++) {
            rg[i][0] = i + 1;
            rg[i][1] = 2 * i + 1;
        }

        // "0gra": r <- 0, g <- g, b <- old r, a <- a. load_rgf32 supplies
        // b = 0 and a = 1, so the stored halfs are {0, g, r, 1}.
        skgpu::Swizzle swizzle("0gra");

        uint16_t buffer[64][4];
        SkRasterPipeline_MemoryCtx src = { rg, 0 },
                                   dst = { buffer, 0};
        SkRasterPipeline_<256> p;
        p.append(SkRasterPipelineOp::load_rgf32, &src);
        swizzle.apply(&p);
        p.append(SkRasterPipelineOp::store_f16, &dst);
        p.run(0,0,64,1);

        for (int i = 0; i < 64; i++) {
            uint16_t want[4] {
                h(0),
                h(2 * i + 1),
                h(i + 1),
                h(1),
            };
            REPORTER_ASSERT(r, !memcmp(want, buffer[i], sizeof(buffer[i])));
        }
    }
}
2322
DEF_TEST(SkRasterPipeline_lowp_clamp01, r) {
    // This may seem like a funny pipeline to create,
    // but it certainly shouldn't crash when you run it.
    uint32_t pixel = 0xff00ff00;

    SkRasterPipeline_MemoryCtx ctx = { &pixel, 0 };

    SkRasterPipeline_<256> pipeline;
    pipeline.append(SkRasterPipelineOp::load_8888, &ctx);
    pipeline.append(SkRasterPipelineOp::swap_rb);
    pipeline.append(SkRasterPipelineOp::clamp_01);
    pipeline.append(SkRasterPipelineOp::store_8888, &ctx);
    pipeline.run(0, 0, 1, 1);
}
2338
2339 // Helper struct that can be used to scrape stack addresses at different points in a pipeline
class StackCheckerCtx : SkRasterPipeline_CallbackCtx {
public:
    StackCheckerCtx() {
        // The callback records the address of one of its own stack locals.
        // Comparing those addresses across callbacks reveals whether the
        // pipeline's stack frame grew between invocations.
        this->fn = [](SkRasterPipeline_CallbackCtx* self, int active_pixels) {
            auto ctx = (StackCheckerCtx*)self;
            ctx->fStackAddrs.push_back(&active_pixels);
        };
    }

    // Expected relationship between a sample and the first (baseline) sample.
    enum class Behavior {
        kGrowth,    // address expected to differ from the baseline
        kBaseline,  // address expected to equal the baseline
        kUnknown,   // no reliable expectation (see GrowthBehavior)
    };

    static Behavior GrowthBehavior() {
        // Only some stages use the musttail attribute, so we have no way of knowing what's going to
        // happen. In release builds, it's likely that the compiler will apply tail-call
        // optimization. Even in some debug builds (on Windows), we don't see stack growth.
        return Behavior::kUnknown;
    }

    // Call one of these two each time the checker callback is added:
    StackCheckerCtx* expectGrowth() {
        fExpectedBehavior.push_back(GrowthBehavior());
        return this;
    }

    StackCheckerCtx* expectBaseline() {
        fExpectedBehavior.push_back(Behavior::kBaseline);
        return this;
    }

    // Compares each recorded stack address against the first one, per the
    // expectations registered above. Must be called after the pipeline runs.
    void validate(skiatest::Reporter* r) {
        REPORTER_ASSERT(r, fStackAddrs.size() == fExpectedBehavior.size());

        // This test is storing and comparing stack pointers (to dead stack frames) as a way of
        // measuring stack usage. Unsurprisingly, ASAN doesn't like that. HWASAN actually inserts
        // tag bytes in the pointers, causing them not to match. Newer versions of vanilla ASAN
        // also appear to salt the stack slightly, causing repeated calls to scrape different
        // addresses, even though $rsp is identical on each invocation of the lambda.
#if !defined(SK_SANITIZE_ADDRESS)
        void* baseline = fStackAddrs[0];
        for (size_t i = 1; i < fStackAddrs.size(); i++) {
            if (fExpectedBehavior[i] == Behavior::kGrowth) {
                REPORTER_ASSERT(r, fStackAddrs[i] != baseline);
            } else if (fExpectedBehavior[i] == Behavior::kBaseline) {
                REPORTER_ASSERT(r, fStackAddrs[i] == baseline);
            } else {
                // Unknown behavior, nothing we can assert here
            }
        }
#endif
    }

private:
    std::vector<void*> fStackAddrs;       // one entry per callback invocation
    std::vector<Behavior> fExpectedBehavior;  // one entry per expect*() call
};
2399
DEF_TEST(SkRasterPipeline_stack_rewind, r) {
    // This test verifies that we can control stack usage with stack_rewind

    // Without stack_rewind, we should (maybe) see stack growth
    {
        StackCheckerCtx stack;
        uint32_t rgba = 0xff0000ff;
        SkRasterPipeline_MemoryCtx ptr = { &rgba, 0 };

        SkRasterPipeline_<256> p;
        p.append(SkRasterPipelineOp::callback, stack.expectBaseline());
        p.append(SkRasterPipelineOp::load_8888, &ptr);
        p.append(SkRasterPipelineOp::callback, stack.expectGrowth());
        p.append(SkRasterPipelineOp::swap_rb);
        p.append(SkRasterPipelineOp::callback, stack.expectGrowth());
        p.append(SkRasterPipelineOp::store_8888, &ptr);
        p.run(0,0,1,1);

        REPORTER_ASSERT(r, rgba == 0xffff0000); // Ensure the pipeline worked
        stack.validate(r);
    }

    // With stack_rewind, we should (always) be able to get back to baseline.
    // Each append_stack_rewind resets the stack depth, so the callback that
    // follows it must observe the baseline address again.
    {
        StackCheckerCtx stack;
        uint32_t rgba = 0xff0000ff;
        SkRasterPipeline_MemoryCtx ptr = { &rgba, 0 };

        SkRasterPipeline_<256> p;
        p.append(SkRasterPipelineOp::callback, stack.expectBaseline());
        p.append(SkRasterPipelineOp::load_8888, &ptr);
        p.append(SkRasterPipelineOp::callback, stack.expectGrowth());
        p.append_stack_rewind();
        p.append(SkRasterPipelineOp::callback, stack.expectBaseline());
        p.append(SkRasterPipelineOp::swap_rb);
        p.append(SkRasterPipelineOp::callback, stack.expectGrowth());
        p.append_stack_rewind();
        p.append(SkRasterPipelineOp::callback, stack.expectBaseline());
        p.append(SkRasterPipelineOp::store_8888, &ptr);
        p.run(0,0,1,1);

        REPORTER_ASSERT(r, rgba == 0xffff0000); // Ensure the pipeline worked
        stack.validate(r);
    }
}
2445