/*
 * Copyright 2017 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "SkColorData.h"
#include "SkCpu.h"
#include "SkJumper.h"
#include "SkOnce.h"
#include "SkRasterPipeline.h"
#include "SkTemplates.h"

#if !defined(SK_JUMPER_USE_ASSEMBLY)
    // We'll use __has_feature(memory_sanitizer) to detect MSAN.
    // SkJumper_generated.S is not compiled with MSAN, so MSAN would yell really loud.
    #if !defined(__has_feature)
        #define __has_feature(x) 0
    #endif

    #if 0 || __has_feature(memory_sanitizer)
        #define SK_JUMPER_USE_ASSEMBLY 0
    #else
        #define SK_JUMPER_USE_ASSEMBLY 1
    #endif
#endif

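// An X-macro trick: SK_RASTER_PIPELINE_STAGES(M) expands M once per stock stage,
// so mapping each stage to "+1" sums up the number of stages at compile time.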
#define M(st) +1
static const int kNumStages = SK_RASTER_PIPELINE_STAGES(M);
#undef M

#ifndef SK_JUMPER_DISABLE_8BIT
    // Intentionally commented out; optional logging for local debugging.
    #if 0 && SK_JUMPER_USE_ASSEMBLY && (defined(__x86_64__) || defined(_M_X64))
        #include <atomic>

        #define M(st) #st,
        static const char* kStageNames[] = { SK_RASTER_PIPELINE_STAGES(M) };
        #undef M

        static std::atomic<int> gMissingStageCounters[kNumStages];

        static void log_missing(SkRasterPipeline::StockStage st) {
            static SkOnce once;
            once([] { atexit([] {
                int total = 0;
                for (int i = 0; i < kNumStages; i++) {
                    if (int count = gMissingStageCounters[i].load()) {
                        SkDebugf("%7d\t%s\n", count, kStageNames[i]);
                        total += count;
                    }
                }
                SkDebugf("%7d total\n", total);
            }); });

            gMissingStageCounters[st]++;
        }
    #else
        static void log_missing(SkRasterPipeline::StockStage) {}
    #endif
#endif

// We can't express the real types of most stage functions portably, so we use a stand-in.
// We'll only ever call start_pipeline(), which then chains into the rest.
using StageFn         = void(void);
using StartPipelineFn = void(size_t,size_t,size_t,size_t, void**);
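// (The size_t arguments to start_pipeline are x, y, x+w, y+h, as passed by run()
// and compile() below; the void** points at the program, stage function pointers
// interleaved with any context pointers.)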

// Some platforms expect the C name "name" to map to asm "_name", others to "name".
#if defined(__APPLE__)
    #define ASM(name, suffix)  sk_##name##_##suffix
#else
    #define ASM(name, suffix) _sk_##name##_##suffix
#endif
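// Either way the assembly symbol is _sk_<name>_<suffix>: Mach-O itself prefixes
// C names with "_", so on Apple we declare sk_... and let the toolchain add it.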

extern "C" {

#if !SK_JUMPER_USE_ASSEMBLY
    // We'll just run baseline code.

#elif defined(__x86_64__) || defined(_M_X64)
    StartPipelineFn ASM(start_pipeline,       skx),
                    ASM(start_pipeline,       hsw),
                    ASM(start_pipeline,       avx),
                    ASM(start_pipeline,     sse41),
                    ASM(start_pipeline,      sse2),
                    ASM(start_pipeline,  hsw_lowp),
                    ASM(start_pipeline,sse41_lowp),
                    ASM(start_pipeline, sse2_lowp);

    StageFn ASM(just_return,       skx),
            ASM(just_return,       hsw),
            ASM(just_return,       avx),
            ASM(just_return,     sse41),
            ASM(just_return,      sse2),
            ASM(just_return,  hsw_lowp),
            ASM(just_return,sse41_lowp),
            ASM(just_return, sse2_lowp);

    #define M(st) StageFn ASM(st,       skx), \
                          ASM(st,       hsw), \
                          ASM(st,       avx), \
                          ASM(st,     sse41), \
                          ASM(st,      sse2), \
                          ASM(st,  hsw_lowp), \
                          ASM(st,sse41_lowp), \
                          ASM(st, sse2_lowp);
        SK_RASTER_PIPELINE_STAGES(M)
    #undef M

#elif defined(__i386__) || defined(_M_IX86)
    StartPipelineFn ASM(start_pipeline,sse2),
                    ASM(start_pipeline,sse2_lowp);
    StageFn ASM(just_return,sse2),
            ASM(just_return,sse2_lowp);
    #define M(st) StageFn ASM(st,sse2), \
                          ASM(st,sse2_lowp);
        SK_RASTER_PIPELINE_STAGES(M)
    #undef M

#endif

    // Baseline code compiled as a normal part of Skia.
    StartPipelineFn sk_start_pipeline;
    StageFn sk_just_return;
    #define M(st) StageFn sk_##st;
        SK_RASTER_PIPELINE_STAGES(M)
    #undef M

#if defined(JUMPER_HAS_NEON_LOWP)
    StartPipelineFn sk_start_pipeline_lowp;
    StageFn sk_just_return_lowp;
    #define M(st) StageFn sk_##st##_lowp;
        SK_RASTER_PIPELINE_STAGES(M)
    #undef M
#endif

}

#if SK_JUMPER_USE_ASSEMBLY
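    // For each stock stage, these constexpr functions return a pointer to its
    // lowp implementation, or nullptr if there isn't one.  build_pipeline() uses
    // the nullptrs to know when to bail out to the float pipeline.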
    #if defined(__x86_64__) || defined(_M_X64)
        template <SkRasterPipeline::StockStage st>
        static constexpr StageFn* hsw_lowp();

        template <SkRasterPipeline::StockStage st>
        static constexpr StageFn* sse41_lowp();

        template <SkRasterPipeline::StockStage st>
        static constexpr StageFn* sse2_lowp();

        #define LOWP(st)                                                        \
            template <> constexpr StageFn* hsw_lowp<SkRasterPipeline::st>() {   \
                return ASM(st,hsw_lowp);                                        \
            }                                                                   \
            template <> constexpr StageFn* sse41_lowp<SkRasterPipeline::st>() { \
                return ASM(st,sse41_lowp);                                      \
            }                                                                   \
            template <> constexpr StageFn* sse2_lowp<SkRasterPipeline::st>() {  \
                return ASM(st,sse2_lowp);                                       \
            }
        #define NOPE(st)                                                        \
            template <> constexpr StageFn* hsw_lowp<SkRasterPipeline::st>() {   \
                return nullptr;                                                 \
            }                                                                   \
            template <> constexpr StageFn* sse41_lowp<SkRasterPipeline::st>() { \
                return nullptr;                                                 \
            }                                                                   \
            template <> constexpr StageFn* sse2_lowp<SkRasterPipeline::st>() {  \
                return nullptr;                                                 \
            }

    #elif defined(__i386__) || defined(_M_IX86)
        template <SkRasterPipeline::StockStage st>
        static constexpr StageFn* sse2_lowp();

        #define LOWP(st)                                                       \
            template <> constexpr StageFn* sse2_lowp<SkRasterPipeline::st>() { \
                return ASM(st,sse2_lowp);                                      \
            }
        #define NOPE(st)                                                       \
            template <> constexpr StageFn* sse2_lowp<SkRasterPipeline::st>() { \
                return nullptr;                                                \
            }

    #elif defined(JUMPER_HAS_NEON_LOWP)
        template <SkRasterPipeline::StockStage st>
        static constexpr StageFn* neon_lowp();

        #define LOWP(st)                                                       \
            template <> constexpr StageFn* neon_lowp<SkRasterPipeline::st>() { \
                return sk_##st##_lowp;                                         \
            }
        #define NOPE(st)                                                       \
            template <> constexpr StageFn* neon_lowp<SkRasterPipeline::st>() { \
                return nullptr;                                                \
            }

    #else
        #define LOWP(st)
        #define NOPE(st)

    #endif

    #define TODO(st) NOPE(st)  // stages that should be implemented in lowp, but aren't.

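    // Each stock stage needs to be classified exactly once below; a stage left
    // out would leave its template specializations undefined, and choose_lowp()
    // instantiates every one of them.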
    NOPE(callback)
    LOWP(move_src_dst) LOWP(move_dst_src)
    NOPE(clamp_0) NOPE(clamp_1) LOWP(clamp_a) LOWP(clamp_a_dst)
    NOPE(unpremul) LOWP(premul) LOWP(premul_dst)
    LOWP(force_opaque) LOWP(force_opaque_dst)
    LOWP(set_rgb) LOWP(swap_rb) LOWP(invert)
    NOPE(from_srgb) NOPE(from_srgb_dst) NOPE(to_srgb)
    LOWP(black_color) LOWP(white_color) LOWP(uniform_color)
    LOWP(seed_shader) NOPE(dither)
    LOWP(load_a8)   LOWP(load_a8_dst)   LOWP(store_a8)   LOWP(gather_a8)
    LOWP(load_g8)   LOWP(load_g8_dst)   LOWP(gather_g8)
    LOWP(load_565)  LOWP(load_565_dst)  LOWP(store_565)  LOWP(gather_565)
    LOWP(load_4444) LOWP(load_4444_dst) LOWP(store_4444) LOWP(gather_4444)
    NOPE(load_f16)  NOPE(load_f16_dst)  NOPE(store_f16)  NOPE(gather_f16)
    NOPE(load_f32)  NOPE(load_f32_dst)  NOPE(store_f32)
    LOWP(load_8888) LOWP(load_8888_dst) LOWP(store_8888) LOWP(gather_8888)
    LOWP(load_bgra) LOWP(load_bgra_dst) LOWP(store_bgra) LOWP(gather_bgra)
    NOPE(load_1010102) NOPE(load_1010102_dst) NOPE(store_1010102) NOPE(gather_1010102)
    TODO(bilerp_clamp_8888)
    TODO(load_u16_be) TODO(load_rgb_u16_be) TODO(store_u16_be)
    NOPE(load_tables_u16_be) NOPE(load_tables_rgb_u16_be) NOPE(load_tables)
    NOPE(load_rgba) NOPE(store_rgba)
    LOWP(scale_u8) LOWP(scale_565) LOWP(scale_1_float)
    LOWP( lerp_u8) LOWP( lerp_565) LOWP( lerp_1_float)
    LOWP(dstatop) LOWP(dstin) LOWP(dstout) LOWP(dstover)
    LOWP(srcatop) LOWP(srcin) LOWP(srcout) LOWP(srcover)
    LOWP(clear) LOWP(modulate) LOWP(multiply) LOWP(plus_) LOWP(screen) LOWP(xor_)
    NOPE(colorburn) NOPE(colordodge) LOWP(darken) LOWP(difference)
    LOWP(exclusion) LOWP(hardlight) LOWP(lighten) LOWP(overlay) NOPE(softlight)
    NOPE(hue) NOPE(saturation) NOPE(color) NOPE(luminosity)
    LOWP(srcover_rgba_8888) LOWP(srcover_bgra_8888)
    LOWP(luminance_to_alpha)
    LOWP(matrix_translate) LOWP(matrix_scale_translate)
    LOWP(matrix_2x3) NOPE(matrix_3x4) TODO(matrix_4x5) TODO(matrix_4x3)
    LOWP(matrix_perspective)
    NOPE(parametric_r) NOPE(parametric_g) NOPE(parametric_b)
    NOPE(parametric_a) NOPE(gamma) NOPE(gamma_dst)
    NOPE(table_r) NOPE(table_g) NOPE(table_b) NOPE(table_a)
    NOPE(lab_to_xyz)
    TODO(mirror_x) TODO(repeat_x)
    TODO(mirror_y) TODO(repeat_y)
    TODO(bilinear_nx) TODO(bilinear_px) TODO(bilinear_ny) TODO(bilinear_py)
    TODO(bicubic_n3x) TODO(bicubic_n1x) TODO(bicubic_p1x) TODO(bicubic_p3x)
    TODO(bicubic_n3y) TODO(bicubic_n1y) TODO(bicubic_p1y) TODO(bicubic_p3y)
    TODO(save_xy) TODO(accumulate)
    LOWP(clamp_x_1) LOWP(mirror_x_1) LOWP(repeat_x_1)
    LOWP(evenly_spaced_gradient)
    LOWP(gradient)
    LOWP(evenly_spaced_2_stop_gradient)
    LOWP(xy_to_unit_angle)
    LOWP(xy_to_radius)
    TODO(negate_x)
    TODO(xy_to_2pt_conical_strip)
    TODO(xy_to_2pt_conical_focal_on_circle)
    TODO(xy_to_2pt_conical_well_behaved)
    TODO(xy_to_2pt_conical_greater)
    TODO(xy_to_2pt_conical_smaller)
    TODO(alter_2pt_conical_compensate_focal)
    TODO(alter_2pt_conical_unswap)
    TODO(mask_2pt_conical_nan) TODO(mask_2pt_conical_degenerates) TODO(apply_vector_mask)
    TODO(byte_tables) TODO(byte_tables_rgb)
    NOPE(rgb_to_hsl) NOPE(hsl_to_rgb)
    NOPE(clut_3D) NOPE(clut_4D)
    NOPE(gauss_a_to_rgba)

    #undef LOWP
    #undef TODO
    #undef NOPE
#endif

// Engines comprise everything we need to run SkRasterPipelines.
struct SkJumper_Engine {
    StageFn*         stages[kNumStages];
    StartPipelineFn* start_pipeline;
    StageFn*         just_return;
};
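// The stages array is indexed by SkRasterPipeline::StockStage, which is how
// build_pipeline() looks up each stage's function below.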

// We'll default to this baseline engine, but try to choose a better one at runtime.
static const SkJumper_Engine kBaseline = {
#define M(stage) sk_##stage,
    { SK_RASTER_PIPELINE_STAGES(M) },
#undef M
    sk_start_pipeline,
    sk_just_return,
};
static SkJumper_Engine gEngine = kBaseline;
static SkOnce gChooseEngineOnce;
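// gEngine starts as the baseline and is upgraded at most once, guarded by
// gChooseEngineOnce, so build_pipeline() always sees a settled value.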

static SkJumper_Engine choose_engine() {
#if !SK_JUMPER_USE_ASSEMBLY
    // We'll just run baseline code.

#elif defined(__x86_64__) || defined(_M_X64)
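    // The "1 &&" in each test below makes it easy to disable a particular
    // instruction set while debugging: just flip it to "0 &&".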
    #if !defined(_MSC_VER)  // No _skx stages for Windows yet.
        if (1 && SkCpu::Supports(SkCpu::SKX)) {
            return {
            #define M(stage) ASM(stage, skx),
                { SK_RASTER_PIPELINE_STAGES(M) },
                M(start_pipeline)
                M(just_return)
            #undef M
            };
        }
    #endif
    if (1 && SkCpu::Supports(SkCpu::HSW)) {
        return {
        #define M(stage) ASM(stage, hsw),
            { SK_RASTER_PIPELINE_STAGES(M) },
            M(start_pipeline)
            M(just_return)
        #undef M
        };
    }
    if (1 && SkCpu::Supports(SkCpu::AVX)) {
        return {
        #define M(stage) ASM(stage, avx),
            { SK_RASTER_PIPELINE_STAGES(M) },
            M(start_pipeline)
            M(just_return)
        #undef M
        };
    }
    if (1 && SkCpu::Supports(SkCpu::SSE41)) {
        return {
        #define M(stage) ASM(stage, sse41),
            { SK_RASTER_PIPELINE_STAGES(M) },
            M(start_pipeline)
            M(just_return)
        #undef M
        };
    }
    if (1 && SkCpu::Supports(SkCpu::SSE2)) {
        return {
        #define M(stage) ASM(stage, sse2),
            { SK_RASTER_PIPELINE_STAGES(M) },
            M(start_pipeline)
            M(just_return)
        #undef M
        };
    }

#elif defined(__i386__) || defined(_M_IX86)
    if (1 && SkCpu::Supports(SkCpu::SSE2)) {
        return {
        #define M(stage) ASM(stage, sse2),
            { SK_RASTER_PIPELINE_STAGES(M) },
            M(start_pipeline)
            M(just_return)
        #undef M
        };
    }

#endif
    return kBaseline;
}

#ifndef SK_JUMPER_DISABLE_8BIT
    static const SkJumper_Engine kNone = {
    #define M(stage) nullptr,
        { SK_RASTER_PIPELINE_STAGES(M) },
    #undef M
        nullptr,
        nullptr,
    };
    static SkJumper_Engine gLowp = kNone;
    static SkOnce gChooseLowpOnce;

    static SkJumper_Engine choose_lowp() {
    #if SK_JUMPER_USE_ASSEMBLY
        #if defined(__x86_64__) || defined(_M_X64)
            if (1 && SkCpu::Supports(SkCpu::HSW)) {
                return {
                #define M(st) hsw_lowp<SkRasterPipeline::st>(),
                    { SK_RASTER_PIPELINE_STAGES(M) },
                    ASM(start_pipeline,hsw_lowp),
                    ASM(just_return   ,hsw_lowp),
                #undef M
                };
            }
            if (1 && SkCpu::Supports(SkCpu::SSE41)) {
                return {
                #define M(st) sse41_lowp<SkRasterPipeline::st>(),
                    { SK_RASTER_PIPELINE_STAGES(M) },
                    ASM(start_pipeline,sse41_lowp),
                    ASM(just_return   ,sse41_lowp),
                #undef M
                };
            }
            if (1 && SkCpu::Supports(SkCpu::SSE2)) {
                return {
                #define M(st) sse2_lowp<SkRasterPipeline::st>(),
                    { SK_RASTER_PIPELINE_STAGES(M) },
                    ASM(start_pipeline,sse2_lowp),
                    ASM(just_return   ,sse2_lowp),
                #undef M
                };
            }
        #elif defined(__i386__) || defined(_M_IX86)
            if (1 && SkCpu::Supports(SkCpu::SSE2)) {
                return {
                #define M(st) sse2_lowp<SkRasterPipeline::st>(),
                    { SK_RASTER_PIPELINE_STAGES(M) },
                    ASM(start_pipeline,sse2_lowp),
                    ASM(just_return   ,sse2_lowp),
                #undef M
                };
            }

        #elif defined(JUMPER_HAS_NEON_LOWP)
            return {
            #define M(st) neon_lowp<SkRasterPipeline::st>(),
                { SK_RASTER_PIPELINE_STAGES(M) },
                sk_start_pipeline_lowp,
                sk_just_return_lowp,
            #undef M
            };
        #endif
    #endif
        return kNone;
    }
#endif

const SkJumper_Engine& SkRasterPipeline::build_pipeline(void** ip) const {
#ifndef SK_JUMPER_DISABLE_8BIT
    gChooseLowpOnce([]{ gLowp = choose_lowp(); });

    // First try to build a lowp pipeline.  If that fails, fall back to the
    // normal float pipeline, gEngine.
    void** reset_point = ip;
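    // We lay the program out backwards from the end of the buffer, so ip starts
    // one past the last slot; if any stage is missing in lowp, we reset ip to
    // reset_point and abandon the whole lowp attempt.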
    *--ip = (void*)gLowp.just_return;
    for (const StageList* st = fStages; st; st = st->prev) {
        if (st->stage == SkRasterPipeline::clamp_0 ||
            st->stage == SkRasterPipeline::clamp_1) {
            continue;  // No-ops in lowp.
        }
        if (StageFn* fn = gLowp.stages[st->stage]) {
            if (st->ctx) {
                *--ip = st->ctx;
            }
            *--ip = (void*)fn;
        } else {
            log_missing(st->stage);
            ip = reset_point;
            break;
        }
    }
    if (ip != reset_point) {
        return gLowp;
    }
#endif

    gChooseEngineOnce([]{ gEngine = choose_engine(); });
    // We're building the pipeline backwards, so we start with the final stage, just_return.
    *--ip = (void*)gEngine.just_return;

    // Still going backwards, each stage's context pointer then its StageFn.
    for (const StageList* st = fStages; st; st = st->prev) {
        if (st->ctx) {
            *--ip = st->ctx;
        }
        *--ip = (void*)gEngine.stages[st->stage];
    }
    return gEngine;
}

void SkRasterPipeline::run(size_t x, size_t y, size_t w, size_t h) const {
    if (this->empty()) {
        return;
    }

    // Best to not use fAlloc here... we can't bound how often run() will be called.
    SkAutoSTMalloc<64, void*> program(fSlotsNeeded);

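    // build_pipeline() fills the program in from the back, which is why it's
    // handed a pointer one past the end of the array.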
    const SkJumper_Engine& engine = this->build_pipeline(program.get() + fSlotsNeeded);
    engine.start_pipeline(x,y,x+w,y+h, program.get());
}

std::function<void(size_t, size_t, size_t, size_t)> SkRasterPipeline::compile() const {
    if (this->empty()) {
        return [](size_t, size_t, size_t, size_t) {};
    }

    void** program = fAlloc->makeArray<void*>(fSlotsNeeded);
    const SkJumper_Engine& engine = this->build_pipeline(program + fSlotsNeeded);

    auto start_pipeline = engine.start_pipeline;
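    // program was allocated out of fAlloc, so it outlives this call; the lambda
    // captures just the raw program and start_pipeline pointers by value.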
    return [=](size_t x, size_t y, size_t w, size_t h) {
        start_pipeline(x,y,x+w,y+h, program);
    };
}