• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2017 Google Inc.
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 
8 #include "SkColorData.h"
9 #include "SkCpu.h"
10 #include "SkJumper.h"
11 #include "SkOnce.h"
12 #include "SkRasterPipeline.h"
13 #include "SkTemplates.h"
14 
15 #if !defined(SK_JUMPER_USE_ASSEMBLY)
16     // We'll use __has_feature(memory_sanitizer) to detect MSAN.
17     // SkJumper_generated.S is not compiled with MSAN, so MSAN would yell really loud.
18     #if !defined(__has_feature)
19         #define __has_feature(x) 0
20     #endif
21 
22     #if 0 || __has_feature(memory_sanitizer)
23         #define SK_JUMPER_USE_ASSEMBLY 0
24     #else
25         #define SK_JUMPER_USE_ASSEMBLY 1
26     #endif
27 #endif
28 
29 #define M(st) +1
30 static const int kNumStages = SK_RASTER_PIPELINE_STAGES(M);
31 #undef M
32 
33 #ifndef SK_JUMPER_DISABLE_8BIT
34     // Intentionally commented out; optional logging for local debugging.
35     #if 0 && SK_JUMPER_USE_ASSEMBLY && (defined(__x86_64__) || defined(_M_X64))
36         #include <atomic>
37 
38         #define M(st) #st,
39         static const char* kStageNames[] = { SK_RASTER_PIPELINE_STAGES(M) };
40         #undef M
41 
42         static std::atomic<int> gMissingStageCounters[kNumStages];
43 
44         static void log_missing(SkRasterPipeline::StockStage st) {
45             static SkOnce once;
46             once([] { atexit([] {
47                 int total = 0;
48                 for (int i = 0; i < kNumStages; i++) {
49                     if (int count = gMissingStageCounters[i].load()) {
50                         SkDebugf("%7d\t%s\n", count, kStageNames[i]);
51                         total += count;
52                     }
53                 }
54                 SkDebugf("%7d total\n", total);
55             }); });
56 
57             gMissingStageCounters[st]++;
58         }
59     #else
log_missing(SkRasterPipeline::StockStage)60         static void log_missing(SkRasterPipeline::StockStage) {}
61     #endif
62 #endif
63 
64 // We can't express the real types of most stage functions portably, so we use a stand-in.
65 // We'll only ever call start_pipeline(), which then chains into the rest.
66 using StageFn         = void(void);
67 using StartPipelineFn = void(size_t,size_t,size_t,size_t, void**);
68 
69 // Some platforms expect C "name" maps to asm "_name", others to "name".
70 #if defined(__APPLE__)
71     #define ASM(name, suffix)  sk_##name##_##suffix
72 #else
73     #define ASM(name, suffix) _sk_##name##_##suffix
74 #endif
75 
76 extern "C" {
77 
78 #if !SK_JUMPER_USE_ASSEMBLY
79     // We'll just run baseline code.
80 
81 #elif defined(__x86_64__) || defined(_M_X64)
82     StartPipelineFn ASM(start_pipeline,       skx),
83                     ASM(start_pipeline,       hsw),
84                     ASM(start_pipeline,       avx),
85                     ASM(start_pipeline,     sse41),
86                     ASM(start_pipeline,      sse2),
87                     ASM(start_pipeline,  hsw_lowp),
88                     ASM(start_pipeline,sse41_lowp),
89                     ASM(start_pipeline, sse2_lowp);
90 
91     StageFn ASM(just_return,       skx),
92             ASM(just_return,       hsw),
93             ASM(just_return,       avx),
94             ASM(just_return,     sse41),
95             ASM(just_return,      sse2),
96             ASM(just_return,  hsw_lowp),
97             ASM(just_return,sse41_lowp),
98             ASM(just_return, sse2_lowp);
99 
100     #define M(st) StageFn ASM(st,  skx),      \
101                           ASM(st,  hsw),      \
102                           ASM(st,  avx),      \
103                           ASM(st,sse41),      \
104                           ASM(st, sse2),      \
105                           ASM(st,  hsw_lowp), \
106                           ASM(st,sse41_lowp), \
107                           ASM(st, sse2_lowp);
108         SK_RASTER_PIPELINE_STAGES(M)
109     #undef M
110 
111 #elif defined(__i386__) || defined(_M_IX86)
112     StartPipelineFn ASM(start_pipeline,sse2),
113                     ASM(start_pipeline,sse2_lowp);
114     StageFn ASM(just_return,sse2),
115             ASM(just_return,sse2_lowp);
116     #define M(st) StageFn ASM(st,sse2),      \
117                           ASM(st,sse2_lowp);
118         SK_RASTER_PIPELINE_STAGES(M)
119     #undef M
120 
121 #endif
122 
123     // Baseline code compiled as a normal part of Skia.
124     StartPipelineFn sk_start_pipeline;
125     StageFn sk_just_return;
126     #define M(st) StageFn sk_##st;
127         SK_RASTER_PIPELINE_STAGES(M)
128     #undef M
129 
130 #if defined(JUMPER_HAS_NEON_LOWP)
131     StartPipelineFn sk_start_pipeline_lowp;
132     StageFn sk_just_return_lowp;
133     #define M(st) StageFn sk_##st##_lowp;
134         SK_RASTER_PIPELINE_STAGES(M)
135     #undef M
136 #endif
137 
138 }
139 
#if SK_JUMPER_USE_ASSEMBLY
    // Per-ISA constexpr lookup: <stage>() returns the lowp function pointer for that
    // stage, or nullptr when the stage has no lowp implementation.  LOWP(st) wires a
    // stage to its assembly symbol; NOPE(st) marks it unavailable in lowp.
    #if defined(__x86_64__) || defined(_M_X64)
        template <SkRasterPipeline::StockStage st>
        static constexpr StageFn* hsw_lowp();

        template <SkRasterPipeline::StockStage st>
        static constexpr StageFn* sse41_lowp();

        template <SkRasterPipeline::StockStage st>
        static constexpr StageFn* sse2_lowp();

        #define LOWP(st) \
            template <> constexpr StageFn* hsw_lowp<SkRasterPipeline::st>() {   \
                return ASM(st,hsw_lowp);                                        \
            }                                                                   \
            template <> constexpr StageFn* sse41_lowp<SkRasterPipeline::st>() { \
                return ASM(st,sse41_lowp);                                      \
            }                                                                   \
            template <> constexpr StageFn* sse2_lowp<SkRasterPipeline::st>() {  \
                return ASM(st,sse2_lowp);                                       \
            }
        #define NOPE(st) \
            template <> constexpr StageFn* hsw_lowp<SkRasterPipeline::st>() {   \
                return nullptr;                                                 \
            }                                                                   \
            template <> constexpr StageFn* sse41_lowp<SkRasterPipeline::st>() { \
                return nullptr;                                                 \
            }                                                                   \
            template <> constexpr StageFn* sse2_lowp<SkRasterPipeline::st>() {  \
                return nullptr;                                                 \
            }

    #elif defined(__i386__) || defined(_M_IX86)
        template <SkRasterPipeline::StockStage st>
        static constexpr StageFn* sse2_lowp();

        #define LOWP(st) \
            template <> constexpr StageFn* sse2_lowp<SkRasterPipeline::st>() {  \
                return ASM(st,sse2_lowp);                                       \
            }
        #define NOPE(st) \
            template <> constexpr StageFn* sse2_lowp<SkRasterPipeline::st>() {  \
                return nullptr;                                                 \
            }

    #elif defined(JUMPER_HAS_NEON_LOWP)
        template <SkRasterPipeline::StockStage st>
        static constexpr StageFn* neon_lowp();

        #define LOWP(st)                                                         \
            template <> constexpr StageFn* neon_lowp<SkRasterPipeline::st>() {   \
                return sk_##st##_lowp;                                           \
            }
        #define NOPE(st)                                                         \
            template <> constexpr StageFn* neon_lowp<SkRasterPipeline::st>() {   \
                return nullptr;                                                  \
            }

    #else
        #define LOWP(st)
        #define NOPE(st)

    #endif

    #define TODO(st) NOPE(st)  // stages that should be implemented in lowp, but aren't.

    // One entry per stock stage: which are available in lowp (LOWP), which can't
    // be (NOPE), and which just haven't been written yet (TODO).
    NOPE(callback)
    LOWP(move_src_dst) LOWP(move_dst_src)
    NOPE(clamp_0) NOPE(clamp_1) LOWP(clamp_a) LOWP(clamp_a_dst)
    NOPE(unpremul) LOWP(premul) LOWP(premul_dst)
    LOWP(force_opaque) LOWP(force_opaque_dst)
    LOWP(set_rgb) LOWP(swap_rb) LOWP(invert)
    NOPE(from_srgb) NOPE(from_srgb_dst) NOPE(to_srgb)
    LOWP(black_color) LOWP(white_color) LOWP(uniform_color)
    LOWP(seed_shader) NOPE(dither)
    LOWP(load_a8)   LOWP(load_a8_dst)   LOWP(store_a8)   LOWP(gather_a8)
    LOWP(load_g8)   LOWP(load_g8_dst)                    LOWP(gather_g8)
    LOWP(load_565)  LOWP(load_565_dst)  LOWP(store_565)  LOWP(gather_565)
    LOWP(load_4444) LOWP(load_4444_dst) LOWP(store_4444) LOWP(gather_4444)
    NOPE(load_f16)  NOPE(load_f16_dst)  NOPE(store_f16)  NOPE(gather_f16)
    NOPE(load_f32)  NOPE(load_f32_dst)  NOPE(store_f32)
    LOWP(load_8888) LOWP(load_8888_dst) LOWP(store_8888) LOWP(gather_8888)
    LOWP(load_bgra) LOWP(load_bgra_dst) LOWP(store_bgra) LOWP(gather_bgra)
    NOPE(load_1010102) NOPE(load_1010102_dst) NOPE(store_1010102) NOPE(gather_1010102)
    TODO(bilerp_clamp_8888)
    TODO(load_u16_be) TODO(load_rgb_u16_be) TODO(store_u16_be)
    NOPE(load_tables_u16_be) NOPE(load_tables_rgb_u16_be) NOPE(load_tables)
    NOPE(load_rgba) NOPE(store_rgba)
    LOWP(scale_u8) LOWP(scale_565) LOWP(scale_1_float)
    LOWP( lerp_u8) LOWP( lerp_565) LOWP( lerp_1_float)
    LOWP(dstatop) LOWP(dstin) LOWP(dstout) LOWP(dstover)
    LOWP(srcatop) LOWP(srcin) LOWP(srcout) LOWP(srcover)
    LOWP(clear) LOWP(modulate) LOWP(multiply) LOWP(plus_) LOWP(screen) LOWP(xor_)
    NOPE(colorburn) NOPE(colordodge) LOWP(darken) LOWP(difference)
    LOWP(exclusion) LOWP(hardlight) LOWP(lighten) LOWP(overlay) NOPE(softlight)
    NOPE(hue) NOPE(saturation) NOPE(color) NOPE(luminosity)
    LOWP(srcover_rgba_8888) LOWP(srcover_bgra_8888)
    LOWP(luminance_to_alpha)
    LOWP(matrix_translate) LOWP(matrix_scale_translate)
    LOWP(matrix_2x3) NOPE(matrix_3x4) TODO(matrix_4x5) TODO(matrix_4x3)
    LOWP(matrix_perspective)
    NOPE(parametric_r) NOPE(parametric_g) NOPE(parametric_b)
    NOPE(parametric_a) NOPE(gamma) NOPE(gamma_dst)
    NOPE(table_r) NOPE(table_g) NOPE(table_b) NOPE(table_a)
    NOPE(lab_to_xyz)
                    TODO(mirror_x)   TODO(repeat_x)
                    TODO(mirror_y)   TODO(repeat_y)
    TODO(bilinear_nx) TODO(bilinear_px) TODO(bilinear_ny) TODO(bilinear_py)
    TODO(bicubic_n3x) TODO(bicubic_n1x) TODO(bicubic_p1x) TODO(bicubic_p3x)
    TODO(bicubic_n3y) TODO(bicubic_n1y) TODO(bicubic_p1y) TODO(bicubic_p3y)
    TODO(save_xy) TODO(accumulate)
    LOWP(clamp_x_1) LOWP(mirror_x_1) LOWP(repeat_x_1)
    LOWP(evenly_spaced_gradient)
    LOWP(gradient)
    LOWP(evenly_spaced_2_stop_gradient)
    LOWP(xy_to_unit_angle)
    LOWP(xy_to_radius)
    TODO(negate_x)
    TODO(xy_to_2pt_conical_strip)
    TODO(xy_to_2pt_conical_focal_on_circle)
    TODO(xy_to_2pt_conical_well_behaved)
    TODO(xy_to_2pt_conical_greater)
    TODO(xy_to_2pt_conical_smaller)
    TODO(alter_2pt_conical_compensate_focal)
    TODO(alter_2pt_conical_unswap)
    TODO(mask_2pt_conical_nan) TODO(mask_2pt_conical_degenerates) TODO(apply_vector_mask)
    TODO(byte_tables) TODO(byte_tables_rgb)
    NOPE(rgb_to_hsl) NOPE(hsl_to_rgb)
    NOPE(clut_3D) NOPE(clut_4D)
    NOPE(gauss_a_to_rgba)

    #undef LOWP
    #undef TODO
    #undef NOPE
#endif
275 
276 // Engines comprise everything we need to run SkRasterPipelines.
277 struct SkJumper_Engine {
278     StageFn*         stages[kNumStages];
279     StartPipelineFn* start_pipeline;
280     StageFn*         just_return;
281 };
282 
283 // We'll default to this baseline engine, but try to choose a better one at runtime.
284 static const SkJumper_Engine kBaseline = {
285 #define M(stage) sk_##stage,
286     { SK_RASTER_PIPELINE_STAGES(M) },
287 #undef M
288     sk_start_pipeline,
289     sk_just_return,
290 };
291 static SkJumper_Engine gEngine = kBaseline;
292 static SkOnce gChooseEngineOnce;
293 
choose_engine()294 static SkJumper_Engine choose_engine() {
295 #if !SK_JUMPER_USE_ASSEMBLY
296     // We'll just run baseline code.
297 
298 #elif defined(__x86_64__) || defined(_M_X64)
299     #if !defined(_MSC_VER)  // No _skx stages for Windows yet.
300         if (1 && SkCpu::Supports(SkCpu::SKX)) {
301             return {
302             #define M(stage) ASM(stage, skx),
303                 { SK_RASTER_PIPELINE_STAGES(M) },
304                 M(start_pipeline)
305                 M(just_return)
306             #undef M
307             };
308         }
309     #endif
310     if (1 && SkCpu::Supports(SkCpu::HSW)) {
311         return {
312         #define M(stage) ASM(stage, hsw),
313             { SK_RASTER_PIPELINE_STAGES(M) },
314             M(start_pipeline)
315             M(just_return)
316         #undef M
317         };
318     }
319     if (1 && SkCpu::Supports(SkCpu::AVX)) {
320         return {
321         #define M(stage) ASM(stage, avx),
322             { SK_RASTER_PIPELINE_STAGES(M) },
323             M(start_pipeline)
324             M(just_return)
325         #undef M
326         };
327     }
328     if (1 && SkCpu::Supports(SkCpu::SSE41)) {
329         return {
330         #define M(stage) ASM(stage, sse41),
331             { SK_RASTER_PIPELINE_STAGES(M) },
332             M(start_pipeline)
333             M(just_return)
334         #undef M
335         };
336     }
337     if (1 && SkCpu::Supports(SkCpu::SSE2)) {
338         return {
339         #define M(stage) ASM(stage, sse2),
340             { SK_RASTER_PIPELINE_STAGES(M) },
341             M(start_pipeline)
342             M(just_return)
343         #undef M
344         };
345     }
346 
347 #elif defined(__i386__) || defined(_M_IX86)
348     if (1 && SkCpu::Supports(SkCpu::SSE2)) {
349         return {
350         #define M(stage) ASM(stage, sse2),
351             { SK_RASTER_PIPELINE_STAGES(M) },
352             M(start_pipeline)
353             M(just_return)
354         #undef M
355         };
356     }
357 
358 #endif
359     return kBaseline;
360 }
361 
362 #ifndef SK_JUMPER_DISABLE_8BIT
363     static const SkJumper_Engine kNone = {
364     #define M(stage) nullptr,
365         { SK_RASTER_PIPELINE_STAGES(M) },
366     #undef M
367         nullptr,
368         nullptr,
369     };
370     static SkJumper_Engine gLowp = kNone;
371     static SkOnce gChooseLowpOnce;
372 
choose_lowp()373     static SkJumper_Engine choose_lowp() {
374     #if SK_JUMPER_USE_ASSEMBLY
375         #if defined(__x86_64__) || defined(_M_X64)
376             if (1 && SkCpu::Supports(SkCpu::HSW)) {
377                 return {
378                 #define M(st) hsw_lowp<SkRasterPipeline::st>(),
379                     { SK_RASTER_PIPELINE_STAGES(M) },
380                     ASM(start_pipeline,hsw_lowp),
381                     ASM(just_return   ,hsw_lowp),
382                 #undef M
383                 };
384             }
385             if (1 && SkCpu::Supports(SkCpu::SSE41)) {
386                 return {
387                 #define M(st) sse41_lowp<SkRasterPipeline::st>(),
388                     { SK_RASTER_PIPELINE_STAGES(M) },
389                     ASM(start_pipeline,sse41_lowp),
390                     ASM(just_return   ,sse41_lowp),
391                 #undef M
392                 };
393             }
394             if (1 && SkCpu::Supports(SkCpu::SSE2)) {
395                 return {
396                 #define M(st) sse2_lowp<SkRasterPipeline::st>(),
397                     { SK_RASTER_PIPELINE_STAGES(M) },
398                     ASM(start_pipeline,sse2_lowp),
399                     ASM(just_return   ,sse2_lowp),
400                 #undef M
401                 };
402             }
403         #elif defined(__i386__) || defined(_M_IX86)
404             if (1 && SkCpu::Supports(SkCpu::SSE2)) {
405                 return {
406                 #define M(st) sse2_lowp<SkRasterPipeline::st>(),
407                     { SK_RASTER_PIPELINE_STAGES(M) },
408                     ASM(start_pipeline,sse2_lowp),
409                     ASM(just_return   ,sse2_lowp),
410                 #undef M
411                 };
412             }
413 
414         #elif defined(JUMPER_HAS_NEON_LOWP)
415             return {
416             #define M(st) neon_lowp<SkRasterPipeline::st>(),
417                 { SK_RASTER_PIPELINE_STAGES(M) },
418                 sk_start_pipeline_lowp,
419                 sk_just_return_lowp,
420             #undef M
421             };
422         #endif
423     #endif
424         return kNone;
425     }
426 #endif
427 
build_pipeline(void ** ip) const428 const SkJumper_Engine& SkRasterPipeline::build_pipeline(void** ip) const {
429 #ifndef SK_JUMPER_DISABLE_8BIT
430     gChooseLowpOnce([]{ gLowp = choose_lowp(); });
431 
432     // First try to build a lowp pipeline.  If that fails, fall back to normal float gEngine.
433     void** reset_point = ip;
434     *--ip = (void*)gLowp.just_return;
435     for (const StageList* st = fStages; st; st = st->prev) {
436         if (st->stage == SkRasterPipeline::clamp_0 ||
437             st->stage == SkRasterPipeline::clamp_1) {
438             continue;  // No-ops in lowp.
439         }
440         if (StageFn* fn = gLowp.stages[st->stage]) {
441             if (st->ctx) {
442                 *--ip = st->ctx;
443             }
444             *--ip = (void*)fn;
445         } else {
446             log_missing(st->stage);
447             ip = reset_point;
448             break;
449         }
450     }
451     if (ip != reset_point) {
452         return gLowp;
453     }
454 #endif
455 
456     gChooseEngineOnce([]{ gEngine = choose_engine(); });
457     // We're building the pipeline backwards, so we start with the final stage just_return.
458     *--ip = (void*)gEngine.just_return;
459 
460     // Still going backwards, each stage's context pointer then its StageFn.
461     for (const StageList* st = fStages; st; st = st->prev) {
462         if (st->ctx) {
463             *--ip = st->ctx;
464         }
465         *--ip = (void*)gEngine.stages[st->stage];
466     }
467     return gEngine;
468 }
469 
run(size_t x,size_t y,size_t w,size_t h) const470 void SkRasterPipeline::run(size_t x, size_t y, size_t w, size_t h) const {
471     if (this->empty()) {
472         return;
473     }
474 
475     // Best to not use fAlloc here... we can't bound how often run() will be called.
476     SkAutoSTMalloc<64, void*> program(fSlotsNeeded);
477 
478     const SkJumper_Engine& engine = this->build_pipeline(program.get() + fSlotsNeeded);
479     engine.start_pipeline(x,y,x+w,y+h, program.get());
480 }
481 
compile() const482 std::function<void(size_t, size_t, size_t, size_t)> SkRasterPipeline::compile() const {
483     if (this->empty()) {
484         return [](size_t, size_t, size_t, size_t) {};
485     }
486 
487     void** program = fAlloc->makeArray<void*>(fSlotsNeeded);
488     const SkJumper_Engine& engine = this->build_pipeline(program + fSlotsNeeded);
489 
490     auto start_pipeline = engine.start_pipeline;
491     return [=](size_t x, size_t y, size_t w, size_t h) {
492         start_pipeline(x,y,x+w,y+h, program);
493     };
494 }
495