1 /*
2 * Copyright 2017 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8 #include "SkColorPriv.h"
9 #include "SkCpu.h"
10 #include "SkJumper.h"
11 #include "SkOnce.h"
12 #include "SkRasterPipeline.h"
13 #include "SkTemplates.h"
14
// MSAN is detected through __has_feature(memory_sanitizer).
// SkJumper_generated.S is not compiled with MSAN, so MSAN would complain loudly about it.
// Compilers that lack __has_feature get a stand-in that reports every feature as absent.
#ifndef __has_feature
    #define __has_feature(x) 0
#endif
20
21 // Stages expect these constants to be set to these values.
22 // It's fine to rearrange and add new ones if you update SkJumper_constants.
23 using K = const SkJumper_constants;
24 static K kConstants = {
25 {0,1,2,3,4,5,6,7},
26 {0,1,2,3,4,5,6,7},
27 };
28
29 #define M(st) +1
30 static const int kNumStages = SK_RASTER_PIPELINE_STAGES(M);
31 #undef M
32
33 #ifndef SK_DISABLE_SSSE3_RUNTIME_CHECK_FOR_LOWP_STAGES
34 #if 0 && !__has_feature(memory_sanitizer) && (defined(__x86_64__) || defined(_M_X64))
35 #include <atomic>
36
37 #define M(st) #st,
38 static const char* kStageNames[] = { SK_RASTER_PIPELINE_STAGES(M) };
39 #undef M
40
41 static std::atomic<int> gMissingStageCounters[kNumStages];
42
43 static void log_missing(SkRasterPipeline::StockStage st) {
44 static SkOnce once;
45 once([] { atexit([] {
46 for (int i = 0; i < kNumStages; i++) {
47 if (int count = gMissingStageCounters[i].load()) {
48 SkDebugf("%7d\t%s\n", count, kStageNames[i]);
49 }
50 }
51 }); });
52
53 gMissingStageCounters[st]++;
54 }
55 #else
log_missing(SkRasterPipeline::StockStage)56 static void log_missing(SkRasterPipeline::StockStage) {}
57 #endif
58 #endif
59
60 // We can't express the real types of most stage functions portably, so we use a stand-in.
61 // We'll only ever call start_pipeline() or start_pipeline_2d(), which then chain into the rest.
62 using StageFn = void(void);
63 using StartPipelineFn = void(size_t,size_t,size_t, void**,K*);
64 using StartPipeline2dFn = void(size_t,size_t,size_t,size_t, void**,K*);
65
// ASM(name, suffix) spells the C identifier of the assembly routine for `name` in the
// `suffix` engine.  Some platforms map C "name" to asm "_name" and others to plain "name",
// so Apple targets use sk_<name>_<suffix> and every other target _sk_<name>_<suffix>.
#if !defined(__APPLE__)
    #define ASM(name, suffix) _sk_##name##_##suffix
#else
    #define ASM(name, suffix)  sk_##name##_##suffix
#endif
72
// Stages that also come in a low-precision (~15 bit) flavor from SkJumper_stages_lowp.cpp.
// LOWP_STAGES(X) expands X(stage) once for each of them, in the order listed.
#define LOWP_STAGES(X)                                                         \
    X(black_color) X(white_color) X(uniform_color) X(set_rgb) X(premul)        \
    X(load_8888) X(load_8888_dst) X(store_8888)                                \
    X(load_bgra) X(load_bgra_dst) X(store_bgra)                                \
    X(load_a8)   X(load_a8_dst)   X(store_a8)                                  \
    X(load_g8)   X(load_g8_dst)                                                \
    X(swap_rb)   X(srcover_rgba_8888)                                          \
    X(lerp_1_float)  X(lerp_u8)                                                \
    X(scale_1_float) X(scale_u8)                                               \
    X(move_src_dst)  X(move_dst_src)                                           \
    X(clear)                                                                   \
    X(srcatop) X(dstatop) X(srcin)   X(dstin)                                  \
    X(srcout)  X(dstout)  X(srcover) X(dstover)                                \
    X(modulate) X(multiply) X(screen) X(xor_)
103
104 extern "C" {
105
106 #if __has_feature(memory_sanitizer)
107 // We'll just run portable code.
108
109 #elif defined(__aarch64__)
110 StartPipelineFn ASM(start_pipeline ,aarch64);
111 StartPipeline2dFn ASM(start_pipeline_2d,aarch64);
112 StageFn ASM(just_return,aarch64);
113 #define M(st) StageFn ASM(st,aarch64);
114 SK_RASTER_PIPELINE_STAGES(M)
115 #undef M
116
117 #elif defined(__arm__)
118 StartPipelineFn ASM(start_pipeline ,vfp4);
119 StartPipeline2dFn ASM(start_pipeline_2d,vfp4);
120 StageFn ASM(just_return,vfp4);
121 #define M(st) StageFn ASM(st,vfp4);
122 SK_RASTER_PIPELINE_STAGES(M)
123 #undef M
124
125 #elif defined(__x86_64__) || defined(_M_X64)
126 StartPipelineFn ASM(start_pipeline,hsw ),
127 ASM(start_pipeline,avx ),
128 ASM(start_pipeline,sse41 ),
129 ASM(start_pipeline,sse2 ),
130 ASM(start_pipeline,hsw_lowp ),
131 ASM(start_pipeline,ssse3_lowp);
132
133 StartPipeline2dFn ASM(start_pipeline_2d,hsw ),
134 ASM(start_pipeline_2d,avx ),
135 ASM(start_pipeline_2d,sse41 ),
136 ASM(start_pipeline_2d,sse2 ),
137 ASM(start_pipeline_2d,hsw_lowp ),
138 ASM(start_pipeline_2d,ssse3_lowp);
139
140 StageFn ASM(just_return,hsw),
141 ASM(just_return,avx),
142 ASM(just_return,sse41),
143 ASM(just_return,sse2),
144 ASM(just_return,hsw_lowp ),
145 ASM(just_return,ssse3_lowp);
146
147 #define M(st) StageFn ASM(st,hsw);
148 SK_RASTER_PIPELINE_STAGES(M)
149 #undef M
150 #define M(st) StageFn ASM(st,avx);
151 SK_RASTER_PIPELINE_STAGES(M)
152 #undef M
153 #define M(st) StageFn ASM(st,sse41);
154 SK_RASTER_PIPELINE_STAGES(M)
155 #undef M
156 #define M(st) StageFn ASM(st,sse2);
157 SK_RASTER_PIPELINE_STAGES(M)
158 #undef M
159
160 #define M(st) StageFn ASM(st,hsw_lowp);
161 LOWP_STAGES(M)
162 #undef M
163 #define M(st) StageFn ASM(st,ssse3_lowp);
164 LOWP_STAGES(M)
165 #undef M
166
167 #elif (defined(__i386__) || defined(_M_IX86)) && \
168 !(defined(_MSC_VER) && defined(SK_SUPPORT_LEGACY_WIN32_JUMPER))
169 StartPipelineFn ASM(start_pipeline ,sse2);
170 StartPipeline2dFn ASM(start_pipeline_2d,sse2);
171 StageFn ASM(just_return,sse2);
172 #define M(st) StageFn ASM(st,sse2);
173 SK_RASTER_PIPELINE_STAGES(M)
174 #undef M
175
176 #endif
177
178 // Portable, single-pixel stages.
179 StartPipelineFn sk_start_pipeline;
180 StartPipeline2dFn sk_start_pipeline_2d;
181 StageFn sk_just_return;
182 #define M(st) StageFn sk_##st;
183 SK_RASTER_PIPELINE_STAGES(M)
184 #undef M
185 }
186
187 #if !__has_feature(memory_sanitizer) && (defined(__x86_64__) || defined(_M_X64))
188 template <SkRasterPipeline::StockStage st>
hsw_lowp()189 static constexpr StageFn* hsw_lowp() { return nullptr; }
190
191 template <SkRasterPipeline::StockStage st>
ssse3_lowp()192 static constexpr StageFn* ssse3_lowp() { return nullptr; }
193
194 #define M(st) \
195 template <> constexpr StageFn* hsw_lowp<SkRasterPipeline::st>() { \
196 return ASM(st,hsw_lowp); \
197 } \
198 template <> constexpr StageFn* ssse3_lowp<SkRasterPipeline::st>() { \
199 return ASM(st,ssse3_lowp); \
200 }
201 LOWP_STAGES(M)
202 #undef M
203 #endif
204
205 // Engines comprise everything we need to run SkRasterPipelines.
206 struct SkJumper_Engine {
207 StageFn* stages[kNumStages];
208 StartPipelineFn* start_pipeline;
209 StartPipeline2dFn* start_pipeline_2d;
210 StageFn* just_return;
211 };
212
213 // We'll default to this portable engine, but try to choose a better one at runtime.
214 static const SkJumper_Engine kPortable = {
215 #define M(stage) sk_##stage,
216 { SK_RASTER_PIPELINE_STAGES(M) },
217 #undef M
218 sk_start_pipeline,
219 sk_start_pipeline_2d,
220 sk_just_return,
221 };
222 static SkJumper_Engine gEngine = kPortable;
223 static SkOnce gChooseEngineOnce;
224
choose_engine()225 static SkJumper_Engine choose_engine() {
226 #if __has_feature(memory_sanitizer)
227 // We'll just run portable code.
228
229 #elif defined(__aarch64__)
230 return {
231 #define M(stage) ASM(stage, aarch64),
232 { SK_RASTER_PIPELINE_STAGES(M) },
233 M(start_pipeline)
234 M(start_pipeline_2d)
235 M(just_return)
236 #undef M
237 };
238
239 #elif defined(__arm__)
240 if (1 && SkCpu::Supports(SkCpu::NEON|SkCpu::NEON_FMA|SkCpu::VFP_FP16)) {
241 return {
242 #define M(stage) ASM(stage, vfp4),
243 { SK_RASTER_PIPELINE_STAGES(M) },
244 M(start_pipeline)
245 M(start_pipeline_2d)
246 M(just_return)
247 #undef M
248 };
249 }
250
251 #elif defined(__x86_64__) || defined(_M_X64)
252 if (1 && SkCpu::Supports(SkCpu::HSW)) {
253 return {
254 #define M(stage) ASM(stage, hsw),
255 { SK_RASTER_PIPELINE_STAGES(M) },
256 M(start_pipeline)
257 M(start_pipeline_2d)
258 M(just_return)
259 #undef M
260 };
261 }
262 if (1 && SkCpu::Supports(SkCpu::AVX)) {
263 return {
264 #define M(stage) ASM(stage, avx),
265 { SK_RASTER_PIPELINE_STAGES(M) },
266 M(start_pipeline)
267 M(start_pipeline_2d)
268 M(just_return)
269 #undef M
270 };
271 }
272 if (1 && SkCpu::Supports(SkCpu::SSE41)) {
273 return {
274 #define M(stage) ASM(stage, sse41),
275 { SK_RASTER_PIPELINE_STAGES(M) },
276 M(start_pipeline)
277 M(start_pipeline_2d)
278 M(just_return)
279 #undef M
280 };
281 }
282 if (1 && SkCpu::Supports(SkCpu::SSE2)) {
283 return {
284 #define M(stage) ASM(stage, sse2),
285 { SK_RASTER_PIPELINE_STAGES(M) },
286 M(start_pipeline)
287 M(start_pipeline_2d)
288 M(just_return)
289 #undef M
290 };
291 }
292
293 #elif (defined(__i386__) || defined(_M_IX86)) && \
294 !(defined(_MSC_VER) && defined(SK_SUPPORT_LEGACY_WIN32_JUMPER))
295 if (1 && SkCpu::Supports(SkCpu::SSE2)) {
296 return {
297 #define M(stage) ASM(stage, sse2),
298 { SK_RASTER_PIPELINE_STAGES(M) },
299 M(start_pipeline)
300 M(start_pipeline_2d)
301 M(just_return)
302 #undef M
303 };
304 }
305
306 #endif
307 return kPortable;
308 }
309
310 #ifndef SK_DISABLE_SSSE3_RUNTIME_CHECK_FOR_LOWP_STAGES
311 static const SkJumper_Engine kNone = {
312 #define M(stage) nullptr,
313 { SK_RASTER_PIPELINE_STAGES(M) },
314 #undef M
315 nullptr,
316 nullptr,
317 nullptr,
318 };
319 static SkJumper_Engine gLowp = kNone;
320 static SkOnce gChooseLowpOnce;
321
choose_lowp()322 static SkJumper_Engine choose_lowp() {
323 #if !__has_feature(memory_sanitizer) && (defined(__x86_64__) || defined(_M_X64))
324 if (1 && SkCpu::Supports(SkCpu::HSW)) {
325 return {
326 #define M(st) hsw_lowp<SkRasterPipeline::st>(),
327 { SK_RASTER_PIPELINE_STAGES(M) },
328 ASM(start_pipeline ,hsw_lowp),
329 ASM(start_pipeline_2d,hsw_lowp),
330 ASM(just_return ,hsw_lowp)
331 #undef M
332 };
333 }
334 if (1 && SkCpu::Supports(SkCpu::SSSE3)) {
335 return {
336 #define M(st) ssse3_lowp<SkRasterPipeline::st>(),
337 { SK_RASTER_PIPELINE_STAGES(M) },
338 ASM(start_pipeline ,ssse3_lowp),
339 ASM(start_pipeline_2d,ssse3_lowp),
340 ASM(just_return ,ssse3_lowp)
341 #undef M
342 };
343 }
344 #endif
345 return kNone;
346 }
347 #endif
348
build_pipeline(void ** ip) const349 const SkJumper_Engine& SkRasterPipeline::build_pipeline(void** ip) const {
350 #ifndef SK_DISABLE_SSSE3_RUNTIME_CHECK_FOR_LOWP_STAGES
351 gChooseLowpOnce([]{ gLowp = choose_lowp(); });
352
353 // First try to build a lowp pipeline. If that fails, fall back to normal float gEngine.
354 void** reset_point = ip;
355 *--ip = (void*)gLowp.just_return;
356 for (const StageList* st = fStages; st; st = st->prev) {
357 if (st->stage == SkRasterPipeline::clamp_0) {
358 continue; // No-op in lowp.
359 }
360 if (StageFn* fn = gLowp.stages[st->stage]) {
361 if (st->ctx) {
362 *--ip = st->ctx;
363 }
364 *--ip = (void*)fn;
365 } else {
366 log_missing(st->stage);
367 ip = reset_point;
368 break;
369 }
370 }
371 if (ip != reset_point) {
372 return gLowp;
373 }
374 #endif
375
376 gChooseEngineOnce([]{ gEngine = choose_engine(); });
377 // We're building the pipeline backwards, so we start with the final stage just_return.
378 *--ip = (void*)gEngine.just_return;
379
380 // Still going backwards, each stage's context pointer then its StageFn.
381 for (const StageList* st = fStages; st; st = st->prev) {
382 if (st->ctx) {
383 *--ip = st->ctx;
384 }
385 *--ip = (void*)gEngine.stages[st->stage];
386 }
387 return gEngine;
388 }
389
run(size_t x,size_t y,size_t n) const390 void SkRasterPipeline::run(size_t x, size_t y, size_t n) const {
391 if (this->empty()) {
392 return;
393 }
394
395 // Best to not use fAlloc here... we can't bound how often run() will be called.
396 SkAutoSTMalloc<64, void*> program(fSlotsNeeded);
397
398 const SkJumper_Engine& engine = this->build_pipeline(program.get() + fSlotsNeeded);
399 engine.start_pipeline(x,y,x+n, program.get(), &kConstants);
400 }
401
compile() const402 std::function<void(size_t, size_t, size_t)> SkRasterPipeline::compile() const {
403 if (this->empty()) {
404 return [](size_t, size_t, size_t) {};
405 }
406
407 void** program = fAlloc->makeArray<void*>(fSlotsNeeded);
408 const SkJumper_Engine& engine = this->build_pipeline(program + fSlotsNeeded);
409
410 auto start_pipeline = engine.start_pipeline;
411 return [=](size_t x, size_t y, size_t n) {
412 start_pipeline(x,y,x+n, program, &kConstants);
413 };
414 }
415
run_2d(size_t x,size_t y,size_t w,size_t h) const416 void SkRasterPipeline::run_2d(size_t x, size_t y, size_t w, size_t h) const {
417 if (this->empty()) {
418 return;
419 }
420
421 // Like in run(), it's best to not use fAlloc here... we can't bound how often we'll be called.
422 SkAutoSTMalloc<64, void*> program(fSlotsNeeded);
423
424 const SkJumper_Engine& engine = this->build_pipeline(program.get() + fSlotsNeeded);
425 engine.start_pipeline_2d(x,y,x+w,y+h, program.get(), &kConstants);
426 }
427