• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2019 Google LLC
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 
8 #ifndef SkVM_DEFINED
9 #define SkVM_DEFINED
10 
11 #include "include/core/SkBlendMode.h"
12 #include "include/core/SkColor.h"
13 #include "include/core/SkSpan.h"
14 #include "include/private/SkMacros.h"
15 #include "include/private/SkTArray.h"
16 #include "include/private/SkTHash.h"
17 #include "src/core/SkVM_fwd.h"
18 #include <vector>      // std::vector
19 
20 class SkWStream;
21 
22 #if defined(SKVM_JIT_WHEN_POSSIBLE) && !defined(SK_BUILD_FOR_IOS)
       // SKVM_JIT is opt-in (SKVM_JIT_WHEN_POSSIBLE), is never defined for iOS builds,
       // and is only defined for the architecture/OS pairs tested below:
       //   - x86-64  on Windows, Linux, or Apple platforms
       //   - aarch64 on Android or Apple platforms
23     #if defined(__x86_64__) || defined(_M_X64)
24         #if defined(_WIN32) || defined(__linux) || defined(__APPLE__)
25             #define SKVM_JIT
26         #endif
27     #endif
28     #if defined(__aarch64__)
29         #if defined(__ANDROID__) || defined(__APPLE__)
30             #define SKVM_JIT
31         #endif
32     #endif
33 #endif
34 
   // Local toggle, off by default: change to `#if 1` to define SKVM_LLVM.
   // NOTE(review): what SKVM_LLVM enables is not visible in this part of the header.
35 #if 0
36     #define SKVM_LLVM
37 #endif
38 
   // Local toggle, off by default: change to `#if 1` to undo whatever the block above
   // decided and build without SKVM_JIT.
39 #if 0
40     #undef SKVM_JIT
41 #endif
42 
43 namespace skvm {
44 
45     class Assembler {
           // Declares one method per machine instruction for two targets: x86-64 (the
           // v*-prefixed vector ops and the GP64 ops) and aarch64.  NOTE(review): the
           // implementation is not in this header; presumably each call appends the
           // encoded instruction to the buffer given to the constructor — confirm in the .cpp.
46     public:
           // buf: destination for the generated code.  NOTE(review): ownership and required
           // capacity of buf are not visible from this declaration.
47         explicit Assembler(void* buf);
48 
           // presumably the number of bytes emitted so far (see fSize) — TODO confirm.
49         size_t size() const;
50 
51         // Order matters... GP64, Xmm, Ymm values match 4-bit register encoding for each.
52         enum GP64 {
53             rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi,
54             r8 , r9 , r10, r11, r12, r13, r14, r15,
55         };
56         enum Xmm {
57             xmm0, xmm1, xmm2 , xmm3 , xmm4 , xmm5 , xmm6 , xmm7 ,
58             xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,
59         };
60         enum Ymm {
61             ymm0, ymm1, ymm2 , ymm3 , ymm4 , ymm5 , ymm6 , ymm7 ,
62             ymm8, ymm9, ymm10, ymm11, ymm12, ymm13, ymm14, ymm15,
63         };
64 
65         // X and V values match 5-bit encoding for each (nothing tricky).
           // Note that sp is declared as an alias of xzr: both enumerators have the value 31.
66         enum X {
67             x0 , x1 , x2 , x3 , x4 , x5 , x6 , x7 ,
68             x8 , x9 , x10, x11, x12, x13, x14, x15,
69             x16, x17, x18, x19, x20, x21, x22, x23,
70             x24, x25, x26, x27, x28, x29, x30, xzr, sp=xzr,
71         };
72         enum V {
73             v0 , v1 , v2 , v3 , v4 , v5 , v6 , v7 ,
74             v8 , v9 , v10, v11, v12, v13, v14, v15,
75             v16, v17, v18, v19, v20, v21, v22, v23,
76             v24, v25, v26, v27, v28, v29, v30, v31,
77         };
78 
           // Raw emitters, not tied to either target: a run of bytes (pointer + count),
           // a single byte, and a single 32-bit word.
79         void bytes(const void*, int);
80         void byte(uint8_t);
81         void word(uint32_t);
82 
           // A position in the code that branches and PC-relative loads can target.
           // `kind` starts as NotYetSet and otherwise names the displacement form in use
           // (ARMDisp19 or X86Disp32).  NOTE(review): `references` presumably records the
           // places that must be patched once `offset` is known — confirm in the .cpp.
83         struct Label {
84             int                                      offset = 0;
85             enum { NotYetSet, ARMDisp19, X86Disp32 } kind = NotYetSet;
86             SkSTArray<2, int>                        references;
87         };
88 
89         // x86-64
90 
91         void align(int mod);
92 
93         void int3();
94         void vzeroupper();
95         void ret();
96 
97         // Mem represents a value at base + disp + scale*index,
98         // or simply at base + disp if index=rsp.
99         enum Scale { ONE, TWO, FOUR, EIGHT };
100         struct Mem {
101             GP64  base;
102             int   disp  = 0;
103             GP64  index = rsp;
104             Scale scale = ONE;
105         };
106 
            // Tagged union over a register number, a Mem, or a Label*; `kind` says which
            // member is active.  The constructors are deliberately implicit so a GP64, Xmm,
            // Ymm, Mem, or Label* can be passed directly wherever an Operand is expected.
107         struct Operand {
108             union {
109                 int    reg;
110                 Mem    mem;
111                 Label* label;
112             };
113             enum { REG, MEM, LABEL } kind;
114 
OperandOperand115             Operand(GP64   r) : reg  (r), kind(REG  ) {}
OperandOperand116             Operand(Xmm    r) : reg  (r), kind(REG  ) {}
OperandOperand117             Operand(Ymm    r) : reg  (r), kind(REG  ) {}
OperandOperand118             Operand(Mem    m) : mem  (m), kind(MEM  ) {}
OperandOperand119             Operand(Label* l) : label(l), kind(LABEL) {}
120         };
121 
122         void vpand (Ymm dst, Ymm x, Operand y);
123         void vpandn(Ymm dst, Ymm x, Operand y);
124         void vpor  (Ymm dst, Ymm x, Operand y);
125         void vpxor (Ymm dst, Ymm x, Operand y);
126 
127         void vpaddd (Ymm dst, Ymm x, Operand y);
128         void vpsubd (Ymm dst, Ymm x, Operand y);
129         void vpmulld(Ymm dst, Ymm x, Operand y);
130 
131         void vpaddw   (Ymm dst, Ymm x, Operand y);
132         void vpsubw   (Ymm dst, Ymm x, Operand y);
133         void vpmullw  (Ymm dst, Ymm x, Operand y);
134 
135         void vpabsw   (Ymm dst, Operand x);
136         void vpavgw   (Ymm dst, Ymm x, Operand y);  // dst = (x+y+1)>>1, unsigned.
137         void vpmulhrsw(Ymm dst, Ymm x, Operand y);  // dst = (x*y + (1<<14)) >> 15, signed.
138         void vpminsw  (Ymm dst, Ymm x, Operand y);
139         void vpminuw  (Ymm dst, Ymm x, Operand y);
140         void vpmaxsw  (Ymm dst, Ymm x, Operand y);
141         void vpmaxuw  (Ymm dst, Ymm x, Operand y);
142 
143         void vaddps(Ymm dst, Ymm x, Operand y);
144         void vsubps(Ymm dst, Ymm x, Operand y);
145         void vmulps(Ymm dst, Ymm x, Operand y);
146         void vdivps(Ymm dst, Ymm x, Operand y);
147         void vminps(Ymm dst, Ymm x, Operand y);
148         void vmaxps(Ymm dst, Ymm x, Operand y);
149 
150         void vsqrtps(Ymm dst, Operand x);
151 
152         void vfmadd132ps(Ymm dst, Ymm x, Operand y);
153         void vfmadd213ps(Ymm dst, Ymm x, Operand y);
154         void vfmadd231ps(Ymm dst, Ymm x, Operand y);
155 
156         void vfmsub132ps(Ymm dst, Ymm x, Operand y);
157         void vfmsub213ps(Ymm dst, Ymm x, Operand y);
158         void vfmsub231ps(Ymm dst, Ymm x, Operand y);
159 
160         void vfnmadd132ps(Ymm dst, Ymm x, Operand y);
161         void vfnmadd213ps(Ymm dst, Ymm x, Operand y);
162         void vfnmadd231ps(Ymm dst, Ymm x, Operand y);
163 
164         void vpackusdw(Ymm dst, Ymm x, Operand y);
165         void vpackuswb(Ymm dst, Ymm x, Operand y);
166 
167         void vpunpckldq(Ymm dst, Ymm x, Operand y);
168         void vpunpckhdq(Ymm dst, Ymm x, Operand y);
169 
170         void vpcmpeqd(Ymm dst, Ymm x, Operand y);
171         void vpcmpgtd(Ymm dst, Ymm x, Operand y);
172         void vpcmpeqw(Ymm dst, Ymm x, Operand y);
173         void vpcmpgtw(Ymm dst, Ymm x, Operand y);
174 
            // The four named comparisons below forward to vcmpps with a fixed imm:
            // eq=0, lt=1, le=2, neq=4.
175         void vcmpps   (Ymm dst, Ymm x, Operand y, int imm);
vcmpeqps(Ymm dst,Ymm x,Operand y)176         void vcmpeqps (Ymm dst, Ymm x, Operand y) { this->vcmpps(dst,x,y,0); }
vcmpltps(Ymm dst,Ymm x,Operand y)177         void vcmpltps (Ymm dst, Ymm x, Operand y) { this->vcmpps(dst,x,y,1); }
vcmpleps(Ymm dst,Ymm x,Operand y)178         void vcmpleps (Ymm dst, Ymm x, Operand y) { this->vcmpps(dst,x,y,2); }
vcmpneqps(Ymm dst,Ymm x,Operand y)179         void vcmpneqps(Ymm dst, Ymm x, Operand y) { this->vcmpps(dst,x,y,4); }
180 
181         // Sadly, the x parameter cannot be a general Operand for these shifts.
182         void vpslld(Ymm dst, Ymm x, int imm);
183         void vpsrld(Ymm dst, Ymm x, int imm);
184         void vpsrad(Ymm dst, Ymm x, int imm);
185 
186         void vpsllw(Ymm dst, Ymm x, int imm);
187         void vpsrlw(Ymm dst, Ymm x, int imm);
188         void vpsraw(Ymm dst, Ymm x, int imm);
189 
190         void vpermq    (Ymm dst, Operand x, int imm);
191         void vperm2f128(Ymm dst, Ymm x, Operand y, int imm);
192         void vpermps   (Ymm dst, Ymm ix, Operand src);        // dst[i] = src[ix[i]]
193 
194         enum Rounding { NEAREST, FLOOR, CEIL, TRUNC, CURRENT };
195         void vroundps(Ymm dst, Operand x, Rounding);
196 
197         void vmovdqa(Ymm dst, Operand x);
198         void vmovups(Ymm dst, Operand x);
199         void vmovups(Xmm dst, Operand x);
200         void vmovups(Operand dst, Ymm x);
201         void vmovups(Operand dst, Xmm x);
202 
203         void vcvtdq2ps (Ymm dst, Operand x);
204         void vcvttps2dq(Ymm dst, Operand x);
205         void vcvtps2dq (Ymm dst, Operand x);
206 
207         void vcvtps2ph(Operand dst, Ymm x, Rounding);
208         void vcvtph2ps(Ymm dst, Operand x);
209 
210         void vpblendvb(Ymm dst, Ymm x, Operand y, Ymm z);
211 
212         void vpshufb(Ymm dst, Ymm x, Operand y);
213 
214         void vptest(Ymm x, Operand y);
215 
216         void vbroadcastss(Ymm dst, Operand y);
217 
218         void vpmovzxwd(Ymm dst, Operand src);   // dst = src, 128-bit, uint16_t -> int
219         void vpmovzxbd(Ymm dst, Operand src);   // dst = src,  64-bit, uint8_t  -> int
220 
221         void vmovq(Operand dst, Xmm src);  // dst = src,  64-bit
222         void vmovd(Operand dst, Xmm src);  // dst = src,  32-bit
223         void vmovd(Xmm dst, Operand src);  // dst = src,  32-bit
224 
225         void vpinsrd(Xmm dst, Xmm src, Operand y, int imm);  // dst = src; dst[imm] = y, 32-bit
226         void vpinsrw(Xmm dst, Xmm src, Operand y, int imm);  // dst = src; dst[imm] = y, 16-bit
227         void vpinsrb(Xmm dst, Xmm src, Operand y, int imm);  // dst = src; dst[imm] = y,  8-bit
228 
229         void vextracti128(Operand dst, Ymm src, int imm);    // dst = src[imm], 128-bit
230         void vpextrd     (Operand dst, Xmm src, int imm);    // dst = src[imm],  32-bit
231         void vpextrw     (Operand dst, Xmm src, int imm);    // dst = src[imm],  16-bit
232         void vpextrb     (Operand dst, Xmm src, int imm);    // dst = src[imm],   8-bit
233 
234         // if (mask & 0x8000'0000) {
235         //     dst = base[scale*ix];
236         // }
237         // mask = 0;
238         void vgatherdps(Ymm dst, Scale scale, Ymm ix, GP64 base, Ymm mask);
239 
240 
241         void label(Label*);
242 
243         void jmp(Label*);
244         void je (Label*);
245         void jne(Label*);
246         void jl (Label*);
247         void jc (Label*);
248 
249         void add (Operand dst, int imm);
250         void sub (Operand dst, int imm);
251         void cmp (Operand dst, int imm);
252         void mov (Operand dst, int imm);
253         void movb(Operand dst, int imm);
254 
255         void add (Operand dst, GP64 x);
256         void sub (Operand dst, GP64 x);
257         void cmp (Operand dst, GP64 x);
258         void mov (Operand dst, GP64 x);
259         void movb(Operand dst, GP64 x);
260 
261         void add (GP64 dst, Operand x);
262         void sub (GP64 dst, Operand x);
263         void cmp (GP64 dst, Operand x);
264         void mov (GP64 dst, Operand x);
265         void movb(GP64 dst, Operand x);
266 
267         // Disambiguators... choice is arbitrary (but generates different code!).
            // Without these, a (GP64, GP64) call would match both the (Operand, GP64) and
            // (GP64, Operand) overloads above; each one picks the (Operand dst, GP64 x) form.
add(GP64 dst,GP64 x)268         void add (GP64 dst, GP64 x) { this->add (Operand(dst), x); }
sub(GP64 dst,GP64 x)269         void sub (GP64 dst, GP64 x) { this->sub (Operand(dst), x); }
cmp(GP64 dst,GP64 x)270         void cmp (GP64 dst, GP64 x) { this->cmp (Operand(dst), x); }
mov(GP64 dst,GP64 x)271         void mov (GP64 dst, GP64 x) { this->mov (Operand(dst), x); }
movb(GP64 dst,GP64 x)272         void movb(GP64 dst, GP64 x) { this->movb(Operand(dst), x); }
273 
274         void movzbq(GP64 dst, Operand x);  // dst = x, uint8_t  -> int
275         void movzwq(GP64 dst, Operand x);  // dst = x, uint16_t -> int
276 
277         // aarch64
278 
279         // d = op(n,m)
            // (DOpNM is a function type, so the list below declares one member function
            // with that signature per name.  DOpNImm and DOpN further down work the same way.)
280         using DOpNM = void(V d, V n, V m);
281         DOpNM  and16b, orr16b, eor16b, bic16b, bsl16b,
282                add4s,  sub4s,  mul4s,
283               cmeq4s, cmgt4s,
284                        sub8h,  mul8h,
285               fadd4s, fsub4s, fmul4s, fdiv4s, fmin4s, fmax4s,
286               fcmeq4s, fcmgt4s, fcmge4s,
287               tbl,
288               uzp14s, uzp24s,
289               zip14s, zip24s;
290 
291         // TODO: there are also float ==,<,<=,>,>= instructions with an immediate 0.0f,
292         // and the register comparison > and >= can also compare absolute values.  Interesting.
293 
294         // d += n*m
295         void fmla4s(V d, V n, V m);
296 
297         // d -= n*m
298         void fmls4s(V d, V n, V m);
299 
300         // d = op(n,imm)
301         using DOpNImm = void(V d, V n, int imm);
302         DOpNImm sli4s,
303                 shl4s, sshr4s, ushr4s,
304                                ushr8h;
305 
306         // d = op(n)
307         using DOpN = void(V d, V n);
308         DOpN not16b,    // d = ~n
309              fneg4s,    // d = -n
310              fsqrt4s,   // d = sqrtf(n)
311              scvtf4s,   // int -> float
312              fcvtzs4s,  // truncate float -> int
313              fcvtns4s,  // round float -> int  (nearest even)
314              frintp4s,  // round float -> int as float, toward plus infinity  (ceil)
315              frintm4s,  // round float -> int as float, toward minus infinity (floor)
316              fcvtn,     // f32 -> f16 in low half
317              fcvtl,     // f16 in low half -> f32
318              xtns2h,    // u32 -> u16
319              xtnh2b,    // u16 -> u8
320              uxtlb2h,   // u8 -> u16    (TODO: this is a special case of ushll.8h)
321              uxtlh2s,   // u16 -> u32   (TODO: this is a special case of ushll.4s)
322              uminv4s;   // dst[0] = min(n[0],n[1],n[2],n[3]), n as unsigned
323 
324         void brk (int imm16);
325         void ret (X);
326         void add (X d, X n, int imm12);
327         void sub (X d, X n, int imm12);
328         void subs(X d, X n, int imm12);  // subtract setting condition flags
329 
330         enum Shift { LSL,LSR,ASR,ROR };
331         void add (X d, X n, X m, Shift=LSL, int imm6=0);  // d=n+Shift(m,imm6), for Shift != ROR.
332 
333         // There's another encoding for unconditional branches that can jump further,
334         // but this one encoded as b.al is simple to implement and should be fine.
b(Label * l)335         void b  (Label* l) { this->b(Condition::al, l); }
bne(Label * l)336         void bne(Label* l) { this->b(Condition::ne, l); }
blt(Label * l)337         void blt(Label* l) { this->b(Condition::lt, l); }
338 
339         // "cmp ..." is just an assembler mnemonic for "subs xzr, ..."!
cmp(X n,int imm12)340         void cmp(X n, int imm12) { this->subs(xzr, n, imm12); }
341 
342         // Compare and branch if zero/non-zero, as if
343         //      cmp(t,0)
344         //      beq/bne(l)
345         // but without setting condition flags.
346         void cbz (X t, Label* l);
347         void cbnz(X t, Label* l);
348 
349         // TODO: there are ldur variants with unscaled imm, useful?
350         void ldrd(X dst, X src, int imm12=0);  // 64-bit dst = *(src+imm12*8)
351         void ldrs(X dst, X src, int imm12=0);  // 32-bit dst = *(src+imm12*4)
352         void ldrh(X dst, X src, int imm12=0);  // 16-bit dst = *(src+imm12*2)
353         void ldrb(X dst, X src, int imm12=0);  //  8-bit dst = *(src+imm12)
354 
355         void ldrq(V dst, Label*);  // 128-bit PC-relative load
356 
357         void ldrq(V dst, X src, int imm12=0);  // 128-bit dst = *(src+imm12*16)
358         void ldrd(V dst, X src, int imm12=0);  //  64-bit dst = *(src+imm12*8)
359         void ldrs(V dst, X src, int imm12=0);  //  32-bit dst = *(src+imm12*4)
360         void ldrh(V dst, X src, int imm12=0);  //  16-bit dst = *(src+imm12*2)
361         void ldrb(V dst, X src, int imm12=0);  //   8-bit dst = *(src+imm12)
362 
363         void strs(X src, X dst, int imm12=0);  // 32-bit *(dst+imm12*4) = src
364 
365         void strq(V src, X dst, int imm12=0);  // 128-bit *(dst+imm12*16) = src
366         void strd(V src, X dst, int imm12=0);  //  64-bit *(dst+imm12*8)  = src
367         void strs(V src, X dst, int imm12=0);  //  32-bit *(dst+imm12*4)  = src
368         void strh(V src, X dst, int imm12=0);  //  16-bit *(dst+imm12*2)  = src
369         void strb(V src, X dst, int imm12=0);  //   8-bit *(dst+imm12)    = src
370 
371         void movs(X dst, V src, int lane);  // dst = 32-bit src[lane]
372         void inss(V dst, X src, int lane);  // dst[lane] = 32-bit src
373 
374         void dup4s  (V dst, X src);  // Each 32-bit lane = src
375 
376         void ld1r4s (V dst, X src);  // Each 32-bit lane = *src
377         void ld1r8h (V dst, X src);  // Each 16-bit lane = *src
378         void ld1r16b(V dst, X src);  // Each  8-bit lane = *src
379 
380         void ld24s(V dst, X src);  // deinterleave(dst,dst+1)             = 256-bit *src
381         void ld44s(V dst, X src);  // deinterleave(dst,dst+1,dst+2,dst+3) = 512-bit *src
382         void st24s(V src, X dst);  // 256-bit *dst = interleave_32bit_lanes(src,src+1)
383         void st44s(V src, X dst);  // 512-bit *dst = interleave_32bit_lanes(src,src+1,src+2,src+3)
384 
385         void ld24s(V dst, X src, int lane);  // Load 2 32-bit values into given lane of dst..dst+1
386         void ld44s(V dst, X src, int lane);  // Load 4 32-bit values into given lane of dst..dst+3
387 
388     private:
            // NOTE(review): presumably fCode is the constructor's buf and fSize the running
            // byte count reported by size() — confirm in the .cpp.
389         uint8_t* fCode;
390         size_t   fSize;
391 
392         // x86-64
393         enum W { W0, W1 };      // Are the lanes 64-bit (W1) or default (W0)?  Intel Vol 2A 2.3.5.5
394         enum L { L128, L256 };  // Is this a 128- or 256-bit operation?        Intel Vol 2A 2.3.6.2
395 
396         // Helpers for vector instructions.
            // The four inline overloads pick L256 for Ymm and L128 for Xmm destinations, and
            // pass 0 for x when the instruction has no second register source.
397         void op(int prefix, int map, int opcode, int dst, int x, Operand y, W,L);
398         void op(int p, int m, int o, Ymm d, Ymm x, Operand y, W w=W0) { op(p,m,o, d,x,y,w,L256); }
399         void op(int p, int m, int o, Ymm d,        Operand y, W w=W0) { op(p,m,o, d,0,y,w,L256); }
400         void op(int p, int m, int o, Xmm d, Xmm x, Operand y, W w=W0) { op(p,m,o, d,x,y,w,L128); }
401         void op(int p, int m, int o, Xmm d,        Operand y, W w=W0) { op(p,m,o, d,0,y,w,L128); }
402 
403         // Helpers for GP64 instructions.
404         void op(int opcode, Operand dst, GP64 x);
405         void op(int opcode, int opcode_ext, Operand dst, int imm);
406 
407         void jump(uint8_t condition, Label*);
408         int disp32(Label*);
409         void imm_byte_after_operand(const Operand&, int byte);
410 
411         // aarch64
412 
413         // Opcode for 3-arguments ops is split between hi and lo:
414         //    [11 bits hi] [5 bits m] [6 bits lo] [5 bits n] [5 bits d]
415         void op(uint32_t hi, V m, uint32_t lo, V n, V d);
416 
417         // 0,1,2-argument ops, with or without an immediate:
418         //    [ 22 bits op ] [5 bits n] [5 bits d]
419         // Any immediate falls in the middle somewhere overlapping with either op, n, or both.
            // The X-register and no-register overloads cast to V (or pass (V)0) and forward
            // to the (V n, V d) form.
420         void op(uint32_t op22, V n, V d, int imm=0);
421         void op(uint32_t op22, X n, V d, int imm=0) { this->op(op22,(V)n,   d,imm); }
422         void op(uint32_t op22, V n, X d, int imm=0) { this->op(op22,   n,(V)d,imm); }
423         void op(uint32_t op22, X n, X d, int imm=0) { this->op(op22,(V)n,(V)d,imm); }
424         void op(uint32_t op22,           int imm=0) { this->op(op22,(V)0,(V)0,imm); }
425         // (1-argument ops don't seem to have a consistent convention of passing as n or d.)
426 
427 
428         // Order matters... value is 4-bit encoding for condition code.
429         enum class Condition { eq,ne,cs,cc,mi,pl,vs,vc,hi,ls,ge,lt,gt,le,al };
430         void b(Condition, Label*);
431         int disp19(Label*);
432     };
433 
434     // Order matters a little: Ops <=store128 are treated as having side effects.
        // The range predicates defined after enum class Op depend on this order as well:
        //   touches_varying_memory() expects store8..load128 to be contiguous,
        //   is_always_varying()      expects store8..index   to be contiguous,
        //   is_trace()               expects trace_line..trace_call to be contiguous.
        // SKVM_OPS is an X-macro: it applies M to every op name, in this order.
435     #define SKVM_OPS(M)                                              \
436         M(assert_true)                                               \
437         M(trace_line) M(trace_var) M(trace_call)                     \
438         M(store8)   M(store16)   M(store32) M(store64) M(store128)   \
439         M(load8)    M(load16)    M(load32)  M(load64) M(load128)     \
440         M(index)                                                     \
441         M(gather8)  M(gather16)  M(gather32)                         \
442                                  M(uniform32)                        \
443                                  M(array32)                          \
444         M(splat)                                                     \
445         M(add_f32) M(add_i32)                                        \
446         M(sub_f32) M(sub_i32)                                        \
447         M(mul_f32) M(mul_i32)                                        \
448         M(div_f32)                                                   \
449         M(min_f32) M(max_f32)                                        \
450         M(fma_f32) M(fms_f32) M(fnma_f32)                            \
451         M(sqrt_f32)                                                  \
452         M(shl_i32) M(shr_i32) M(sra_i32)                             \
453         M(ceil) M(floor) M(trunc) M(round) M(to_fp16) M(from_fp16)   \
454         M(to_f32)                                                    \
455         M(neq_f32) M(eq_f32) M(eq_i32)                               \
456         M(gte_f32) M(gt_f32) M(gt_i32)                               \
457         M(bit_and)     M(bit_or)     M(bit_xor)     M(bit_clear)     \
458         M(select)
459     // End of SKVM_OPS
460 
461     enum class Op : int {
        // Expand SKVM_OPS with M(op) -> `op,` so there is one enumerator per listed op,
        // numbered from 0 in list order.  M is undefined again right after use.
462     #define M(op) op,
463         SKVM_OPS(M)
464     #undef M
465     };
466 
has_side_effect(Op op)467     static inline bool has_side_effect(Op op) {
468         return op <= Op::store128;
469     }
touches_varying_memory(Op op)470     static inline bool touches_varying_memory(Op op) {
471         return Op::store8 <= op && op <= Op::load128;
472     }
is_always_varying(Op op)473     static inline bool is_always_varying(Op op) {
474         return Op::store8 <= op && op <= Op::index;
475     }
is_trace(Op op)476     static inline bool is_trace(Op op) {
477         return Op::trace_line <= op && op <= Op::trace_call;
478     }
479 
        // Val identifies a value in a program.  NOTE(review): presumably it is the index of
        // the Instruction that produces the value — confirm against Builder.
480     using Val = int;
481     // We reserve an impossible Val ID as a sentinel
482     // NA meaning none, n/a, null, nil, etc.
483     static const Val NA = -1;
484 
485     // Ptr and UPtr are an index into the registers args[]. The two styles of using args are
486     // varyings and uniforms. Varyings use Ptr, have a stride associated with them, and are
487     // evaluated every time through the loop. Uniforms use UPtr, don't have a stride, and are
488     // usually hoisted above the loop.
489     struct Ptr {
490         Ptr() = default;
PtrPtr491         Ptr(int ix_) : ix(ix_) {}
492         int ix;
493     };
494     struct UPtr : public Ptr {
495         UPtr() = default;
UPtrUPtr496         UPtr(int ix_) : Ptr(ix_) {}
497     };
498 
499     bool operator!=(Ptr a, Ptr b);
500 
501     struct I32 {
502         Builder* builder = nullptr;
503         Val      id      = NA;
504         explicit operator bool() const { return id != NA; }
505         Builder* operator->()    const { return builder; }
506     };
507 
508     struct F32 {
509         Builder* builder = nullptr;
510         Val      id      = NA;
511         explicit operator bool() const { return id != NA; }
512         Builder* operator->()    const { return builder; }
513     };
514 
515     struct Color {
516         F32 r,g,b,a;
517         explicit operator bool() const { return r && g && b && a; }
518         Builder* operator->()    const { return a.operator->(); }
519     };
520 
521     struct HSLA {
522         F32 h,s,l,a;
523         explicit operator bool() const { return h && s && l && a; }
524         Builder* operator->()    const { return a.operator->(); }
525     };
526 
527     struct Coord {
528         F32 x,y;
529         explicit operator bool() const { return x && y; }
530         Builder* operator->()    const { return x.operator->(); }
531     };
532 
        // Locates one uniform value: a uniform pointer argument plus an offset from it.
        // Uniforms::push*() fills `offset` with a byte count (a multiple of sizeof(int)).
533     struct Uniform {
534         UPtr ptr;
535         int offset;
536     };
537     struct Uniforms {
538         UPtr             base;
539         std::vector<int> buf;
540 
UniformsUniforms541         Uniforms(UPtr ptr, int init) : base(ptr), buf(init) {}
542 
pushUniforms543         Uniform push(int val) {
544             buf.push_back(val);
545             return {base, (int)( sizeof(int)*(buf.size() - 1) )};
546         }
547 
pushFUniforms548         Uniform pushF(float val) {
549             int bits;
550             memcpy(&bits, &val, sizeof(int));
551             return this->push(bits);
552         }
553 
pushPtrUniforms554         Uniform pushPtr(const void* ptr) {
555             // Jam the pointer into 1 or 2 ints.
556             int ints[sizeof(ptr) / sizeof(int)];
557             memcpy(ints, &ptr, sizeof(ptr));
558             for (int bits : ints) {
559                 buf.push_back(bits);
560             }
561             return {base, (int)( sizeof(int)*(buf.size() - SK_ARRAY_COUNT(ints)) )};
562         }
563 
pushArrayUniforms564         Uniform pushArray(int32_t a[]) {
565             return this->pushPtr(a);
566         }
567 
pushArrayFUniforms568         Uniform pushArrayF(float a[]) {
569             return this->pushPtr(a);
570         }
571     };
572 
573     struct PixelFormat {
574         enum { UNORM, SRGB, FLOAT} encoding;
575         int r_bits,  g_bits,  b_bits,  a_bits,
576             r_shift, g_shift, b_shift, a_shift;
577     };
578     PixelFormat SkColorType_to_PixelFormat(SkColorType);
579 
580     SK_BEGIN_REQUIRE_DENSE
        // One instruction of a program as returned by Builder::program().
        // NOTE(review): the SK_BEGIN/END_REQUIRE_DENSE pair presumably rejects padding
        // inside the struct — confirm in SkMacros.h.
581     struct Instruction {
582         Op  op;              // v* = op(x,y,z,w,immA,immB,immC), where * == index of this Instruction.
583         Val x,y,z,w;         // Enough arguments for Op::store128.
584         int immA,immB,immC;  // Immediate bit pattern, shift count, pointer index, byte offset, etc.
585     };
586     SK_END_REQUIRE_DENSE
587 
588     bool operator==(const Instruction&, const Instruction&);
        // Hash functor for Instruction, taking an optional seed (default 0); it pairs with
        // the operator== declared just above.  The hash itself is defined out of line.
589     struct InstructionHash {
590         uint32_t operator()(const Instruction&, uint32_t seed=0) const;
591     };
592 
593     struct OptimizedInstruction {
594         Op op;
595         Val x,y,z,w;
596         int immA,immB,immC;
597 
598         Val  death;
599         bool can_hoist;
600     };
601 
602     struct Features {
603         bool fma   = false;
604         bool fp16  = false;
605     };
606 
607     class Builder {
608     public:
609 
610         Builder();
611         explicit Builder(Features);
612 
613         Program done(const char* debug_name = nullptr, bool allow_jit=true) const;
614 
615         // Mostly for debugging, tests, etc.
program()616         std::vector<Instruction> program() const { return fProgram; }
617         std::vector<OptimizedInstruction> optimize() const;
618 
619         // Convenience arg() wrappers for most common strides, sizeof(T) and 0.
620         template <typename T>
varying()621         Ptr varying() { return this->arg(sizeof(T)); }
varying(int stride)622         Ptr varying(int stride) { SkASSERT(stride > 0); return this->arg(stride); }
uniform()623         UPtr uniform() { Ptr p = this->arg(0); return UPtr{p.ix}; }
624 
625         // TODO: allow uniform (i.e. Ptr) offsets to store* and load*?
626         // TODO: sign extension (signed types) for <32-bit loads?
627         // TODO: unsigned integer operations where relevant (just comparisons?)?
628 
629         // Assert cond is true, printing debug when not.
630         void assert_true(I32 cond, I32 debug);
assert_true(I32 cond,F32 debug)631         void assert_true(I32 cond, F32 debug) { assert_true(cond, pun_to_I32(debug)); }
assert_true(I32 cond)632         void assert_true(I32 cond)            { assert_true(cond, cond); }
633 
634         // Insert debug traces into the instruction stream
635         void trace_line(I32 mask, int line);
636         void trace_var(I32 mask, int slot, I32 val);
637         void trace_var(I32 mask, int slot, F32 val);
638         void trace_var(I32 mask, int slot, bool b);
639         void trace_call_enter(I32 mask, int line);
640         void trace_call_exit(I32 mask, int line);
641 
642         // Store {8,16,32,64,128}-bit varying.
643         void store8  (Ptr ptr, I32 val);
644         void store16 (Ptr ptr, I32 val);
645         void store32 (Ptr ptr, I32 val);
storeF(Ptr ptr,F32 val)646         void storeF  (Ptr ptr, F32 val) { store32(ptr, pun_to_I32(val)); }
647         void store64 (Ptr ptr, I32 lo, I32 hi);              // *ptr = lo|(hi<<32)
648         void store128(Ptr ptr, I32 x, I32 y, I32 z, I32 w);  // *ptr = x|(y<<32)|(z<<64)|(w<<96)
649 
650         // Returns varying {n, n-1, n-2, ..., 1}, where n is the argument to Program::eval().
651         I32 index();
652 
653         // Load {8,16,32,64,128}-bit varying.
654         I32 load8  (Ptr ptr);
655         I32 load16 (Ptr ptr);
656         I32 load32 (Ptr ptr);
loadF(Ptr ptr)657         F32 loadF  (Ptr ptr) { return pun_to_F32(load32(ptr)); }
658         I32 load64 (Ptr ptr, int lane);  // Load 32-bit lane 0-1 of  64-bit value.
659         I32 load128(Ptr ptr, int lane);  // Load 32-bit lane 0-3 of 128-bit value.
660 
661         // Load i32/f32 uniform with byte-count offset.
662         I32 uniform32(UPtr ptr, int offset);
uniformF(UPtr ptr,int offset)663         F32 uniformF (UPtr ptr, int offset) { return pun_to_F32(uniform32(ptr,offset)); }
664 
665         // Load i32/f32 uniform with byte-count offset and a C-style array index. The address of
666         // the element is (*(ptr + byte-count offset))[index].
667         I32 array32  (UPtr ptr, int offset, int index);
arrayF(UPtr ptr,int offset,int index)668         F32 arrayF   (UPtr ptr, int offset, int index) {
669             return pun_to_F32(array32(ptr, offset, index));
670         }
671 
672         // Push and load this color as a uniform.
673         Color uniformColor(SkColor4f, Uniforms*);
674 
675         // Gather u8,u16,i32 with varying element-count index from *(ptr + byte-count offset).
676         I32 gather8 (UPtr ptr, int offset, I32 index);
677         I32 gather16(UPtr ptr, int offset, I32 index);
678         I32 gather32(UPtr ptr, int offset, I32 index);
gatherF(UPtr ptr,int offset,I32 index)679         F32 gatherF (UPtr ptr, int offset, I32 index) {
680             return pun_to_F32(gather32(ptr, offset, index));
681         }
682 
683         // Convenience methods for working with skvm::Uniform(s).
uniform32(Uniform u)684         I32 uniform32(Uniform u)            { return this->uniform32(u.ptr, u.offset); }
uniformF(Uniform u)685         F32 uniformF (Uniform u)            { return this->uniformF (u.ptr, u.offset); }
gather8(Uniform u,I32 index)686         I32 gather8  (Uniform u, I32 index) { return this->gather8  (u.ptr, u.offset, index); }
gather16(Uniform u,I32 index)687         I32 gather16 (Uniform u, I32 index) { return this->gather16 (u.ptr, u.offset, index); }
gather32(Uniform u,I32 index)688         I32 gather32 (Uniform u, I32 index) { return this->gather32 (u.ptr, u.offset, index); }
gatherF(Uniform u,I32 index)689         F32 gatherF  (Uniform u, I32 index) { return this->gatherF  (u.ptr, u.offset, index); }
690 
691         // Convenience methods for working with array pointers in skvm::Uniforms. Index is an
692         // array index and not a byte offset. The array pointer is stored at u.
array32(Uniform a,int index)693         I32 array32  (Uniform a, int index) { return this->array32  (a.ptr, a.offset, index); }
arrayF(Uniform a,int index)694         F32 arrayF   (Uniform a, int index) { return this->arrayF   (a.ptr, a.offset, index); }
695 
696         // Load an immediate constant.
697         I32 splat(int      n);
splat(unsigned u)698         I32 splat(unsigned u) { return splat((int)u); }
splat(float f)699         F32 splat(float    f) {
700             int bits;
701             memcpy(&bits, &f, 4);
702             return pun_to_F32(splat(bits));
703         }
704 
705         // Some operations make sense with immediate arguments,
706         // so we provide overloads inline to make that seamless.
707         //
708         // We omit overloads that may indicate a bug or performance issue.
709         // In general it does not make sense to pass immediates to unary operations,
710         // and even sometimes not for binary operations, e.g.
711         //
712         //   div(x, y)    -- normal every day divide
713         //   div(3.0f, y) -- yep, makes sense
714         //   div(x, 3.0f) -- omitted as a reminder you probably want mul(x, 1/3.0f).
715         //
716         // You can of course always splat() to override these opinions.
717 
718         // float math, comparisons, etc.
719         F32 add(F32, F32);
add(F32 x,float y)720         F32 add(F32 x, float y) { return add(x, splat(y)); }
add(float x,F32 y)721         F32 add(float x, F32 y) { return add(splat(x), y); }
722 
723         F32 sub(F32, F32);
sub(F32 x,float y)724         F32 sub(F32 x, float y) { return sub(x, splat(y)); }
sub(float x,F32 y)725         F32 sub(float x, F32 y) { return sub(splat(x), y); }
726 
727         F32 mul(F32, F32);
mul(F32 x,float y)728         F32 mul(F32 x, float y) { return mul(x, splat(y)); }
mul(float x,F32 y)729         F32 mul(float x, F32 y) { return mul(splat(x), y); }
730 
731         // mul(), but allowing optimizations not strictly legal under IEEE-754 rules.
732         F32 fast_mul(F32, F32);
fast_mul(F32 x,float y)733         F32 fast_mul(F32 x, float y) { return fast_mul(x, splat(y)); }
fast_mul(float x,F32 y)734         F32 fast_mul(float x, F32 y) { return fast_mul(splat(x), y); }
735 
736         F32 div(F32, F32);
div(float x,F32 y)737         F32 div(float x, F32 y) { return div(splat(x), y); }
738 
739         F32 min(F32, F32);
min(F32 x,float y)740         F32 min(F32 x, float y) { return min(x, splat(y)); }
min(float x,F32 y)741         F32 min(float x, F32 y) { return min(splat(x), y); }
742 
743         F32 max(F32, F32);
max(F32 x,float y)744         F32 max(F32 x, float y) { return max(x, splat(y)); }
max(float x,F32 y)745         F32 max(float x, F32 y) { return max(splat(x), y); }
746 
747         // TODO: remove mad()?  It's just sugar.
mad(F32 x,F32 y,F32 z)748         F32 mad(F32   x, F32   y, F32   z) { return add(mul(x,y), z); }
mad(F32 x,F32 y,float z)749         F32 mad(F32   x, F32   y, float z) { return mad(      x ,       y , splat(z)); }
mad(F32 x,float y,F32 z)750         F32 mad(F32   x, float y, F32   z) { return mad(      x , splat(y),       z ); }
mad(F32 x,float y,float z)751         F32 mad(F32   x, float y, float z) { return mad(      x , splat(y), splat(z)); }
mad(float x,F32 y,F32 z)752         F32 mad(float x, F32   y, F32   z) { return mad(splat(x),       y ,       z ); }
mad(float x,F32 y,float z)753         F32 mad(float x, F32   y, float z) { return mad(splat(x),       y , splat(z)); }
mad(float x,float y,F32 z)754         F32 mad(float x, float y, F32   z) { return mad(splat(x), splat(y),       z ); }
755 
756         F32        sqrt(F32);
757         F32 approx_log2(F32);
758         F32 approx_pow2(F32);
approx_log(F32 x)759         F32 approx_log (F32 x) { return mul(0.69314718f, approx_log2(x)); }
approx_exp(F32 x)760         F32 approx_exp (F32 x) { return approx_pow2(mul(x, 1.4426950408889634074f)); }
761 
762         F32 approx_powf(F32 base, F32 exp);
approx_powf(F32 base,float exp)763         F32 approx_powf(F32 base, float exp) { return approx_powf(base, splat(exp)); }
approx_powf(float base,F32 exp)764         F32 approx_powf(float base, F32 exp) { return approx_powf(splat(base), exp); }
765 
766 
767         F32 approx_sin(F32 radians);
approx_cos(F32 radians)768         F32 approx_cos(F32 radians) { return approx_sin(add(radians, SK_ScalarPI/2)); }
769         F32 approx_tan(F32 radians);
770 
771         F32 approx_asin(F32 x);
approx_acos(F32 x)772         F32 approx_acos(F32 x) { return sub(SK_ScalarPI/2, approx_asin(x)); }
773         F32 approx_atan(F32 x);
774         F32 approx_atan2(F32 y, F32 x);
775 
776         F32 lerp(F32   lo, F32   hi, F32   t);
lerp(F32 lo,F32 hi,float t)777         F32 lerp(F32   lo, F32   hi, float t) { return lerp(      lo ,       hi , splat(t)); }
lerp(F32 lo,float hi,float t)778         F32 lerp(F32   lo, float hi, float t) { return lerp(      lo , splat(hi), splat(t)); }
lerp(F32 lo,float hi,F32 t)779         F32 lerp(F32   lo, float hi, F32   t) { return lerp(      lo , splat(hi),       t ); }
lerp(float lo,F32 hi,F32 t)780         F32 lerp(float lo, F32   hi, F32   t) { return lerp(splat(lo),       hi ,       t ); }
lerp(float lo,F32 hi,float t)781         F32 lerp(float lo, F32   hi, float t) { return lerp(splat(lo),       hi , splat(t)); }
lerp(float lo,float hi,F32 t)782         F32 lerp(float lo, float hi, F32   t) { return lerp(splat(lo), splat(hi),       t ); }
783 
clamp(F32 x,F32 lo,F32 hi)784         F32 clamp(F32   x, F32   lo, F32   hi) { return max(lo, min(x, hi)); }
clamp(F32 x,F32 lo,float hi)785         F32 clamp(F32   x, F32   lo, float hi) { return clamp(      x ,       lo , splat(hi)); }
clamp(F32 x,float lo,float hi)786         F32 clamp(F32   x, float lo, float hi) { return clamp(      x , splat(lo), splat(hi)); }
clamp(F32 x,float lo,F32 hi)787         F32 clamp(F32   x, float lo, F32   hi) { return clamp(      x , splat(lo),       hi ); }
clamp(float x,F32 lo,F32 hi)788         F32 clamp(float x, F32   lo, F32   hi) { return clamp(splat(x),       lo ,       hi ); }
clamp(float x,F32 lo,float hi)789         F32 clamp(float x, F32   lo, float hi) { return clamp(splat(x),       lo , splat(hi)); }
clamp(float x,float lo,F32 hi)790         F32 clamp(float x, float lo, F32   hi) { return clamp(splat(x), splat(lo),       hi ); }
791 
clamp01(F32 x)792         F32 clamp01(F32 x) { return clamp(x, 0.0f, 1.0f); }
793 
abs(F32 x)794         F32    abs(F32 x) { return pun_to_F32(bit_and(pun_to_I32(x), 0x7fff'ffff)); }
795         F32  fract(F32 x) { return sub(x, floor(x)); }
796         F32   ceil(F32);
797         F32  floor(F32);
798         I32 is_NaN   (F32 x) { return neq(x,x); }
799         I32 is_finite(F32 x) { return lt(bit_and(pun_to_I32(x), 0x7f80'0000), 0x7f80'0000); }
800 
801         I32 trunc(F32 x);
802         I32 round(F32 x);  // Round to int using current rounding mode (as if lrintf()).
803         I32 pun_to_I32(F32 x) { return {x.builder, x.id}; }
804 
805         I32   to_fp16(F32 x);
806         F32 from_fp16(I32 x);
807 
808         I32 eq(F32, F32);
809         I32 eq(F32 x, float y) { return eq(x, splat(y)); }
810         I32 eq(float x, F32 y) { return eq(splat(x), y); }
811 
812         I32 neq(F32, F32);
813         I32 neq(F32 x, float y) { return neq(x, splat(y)); }
814         I32 neq(float x, F32 y) { return neq(splat(x), y); }
815 
816         I32 lt(F32, F32);
817         I32 lt(F32 x, float y) { return lt(x, splat(y)); }
818         I32 lt(float x, F32 y) { return lt(splat(x), y); }
819 
820         I32 lte(F32, F32);
821         I32 lte(F32 x, float y) { return lte(x, splat(y)); }
822         I32 lte(float x, F32 y) { return lte(splat(x), y); }
823 
824         I32 gt(F32, F32);
825         I32 gt(F32 x, float y) { return gt(x, splat(y)); }
826         I32 gt(float x, F32 y) { return gt(splat(x), y); }
827 
828         I32 gte(F32, F32);
829         I32 gte(F32 x, float y) { return gte(x, splat(y)); }
830         I32 gte(float x, F32 y) { return gte(splat(x), y); }
831 
832         // int math, comparisons, etc.
833         I32 add(I32, I32);
834         I32 add(I32 x, int y) { return add(x, splat(y)); }
835         I32 add(int x, I32 y) { return add(splat(x), y); }
836 
837         I32 sub(I32, I32);
838         I32 sub(I32 x, int y) { return sub(x, splat(y)); }
839         I32 sub(int x, I32 y) { return sub(splat(x), y); }
840 
841         I32 mul(I32, I32);
842         I32 mul(I32 x, int y) { return mul(x, splat(y)); }
843         I32 mul(int x, I32 y) { return mul(splat(x), y); }
844 
845         I32 shl(I32 x, int bits);
846         I32 shr(I32 x, int bits);
847         I32 sra(I32 x, int bits);
848 
849         I32 eq(I32, I32);
850         I32 eq(I32 x, int y) { return eq(x, splat(y)); }
851         I32 eq(int x, I32 y) { return eq(splat(x), y); }
852 
853         I32 neq(I32, I32);
854         I32 neq(I32 x, int y) { return neq(x, splat(y)); }
855         I32 neq(int x, I32 y) { return neq(splat(x), y); }
856 
857         I32 lt(I32, I32);
858         I32 lt(I32 x, int y) { return lt(x, splat(y)); }
859         I32 lt(int x, I32 y) { return lt(splat(x), y); }
860 
861         I32 lte(I32, I32);
862         I32 lte(I32 x, int y) { return lte(x, splat(y)); }
863         I32 lte(int x, I32 y) { return lte(splat(x), y); }
864 
865         I32 gt(I32, I32);
866         I32 gt(I32 x, int y) { return gt(x, splat(y)); }
867         I32 gt(int x, I32 y) { return gt(splat(x), y); }
868 
869         I32 gte(I32, I32);
870         I32 gte(I32 x, int y) { return gte(x, splat(y)); }
871         I32 gte(int x, I32 y) { return gte(splat(x), y); }
872 
873         F32 to_F32(I32 x);
874         F32 pun_to_F32(I32 x) { return {x.builder, x.id}; }
875 
876         // Bitwise operations.
877         I32 bit_and(I32, I32);
878         I32 bit_and(I32 x, int y) { return bit_and(x, splat(y)); }
879         I32 bit_and(int x, I32 y) { return bit_and(splat(x), y); }
880 
881         I32 bit_or(I32, I32);
882         I32 bit_or(I32 x, int y) { return bit_or(x, splat(y)); }
883         I32 bit_or(int x, I32 y) { return bit_or(splat(x), y); }
884 
885         I32 bit_xor(I32, I32);
886         I32 bit_xor(I32 x, int y) { return bit_xor(x, splat(y)); }
887         I32 bit_xor(int x, I32 y) { return bit_xor(splat(x), y); }
888 
889         I32 bit_clear(I32, I32);
890         I32 bit_clear(I32 x, int y) { return bit_clear(x, splat(y)); }
891         I32 bit_clear(int x, I32 y) { return bit_clear(splat(x), y); }
892 
893         I32 min(I32 x, I32 y) { return select(lte(x,y), x, y); }
894         I32 min(I32 x, int y) { return min(x, splat(y)); }
895         I32 min(int x, I32 y) { return min(splat(x), y); }
896 
897         I32 max(I32 x, I32 y) { return select(gte(x,y), x, y); }
898         I32 max(I32 x, int y) { return max(x, splat(y)); }
899         I32 max(int x, I32 y) { return max(splat(x), y); }
900 
901         I32 select(I32 cond, I32 t, I32 f);  // cond ? t : f
902         I32 select(I32 cond, int t, I32 f) { return select(cond, splat(t),       f ); }
903         I32 select(I32 cond, I32 t, int f) { return select(cond,       t , splat(f)); }
904         I32 select(I32 cond, int t, int f) { return select(cond, splat(t), splat(f)); }
905 
906         F32 select(I32 cond, F32 t, F32 f) {
907             return pun_to_F32(select(cond, pun_to_I32(t)
908                                          , pun_to_I32(f)));
909         }
910         F32 select(I32 cond, float t, F32   f) { return select(cond, splat(t),       f ); }
911         F32 select(I32 cond, F32   t, float f) { return select(cond,       t , splat(f)); }
912         F32 select(I32 cond, float t, float f) { return select(cond, splat(t), splat(f)); }
913 
914         I32 extract(I32 x, int bits, I32 z);   // (x>>bits) & z
915         I32 extract(I32 x, int bits, int z) { return extract(x, bits, splat(z)); }
916         I32 extract(int x, int bits, I32 z) { return extract(splat(x), bits, z); }
917 
918         I32 pack(I32 x, I32 y, int bits);   // x | (y<<bits)
919         I32 pack(I32 x, int y, int bits) { return pack(x, splat(y), bits); }
920         I32 pack(int x, I32 y, int bits) { return pack(splat(x), y, bits); }
921 
922 
923         // Common idioms used in several places, worth centralizing for consistency.
924         F32 from_unorm(int bits, I32);   // E.g. from_unorm(8, x) -> x * (1/255.0f)
925         I32   to_unorm(int bits, F32);   // E.g.   to_unorm(8, x) -> round(x * 255)
926 
927         Color   load(PixelFormat, Ptr ptr);
928         void   store(PixelFormat, Ptr ptr, Color);
929         Color gather(PixelFormat, UPtr ptr, int offset, I32 index);
930         Color gather(PixelFormat f, Uniform u, I32 index) {
931             return gather(f, u.ptr, u.offset, index);
932         }
933 
934         void   premul(F32* r, F32* g, F32* b, F32 a);
935         void unpremul(F32* r, F32* g, F32* b, F32 a);
936 
937         Color   premul(Color c) {   this->premul(&c.r, &c.g, &c.b, c.a); return c; }
938         Color unpremul(Color c) { this->unpremul(&c.r, &c.g, &c.b, c.a); return c; }
939 
940         Color lerp(Color lo, Color hi, F32 t);
941         Color blend(SkBlendMode, Color src, Color dst);
942 
943         Color clamp01(Color c) {
944             return { clamp01(c.r), clamp01(c.g), clamp01(c.b), clamp01(c.a) };
945         }
946 
947         HSLA  to_hsla(Color);
948         Color to_rgba(HSLA);
949 
950         void dump(SkWStream* = nullptr) const;
951 
952         uint64_t hash() const;
953 
954         Val push(Instruction);
955 
956         bool allImm() const { return true; }
957 
958         template <typename T, typename... Rest>
959         bool allImm(Val id, T* imm, Rest... rest) const {
960             if (fProgram[id].op == Op::splat) {
961                 static_assert(sizeof(T) == 4);
962                 memcpy(imm, &fProgram[id].immA, 4);
963                 return this->allImm(rest...);
964             }
965             return false;
966         }
967 
968         bool allUniform() const { return true; }
969 
970         template <typename... Rest>
971         bool allUniform(Val id, Uniform* uni, Rest... rest) const {
972             if (fProgram[id].op == Op::uniform32) {
973                 uni->ptr.ix = fProgram[id].immA;
974                 uni->offset = fProgram[id].immB;
975                 return this->allUniform(rest...);
976             }
977             return false;
978         }
979 
980     private:
981         // Declare an argument with given stride (use stride=0 for uniforms).
982         Ptr arg(int stride);
983 
984         Val push(
985                 Op op, Val x=NA, Val y=NA, Val z=NA, Val w=NA, int immA=0, int immB=0, int immC=0) {
986             return this->push(Instruction{op, x,y,z,w, immA,immB,immC});
987         }
988 
989         template <typename T>
990         bool isImm(Val id, T want) const {
991             T imm = 0;
992             return this->allImm(id, &imm) && imm == want;
993         }
994 
995         SkTHashMap<Instruction, Val, InstructionHash> fIndex;
996         std::vector<Instruction>                      fProgram;
997         std::vector<int>                              fStrides;
998         const Features                                fFeatures;
999     };
1000 
1001     // Optimization passes and data structures normally used by Builder::optimize(),
1002     // extracted here so they can be unit tested.
1003     std::vector<Instruction>          eliminate_dead_code(std::vector<Instruction>);
1004     std::vector<OptimizedInstruction> finalize           (std::vector<Instruction>);
1005 
1006     using Reg = int;
1007 
1008     // d = op(x,y,z,w, immA,immB)
1009     struct InterpreterInstruction {
1010         Op  op;
1011         Reg d,x,y,z,w;
1012         int immA,immB,immC;
1013     };
1014 
1015     class Program {
1016     public:
1017         Program(const std::vector<OptimizedInstruction>& instructions,
1018                 const std::vector<int>& strides,
1019                 const char* debug_name, bool allow_jit);
1020 
1021         Program();
1022         ~Program();
1023 
1024         Program(Program&&);
1025         Program& operator=(Program&&);
1026 
1027         Program(const Program&) = delete;
1028         Program& operator=(const Program&) = delete;
1029 
1030         void eval(int n, void* args[]) const;
1031 
1032         template <typename... T>
1033         void eval(int n, T*... arg) const {
1034             SkASSERT(sizeof...(arg) == this->nargs());
1035             // This nullptr isn't important except that it makes args[] non-empty if you pass none.
1036             void* args[] = { (void*)arg..., nullptr };
1037             this->eval(n, args);
1038         }
1039 
1040         std::vector<InterpreterInstruction> instructions() const;
1041         int  nargs() const;
1042         int  nregs() const;
1043         int  loop () const;
1044         bool empty() const;
1045 
1046         bool hasJIT() const;  // Has this Program been JITted?
1047 
1048         void dump(SkWStream* = nullptr) const;
1049         void disassemble(SkWStream* = nullptr) const;
1050 
1051     private:
1052         void setupInterpreter(const std::vector<OptimizedInstruction>&);
1053         void setupJIT        (const std::vector<OptimizedInstruction>&, const char* debug_name);
1054         void setupLLVM       (const std::vector<OptimizedInstruction>&, const char* debug_name);
1055 
1056         bool jit(const std::vector<OptimizedInstruction>&,
1057                  int* stack_hint, uint32_t* registers_used,
1058                  Assembler*) const;
1059 
1060         void waitForLLVM() const;
1061         void dropJIT();
1062 
1063         struct Impl;
1064         std::unique_ptr<Impl> fImpl;
1065     };
1066 
1067     // TODO: control flow
1068     // TODO: 64-bit values?
1069 
1070 #define SI static inline
1071 
1072     SI I32 operator+(I32 x, I32 y) { return x->add(x,y); }
1073     SI I32 operator+(I32 x, int y) { return x->add(x,y); }
1074     SI I32 operator+(int x, I32 y) { return y->add(x,y); }
1075 
1076     SI I32 operator-(I32 x, I32 y) { return x->sub(x,y); }
1077     SI I32 operator-(I32 x, int y) { return x->sub(x,y); }
1078     SI I32 operator-(int x, I32 y) { return y->sub(x,y); }
1079 
1080     SI I32 operator*(I32 x, I32 y) { return x->mul(x,y); }
1081     SI I32 operator*(I32 x, int y) { return x->mul(x,y); }
1082     SI I32 operator*(int x, I32 y) { return y->mul(x,y); }
1083 
min(I32 x,I32 y)1084     SI I32 min(I32 x, I32 y) { return x->min(x,y); }
min(I32 x,int y)1085     SI I32 min(I32 x, int y) { return x->min(x,y); }
min(int x,I32 y)1086     SI I32 min(int x, I32 y) { return y->min(x,y); }
1087 
max(I32 x,I32 y)1088     SI I32 max(I32 x, I32 y) { return x->max(x,y); }
max(I32 x,int y)1089     SI I32 max(I32 x, int y) { return x->max(x,y); }
max(int x,I32 y)1090     SI I32 max(int x, I32 y) { return y->max(x,y); }
1091 
1092     SI I32 operator==(I32 x, I32 y) { return x->eq(x,y); }
1093     SI I32 operator==(I32 x, int y) { return x->eq(x,y); }
1094     SI I32 operator==(int x, I32 y) { return y->eq(x,y); }
1095 
1096     SI I32 operator!=(I32 x, I32 y) { return x->neq(x,y); }
1097     SI I32 operator!=(I32 x, int y) { return x->neq(x,y); }
1098     SI I32 operator!=(int x, I32 y) { return y->neq(x,y); }
1099 
1100     SI I32 operator< (I32 x, I32 y) { return x->lt(x,y); }
1101     SI I32 operator< (I32 x, int y) { return x->lt(x,y); }
1102     SI I32 operator< (int x, I32 y) { return y->lt(x,y); }
1103 
1104     SI I32 operator<=(I32 x, I32 y) { return x->lte(x,y); }
1105     SI I32 operator<=(I32 x, int y) { return x->lte(x,y); }
1106     SI I32 operator<=(int x, I32 y) { return y->lte(x,y); }
1107 
1108     SI I32 operator> (I32 x, I32 y) { return x->gt(x,y); }
1109     SI I32 operator> (I32 x, int y) { return x->gt(x,y); }
1110     SI I32 operator> (int x, I32 y) { return y->gt(x,y); }
1111 
1112     SI I32 operator>=(I32 x, I32 y) { return x->gte(x,y); }
1113     SI I32 operator>=(I32 x, int y) { return x->gte(x,y); }
1114     SI I32 operator>=(int x, I32 y) { return y->gte(x,y); }
1115 
1116 
1117     SI F32 operator+(F32   x, F32   y) { return x->add(x,y); }
1118     SI F32 operator+(F32   x, float y) { return x->add(x,y); }
1119     SI F32 operator+(float x, F32   y) { return y->add(x,y); }
1120 
1121     SI F32 operator-(F32   x, F32   y) { return x->sub(x,y); }
1122     SI F32 operator-(F32   x, float y) { return x->sub(x,y); }
1123     SI F32 operator-(float x, F32   y) { return y->sub(x,y); }
1124 
1125     SI F32 operator*(F32   x, F32   y) { return x->mul(x,y); }
1126     SI F32 operator*(F32   x, float y) { return x->mul(x,y); }
1127     SI F32 operator*(float x, F32   y) { return y->mul(x,y); }
1128 
fast_mul(F32 x,F32 y)1129     SI F32 fast_mul(F32   x, F32   y) { return x->fast_mul(x,y); }
fast_mul(F32 x,float y)1130     SI F32 fast_mul(F32   x, float y) { return x->fast_mul(x,y); }
fast_mul(float x,F32 y)1131     SI F32 fast_mul(float x, F32   y) { return y->fast_mul(x,y); }
1132 
1133     SI F32 operator/(F32   x, F32  y) { return x->div(x,y); }
1134     SI F32 operator/(float x, F32  y) { return y->div(x,y); }
1135 
min(F32 x,F32 y)1136     SI F32 min(F32   x, F32   y) { return x->min(x,y); }
min(F32 x,float y)1137     SI F32 min(F32   x, float y) { return x->min(x,y); }
min(float x,F32 y)1138     SI F32 min(float x, F32   y) { return y->min(x,y); }
1139 
max(F32 x,F32 y)1140     SI F32 max(F32   x, F32   y) { return x->max(x,y); }
max(F32 x,float y)1141     SI F32 max(F32   x, float y) { return x->max(x,y); }
max(float x,F32 y)1142     SI F32 max(float x, F32   y) { return y->max(x,y); }
1143 
1144     SI I32 operator==(F32   x, F32   y) { return x->eq(x,y); }
1145     SI I32 operator==(F32   x, float y) { return x->eq(x,y); }
1146     SI I32 operator==(float x, F32   y) { return y->eq(x,y); }
1147 
1148     SI I32 operator!=(F32   x, F32   y) { return x->neq(x,y); }
1149     SI I32 operator!=(F32   x, float y) { return x->neq(x,y); }
1150     SI I32 operator!=(float x, F32   y) { return y->neq(x,y); }
1151 
1152     SI I32 operator< (F32   x, F32   y) { return x->lt(x,y); }
1153     SI I32 operator< (F32   x, float y) { return x->lt(x,y); }
1154     SI I32 operator< (float x, F32   y) { return y->lt(x,y); }
1155 
1156     SI I32 operator<=(F32   x, F32   y) { return x->lte(x,y); }
1157     SI I32 operator<=(F32   x, float y) { return x->lte(x,y); }
1158     SI I32 operator<=(float x, F32   y) { return y->lte(x,y); }
1159 
1160     SI I32 operator> (F32   x, F32   y) { return x->gt(x,y); }
1161     SI I32 operator> (F32   x, float y) { return x->gt(x,y); }
1162     SI I32 operator> (float x, F32   y) { return y->gt(x,y); }
1163 
1164     SI I32 operator>=(F32   x, F32   y) { return x->gte(x,y); }
1165     SI I32 operator>=(F32   x, float y) { return x->gte(x,y); }
1166     SI I32 operator>=(float x, F32   y) { return y->gte(x,y); }
1167 
1168     SI I32& operator+=(I32& x, I32 y) { return (x = x + y); }
1169     SI I32& operator+=(I32& x, int y) { return (x = x + y); }
1170 
1171     SI I32& operator-=(I32& x, I32 y) { return (x = x - y); }
1172     SI I32& operator-=(I32& x, int y) { return (x = x - y); }
1173 
1174     SI I32& operator*=(I32& x, I32 y) { return (x = x * y); }
1175     SI I32& operator*=(I32& x, int y) { return (x = x * y); }
1176 
1177     SI F32& operator+=(F32& x, F32   y) { return (x = x + y); }
1178     SI F32& operator+=(F32& x, float y) { return (x = x + y); }
1179 
1180     SI F32& operator-=(F32& x, F32   y) { return (x = x - y); }
1181     SI F32& operator-=(F32& x, float y) { return (x = x - y); }
1182 
1183     SI F32& operator*=(F32& x, F32   y) { return (x = x * y); }
1184     SI F32& operator*=(F32& x, float y) { return (x = x * y); }
1185 
1186     SI F32& operator/=(F32& x, F32   y) { return (x = x / y); }
1187 
assert_true(I32 cond,I32 debug)1188     SI void assert_true(I32 cond, I32 debug) { cond->assert_true(cond,debug); }
assert_true(I32 cond,F32 debug)1189     SI void assert_true(I32 cond, F32 debug) { cond->assert_true(cond,debug); }
assert_true(I32 cond)1190     SI void assert_true(I32 cond)            { cond->assert_true(cond); }
1191 
store8(Ptr ptr,I32 val)1192     SI void store8  (Ptr ptr, I32 val)                    { val->store8  (ptr, val); }
store16(Ptr ptr,I32 val)1193     SI void store16 (Ptr ptr, I32 val)                    { val->store16 (ptr, val); }
store32(Ptr ptr,I32 val)1194     SI void store32 (Ptr ptr, I32 val)                    { val->store32 (ptr, val); }
storeF(Ptr ptr,F32 val)1195     SI void storeF  (Ptr ptr, F32 val)                    { val->storeF  (ptr, val); }
store64(Ptr ptr,I32 lo,I32 hi)1196     SI void store64 (Ptr ptr, I32 lo, I32 hi)             { lo ->store64 (ptr, lo,hi); }
store128(Ptr ptr,I32 x,I32 y,I32 z,I32 w)1197     SI void store128(Ptr ptr, I32 x, I32 y, I32 z, I32 w) { x  ->store128(ptr, x,y,z,w); }
1198 
gather8(UPtr ptr,int off,I32 ix)1199     SI I32 gather8 (UPtr ptr, int off, I32 ix) { return ix->gather8 (ptr, off, ix); }
gather16(UPtr ptr,int off,I32 ix)1200     SI I32 gather16(UPtr ptr, int off, I32 ix) { return ix->gather16(ptr, off, ix); }
gather32(UPtr ptr,int off,I32 ix)1201     SI I32 gather32(UPtr ptr, int off, I32 ix) { return ix->gather32(ptr, off, ix); }
gatherF(UPtr ptr,int off,I32 ix)1202     SI F32 gatherF (UPtr ptr, int off, I32 ix) { return ix->gatherF (ptr, off, ix); }
1203 
gather8(Uniform u,I32 ix)1204     SI I32 gather8 (Uniform u, I32 ix) { return ix->gather8 (u, ix); }
gather16(Uniform u,I32 ix)1205     SI I32 gather16(Uniform u, I32 ix) { return ix->gather16(u, ix); }
gather32(Uniform u,I32 ix)1206     SI I32 gather32(Uniform u, I32 ix) { return ix->gather32(u, ix); }
gatherF(Uniform u,I32 ix)1207     SI F32 gatherF (Uniform u, I32 ix) { return ix->gatherF (u, ix); }
1208 
sqrt(F32 x)1209     SI F32        sqrt(F32 x) { return x->       sqrt(x); }
approx_log2(F32 x)1210     SI F32 approx_log2(F32 x) { return x->approx_log2(x); }
approx_pow2(F32 x)1211     SI F32 approx_pow2(F32 x) { return x->approx_pow2(x); }
approx_log(F32 x)1212     SI F32 approx_log (F32 x) { return x->approx_log (x); }
approx_exp(F32 x)1213     SI F32 approx_exp (F32 x) { return x->approx_exp (x); }
1214 
approx_powf(F32 base,F32 exp)1215     SI F32 approx_powf(F32   base, F32   exp) { return base->approx_powf(base, exp); }
approx_powf(F32 base,float exp)1216     SI F32 approx_powf(F32   base, float exp) { return base->approx_powf(base, exp); }
approx_powf(float base,F32 exp)1217     SI F32 approx_powf(float base, F32   exp) { return  exp->approx_powf(base, exp); }
1218 
approx_sin(F32 radians)1219     SI F32 approx_sin(F32 radians) { return radians->approx_sin(radians); }
approx_cos(F32 radians)1220     SI F32 approx_cos(F32 radians) { return radians->approx_cos(radians); }
approx_tan(F32 radians)1221     SI F32 approx_tan(F32 radians) { return radians->approx_tan(radians); }
1222 
approx_asin(F32 x)1223     SI F32 approx_asin(F32 x) { return x->approx_asin(x); }
approx_acos(F32 x)1224     SI F32 approx_acos(F32 x) { return x->approx_acos(x); }
approx_atan(F32 x)1225     SI F32 approx_atan(F32 x) { return x->approx_atan(x); }
approx_atan2(F32 y,F32 x)1226     SI F32 approx_atan2(F32 y, F32 x) { return x->approx_atan2(y, x); }
1227 
clamp01(F32 x)1228     SI F32   clamp01(F32 x) { return x->  clamp01(x); }
abs(F32 x)1229     SI F32       abs(F32 x) { return x->      abs(x); }
ceil(F32 x)1230     SI F32      ceil(F32 x) { return x->     ceil(x); }
fract(F32 x)1231     SI F32     fract(F32 x) { return x->    fract(x); }
floor(F32 x)1232     SI F32     floor(F32 x) { return x->    floor(x); }
is_NaN(F32 x)1233     SI I32    is_NaN(F32 x) { return x->   is_NaN(x); }
is_finite(F32 x)1234     SI I32 is_finite(F32 x) { return x->is_finite(x); }
1235 
trunc(F32 x)1236     SI I32      trunc(F32 x) { return x->      trunc(x); }
round(F32 x)1237     SI I32      round(F32 x) { return x->      round(x); }
pun_to_I32(F32 x)1238     SI I32 pun_to_I32(F32 x) { return x-> pun_to_I32(x); }
pun_to_F32(I32 x)1239     SI F32 pun_to_F32(I32 x) { return x-> pun_to_F32(x); }
to_F32(I32 x)1240     SI F32     to_F32(I32 x) { return x->     to_F32(x); }
to_fp16(F32 x)1241     SI I32    to_fp16(F32 x) { return x->    to_fp16(x); }
from_fp16(I32 x)1242     SI F32  from_fp16(I32 x) { return x->  from_fp16(x); }
1243 
lerp(F32 lo,F32 hi,F32 t)1244     SI F32 lerp(F32   lo, F32   hi, F32   t) { return lo->lerp(lo,hi,t); }
lerp(F32 lo,F32 hi,float t)1245     SI F32 lerp(F32   lo, F32   hi, float t) { return lo->lerp(lo,hi,t); }
lerp(F32 lo,float hi,F32 t)1246     SI F32 lerp(F32   lo, float hi, F32   t) { return lo->lerp(lo,hi,t); }
lerp(F32 lo,float hi,float t)1247     SI F32 lerp(F32   lo, float hi, float t) { return lo->lerp(lo,hi,t); }
lerp(float lo,F32 hi,F32 t)1248     SI F32 lerp(float lo, F32   hi, F32   t) { return hi->lerp(lo,hi,t); }
lerp(float lo,F32 hi,float t)1249     SI F32 lerp(float lo, F32   hi, float t) { return hi->lerp(lo,hi,t); }
lerp(float lo,float hi,F32 t)1250     SI F32 lerp(float lo, float hi, F32   t) { return  t->lerp(lo,hi,t); }
1251 
clamp(F32 x,F32 lo,F32 hi)1252     SI F32 clamp(F32   x, F32   lo, F32   hi) { return  x->clamp(x,lo,hi); }
clamp(F32 x,F32 lo,float hi)1253     SI F32 clamp(F32   x, F32   lo, float hi) { return  x->clamp(x,lo,hi); }
clamp(F32 x,float lo,F32 hi)1254     SI F32 clamp(F32   x, float lo, F32   hi) { return  x->clamp(x,lo,hi); }
clamp(F32 x,float lo,float hi)1255     SI F32 clamp(F32   x, float lo, float hi) { return  x->clamp(x,lo,hi); }
clamp(float x,F32 lo,F32 hi)1256     SI F32 clamp(float x, F32   lo, F32   hi) { return lo->clamp(x,lo,hi); }
clamp(float x,F32 lo,float hi)1257     SI F32 clamp(float x, F32   lo, float hi) { return lo->clamp(x,lo,hi); }
clamp(float x,float lo,F32 hi)1258     SI F32 clamp(float x, float lo, F32   hi) { return hi->clamp(x,lo,hi); }
1259 
1260     SI I32 operator<<(I32 x, int bits) { return x->shl(x, bits); }
shl(I32 x,int bits)1261     SI I32        shl(I32 x, int bits) { return x->shl(x, bits); }
shr(I32 x,int bits)1262     SI I32        shr(I32 x, int bits) { return x->shr(x, bits); }
sra(I32 x,int bits)1263     SI I32        sra(I32 x, int bits) { return x->sra(x, bits); }
1264 
1265     SI I32 operator&(I32 x, I32 y) { return x->bit_and(x,y); }
1266     SI I32 operator&(I32 x, int y) { return x->bit_and(x,y); }
1267     SI I32 operator&(int x, I32 y) { return y->bit_and(x,y); }
1268 
1269     SI I32 operator|(I32 x, I32 y) { return x->bit_or (x,y); }
1270     SI I32 operator|(I32 x, int y) { return x->bit_or (x,y); }
1271     SI I32 operator|(int x, I32 y) { return y->bit_or (x,y); }
1272 
1273     SI I32 operator^(I32 x, I32 y) { return x->bit_xor(x,y); }
1274     SI I32 operator^(I32 x, int y) { return x->bit_xor(x,y); }
1275     SI I32 operator^(int x, I32 y) { return y->bit_xor(x,y); }
1276 
1277     SI I32& operator&=(I32& x, I32 y) { return (x = x & y); }
1278     SI I32& operator&=(I32& x, int y) { return (x = x & y); }
1279     SI I32& operator|=(I32& x, I32 y) { return (x = x | y); }
1280     SI I32& operator|=(I32& x, int y) { return (x = x | y); }
1281     SI I32& operator^=(I32& x, I32 y) { return (x = x ^ y); }
1282     SI I32& operator^=(I32& x, int y) { return (x = x ^ y); }
1283 
bit_clear(I32 x,I32 y)1284     SI I32 bit_clear(I32 x, I32 y) { return x->bit_clear(x,y); }
bit_clear(I32 x,int y)1285     SI I32 bit_clear(I32 x, int y) { return x->bit_clear(x,y); }
bit_clear(int x,I32 y)1286     SI I32 bit_clear(int x, I32 y) { return y->bit_clear(x,y); }
1287 
select(I32 c,I32 t,I32 f)1288     SI I32 select(I32 c, I32 t, I32 f) { return c->select(c,          t ,          f ); }
select(I32 c,I32 t,int f)1289     SI I32 select(I32 c, I32 t, int f) { return c->select(c,          t , c->splat(f)); }
select(I32 c,int t,I32 f)1290     SI I32 select(I32 c, int t, I32 f) { return c->select(c, c->splat(t),          f ); }
select(I32 c,int t,int f)1291     SI I32 select(I32 c, int t, int f) { return c->select(c, c->splat(t), c->splat(f)); }
1292 
select(I32 c,F32 t,F32 f)1293     SI F32 select(I32 c, F32   t, F32   f) { return c->select(c,          t ,          f ); }
select(I32 c,F32 t,float f)1294     SI F32 select(I32 c, F32   t, float f) { return c->select(c,          t , c->splat(f)); }
select(I32 c,float t,F32 f)1295     SI F32 select(I32 c, float t, F32   f) { return c->select(c, c->splat(t),          f ); }
select(I32 c,float t,float f)1296     SI F32 select(I32 c, float t, float f) { return c->select(c, c->splat(t), c->splat(f)); }
1297 
extract(I32 x,int bits,I32 z)1298     SI I32 extract(I32 x, int bits, I32 z) { return x->extract(x,bits,z); }
extract(I32 x,int bits,int z)1299     SI I32 extract(I32 x, int bits, int z) { return x->extract(x,bits,z); }
extract(int x,int bits,I32 z)1300     SI I32 extract(int x, int bits, I32 z) { return z->extract(x,bits,z); }
1301 
pack(I32 x,I32 y,int bits)1302     SI I32 pack(I32 x, I32 y, int bits) { return x->pack   (x,y,bits); }
pack(I32 x,int y,int bits)1303     SI I32 pack(I32 x, int y, int bits) { return x->pack   (x,y,bits); }
pack(int x,I32 y,int bits)1304     SI I32 pack(int x, I32 y, int bits) { return y->pack   (x,y,bits); }
1305 
1306     SI I32 operator~(I32 x) { return ~0 ^ x; }
1307     SI I32 operator-(I32 x) { return  0 - x; }
1308     SI F32 operator-(F32 x) { return 0.0f - x; }
1309 
from_unorm(int bits,I32 x)1310     SI F32 from_unorm(int bits, I32 x) { return x->from_unorm(bits,x); }
to_unorm(int bits,F32 x)1311     SI I32   to_unorm(int bits, F32 x) { return x->  to_unorm(bits,x); }
1312 
store(PixelFormat f,Ptr p,Color c)1313     SI void store(PixelFormat f, Ptr p, Color c) { return c->store(f,p,c); }
1314 
gather(PixelFormat f,UPtr p,int off,I32 ix)1315     SI Color gather(PixelFormat f, UPtr p, int off, I32 ix) { return ix->gather(f,p,off,ix); }
gather(PixelFormat f,Uniform u,I32 ix)1316     SI Color gather(PixelFormat f, Uniform u     , I32 ix)  { return ix->gather(f,u,ix); }
1317 
premul(F32 * r,F32 * g,F32 * b,F32 a)1318     SI void   premul(F32* r, F32* g, F32* b, F32 a) { a->  premul(r,g,b,a); }
unpremul(F32 * r,F32 * g,F32 * b,F32 a)1319     SI void unpremul(F32* r, F32* g, F32* b, F32 a) { a->unpremul(r,g,b,a); }
1320 
premul(Color c)1321     SI Color   premul(Color c) { return c->  premul(c); }
unpremul(Color c)1322     SI Color unpremul(Color c) { return c->unpremul(c); }
1323 
lerp(Color lo,Color hi,F32 t)1324     SI Color lerp(Color lo, Color hi, F32 t) { return t->lerp(lo,hi,t); }
1325 
blend(SkBlendMode m,Color s,Color d)1326     SI Color blend(SkBlendMode m, Color s, Color d) { return s->blend(m,s,d); }
1327 
clamp01(Color c)1328     SI Color clamp01(Color c) { return c->clamp01(c); }
1329 
to_hsla(Color c)1330     SI HSLA  to_hsla(Color c) { return c->to_hsla(c); }
to_rgba(HSLA c)1331     SI Color to_rgba(HSLA  c) { return c->to_rgba(c); }
1332 
1333     // Evaluate polynomials: ax^n + bx^(n-1) + ... for n >= 1
1334     template <typename F32_or_float, typename... Rest>
poly(F32 x,F32_or_float a,float b,Rest...rest)1335     SI F32 poly(F32 x, F32_or_float a, float b, Rest... rest) {
1336         if constexpr (sizeof...(rest) == 0) {
1337             return x*a+b;
1338         } else {
1339             return poly(x, x*a+b, rest...);
1340         }
1341     }
1342 #undef SI
1343 }  // namespace skvm
1344 
1345 #endif//SkVM_DEFINED
1346