• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2019 Google LLC
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 
8 #ifndef SkVM_DEFINED
9 #define SkVM_DEFINED
10 
11 #include "include/core/SkBlendMode.h"
12 #include "include/core/SkColor.h"
13 #include "include/core/SkSpan.h"
14 #include "include/private/SkMacros.h"
15 #include "include/private/SkTArray.h"
16 #include "include/private/SkTHash.h"
17 #include "src/core/SkVM_fwd.h"
18 #include <vector>      // std::vector
19 
20 class SkWStream;
21 
22 #if defined(SKVM_JIT_WHEN_POSSIBLE) && !defined(SK_BUILD_FOR_IOS)
23     #if defined(__x86_64__) || defined(_M_X64)
24         #if defined(_WIN32) || defined(__linux) || defined(__APPLE__)
25             #define SKVM_JIT
26         #endif
27     #endif
28     #if defined(__aarch64__)
29         #if defined(__ANDROID__) || defined(__APPLE__)
30             #define SKVM_JIT
31         #endif
32     #endif
33 #endif
34 
35 #if 0
36     #define SKVM_LLVM
37 #endif
38 
39 #if 0
40     #undef SKVM_JIT
41 #endif
42 
43 namespace skvm {
44 
45     namespace viz {
46         class Visualizer;
47     }
48 
49     class Assembler {
50     public:
51         explicit Assembler(void* buf);
52 
53         size_t size() const;
54 
55         // Order matters... GP64, Xmm, Ymm values match 4-bit register encoding for each.
// x86-64 general-purpose registers.  The enumerator value is used as the
// 4-bit register encoding, so the order here must not change.
enum GP64 {
    rax, rcx, rdx, rbx,
    rsp, rbp, rsi, rdi,
    r8,  r9,  r10, r11,
    r12, r13, r14, r15,
};
// 128-bit vector registers.  The enumerator value is used as the 4-bit
// register encoding, so the order here must not change.
enum Xmm {
    xmm0,  xmm1,  xmm2,  xmm3,
    xmm4,  xmm5,  xmm6,  xmm7,
    xmm8,  xmm9,  xmm10, xmm11,
    xmm12, xmm13, xmm14, xmm15,
};
// 256-bit vector registers.  The enumerator value is used as the 4-bit
// register encoding, so the order here must not change.
enum Ymm {
    ymm0,  ymm1,  ymm2,  ymm3,
    ymm4,  ymm5,  ymm6,  ymm7,
    ymm8,  ymm9,  ymm10, ymm11,
    ymm12, ymm13, ymm14, ymm15,
};
68 
69         // X and V values match 5-bit encoding for each (nothing tricky).
// aarch64 general-purpose registers; the enumerator value is the 5-bit encoding.
enum X {
    x0,  x1,  x2,  x3,
    x4,  x5,  x6,  x7,
    x8,  x9,  x10, x11,
    x12, x13, x14, x15,
    x16, x17, x18, x19,
    x20, x21, x22, x23,
    x24, x25, x26, x27,
    x28, x29, x30, xzr,
    sp = xzr,  // sp and xzr share the same value (31).
};
// aarch64 vector registers; the enumerator value is the 5-bit encoding.
enum V {
    v0,  v1,  v2,  v3,
    v4,  v5,  v6,  v7,
    v8,  v9,  v10, v11,
    v12, v13, v14, v15,
    v16, v17, v18, v19,
    v20, v21, v22, v23,
    v24, v25, v26, v27,
    v28, v29, v30, v31,
};
82 
83         void bytes(const void*, int);
84         void byte(uint8_t);
85         void word(uint32_t);
86 
87         struct Label {
88             int                                      offset = 0;
89             enum { NotYetSet, ARMDisp19, X86Disp32 } kind = NotYetSet;
90             SkSTArray<2, int>                        references;
91         };
92 
93         // x86-64
94 
95         void align(int mod);
96 
97         void int3();
98         void vzeroupper();
99         void ret();
100 
101         // Mem represents a value at base + disp + scale*index,
102         // or simply at base + disp if index=rsp.
// Index scale for Mem operands (values 0..3, in this order).
enum Scale {
    ONE,
    TWO,
    FOUR,
    EIGHT,
};
104         struct Mem {
105             GP64  base;
106             int   disp  = 0;
107             GP64  index = rsp;
108             Scale scale = ONE;
109         };
110 
111         struct Operand {
112             union {
113                 int    reg;
114                 Mem    mem;
115                 Label* label;
116             };
117             enum { REG, MEM, LABEL } kind;
118 
OperandOperand119             Operand(GP64   r) : reg  (r), kind(REG  ) {}
OperandOperand120             Operand(Xmm    r) : reg  (r), kind(REG  ) {}
OperandOperand121             Operand(Ymm    r) : reg  (r), kind(REG  ) {}
OperandOperand122             Operand(Mem    m) : mem  (m), kind(MEM  ) {}
OperandOperand123             Operand(Label* l) : label(l), kind(LABEL) {}
124         };
125 
126         void vpand (Ymm dst, Ymm x, Operand y);
127         void vpandn(Ymm dst, Ymm x, Operand y);
128         void vpor  (Ymm dst, Ymm x, Operand y);
129         void vpxor (Ymm dst, Ymm x, Operand y);
130 
131         void vpaddd (Ymm dst, Ymm x, Operand y);
132         void vpsubd (Ymm dst, Ymm x, Operand y);
133         void vpmulld(Ymm dst, Ymm x, Operand y);
134 
135         void vpaddw   (Ymm dst, Ymm x, Operand y);
136         void vpsubw   (Ymm dst, Ymm x, Operand y);
137         void vpmullw  (Ymm dst, Ymm x, Operand y);
138 
139         void vpabsw   (Ymm dst, Operand x);
140         void vpavgw   (Ymm dst, Ymm x, Operand y);  // dst = (x+y+1)>>1, unsigned.
141         void vpmulhrsw(Ymm dst, Ymm x, Operand y);  // dst = (x*y + (1<<14)) >> 15, signed.
142         void vpminsw  (Ymm dst, Ymm x, Operand y);
143         void vpminuw  (Ymm dst, Ymm x, Operand y);
144         void vpmaxsw  (Ymm dst, Ymm x, Operand y);
145         void vpmaxuw  (Ymm dst, Ymm x, Operand y);
146 
147         void vaddps(Ymm dst, Ymm x, Operand y);
148         void vsubps(Ymm dst, Ymm x, Operand y);
149         void vmulps(Ymm dst, Ymm x, Operand y);
150         void vdivps(Ymm dst, Ymm x, Operand y);
151         void vminps(Ymm dst, Ymm x, Operand y);
152         void vmaxps(Ymm dst, Ymm x, Operand y);
153 
154         void vsqrtps(Ymm dst, Operand x);
155 
156         void vfmadd132ps(Ymm dst, Ymm x, Operand y);
157         void vfmadd213ps(Ymm dst, Ymm x, Operand y);
158         void vfmadd231ps(Ymm dst, Ymm x, Operand y);
159 
160         void vfmsub132ps(Ymm dst, Ymm x, Operand y);
161         void vfmsub213ps(Ymm dst, Ymm x, Operand y);
162         void vfmsub231ps(Ymm dst, Ymm x, Operand y);
163 
164         void vfnmadd132ps(Ymm dst, Ymm x, Operand y);
165         void vfnmadd213ps(Ymm dst, Ymm x, Operand y);
166         void vfnmadd231ps(Ymm dst, Ymm x, Operand y);
167 
168         void vpackusdw(Ymm dst, Ymm x, Operand y);
169         void vpackuswb(Ymm dst, Ymm x, Operand y);
170 
171         void vpunpckldq(Ymm dst, Ymm x, Operand y);
172         void vpunpckhdq(Ymm dst, Ymm x, Operand y);
173 
174         void vpcmpeqd(Ymm dst, Ymm x, Operand y);
175         void vpcmpgtd(Ymm dst, Ymm x, Operand y);
176         void vpcmpeqw(Ymm dst, Ymm x, Operand y);
177         void vpcmpgtw(Ymm dst, Ymm x, Operand y);
178 
179         void vcmpps   (Ymm dst, Ymm x, Operand y, int imm);
vcmpeqps(Ymm dst,Ymm x,Operand y)180         void vcmpeqps (Ymm dst, Ymm x, Operand y) { this->vcmpps(dst,x,y,0); }
vcmpltps(Ymm dst,Ymm x,Operand y)181         void vcmpltps (Ymm dst, Ymm x, Operand y) { this->vcmpps(dst,x,y,1); }
vcmpleps(Ymm dst,Ymm x,Operand y)182         void vcmpleps (Ymm dst, Ymm x, Operand y) { this->vcmpps(dst,x,y,2); }
vcmpneqps(Ymm dst,Ymm x,Operand y)183         void vcmpneqps(Ymm dst, Ymm x, Operand y) { this->vcmpps(dst,x,y,4); }
184 
185         // Sadly, the x parameter cannot be a general Operand for these shifts.
186         void vpslld(Ymm dst, Ymm x, int imm);
187         void vpsrld(Ymm dst, Ymm x, int imm);
188         void vpsrad(Ymm dst, Ymm x, int imm);
189 
190         void vpsllw(Ymm dst, Ymm x, int imm);
191         void vpsrlw(Ymm dst, Ymm x, int imm);
192         void vpsraw(Ymm dst, Ymm x, int imm);
193 
194         void vpermq    (Ymm dst, Operand x, int imm);
195         void vperm2f128(Ymm dst, Ymm x, Operand y, int imm);
196         void vpermps   (Ymm dst, Ymm ix, Operand src);        // dst[i] = src[ix[i]]
197 
198         enum Rounding { NEAREST, FLOOR, CEIL, TRUNC, CURRENT };
199         void vroundps(Ymm dst, Operand x, Rounding);
200 
201         void vmovdqa(Ymm dst, Operand x);
202         void vmovups(Ymm dst, Operand x);
203         void vmovups(Xmm dst, Operand x);
204         void vmovups(Operand dst, Ymm x);
205         void vmovups(Operand dst, Xmm x);
206 
207         void vcvtdq2ps (Ymm dst, Operand x);
208         void vcvttps2dq(Ymm dst, Operand x);
209         void vcvtps2dq (Ymm dst, Operand x);
210 
211         void vcvtps2ph(Operand dst, Ymm x, Rounding);
212         void vcvtph2ps(Ymm dst, Operand x);
213 
214         void vpblendvb(Ymm dst, Ymm x, Operand y, Ymm z);
215 
216         void vpshufb(Ymm dst, Ymm x, Operand y);
217 
218         void vptest(Ymm x, Operand y);
219 
220         void vbroadcastss(Ymm dst, Operand y);
221 
222         void vpmovzxwd(Ymm dst, Operand src);   // dst = src, 128-bit, uint16_t -> int
223         void vpmovzxbd(Ymm dst, Operand src);   // dst = src,  64-bit, uint8_t  -> int
224 
225         void vmovq(Operand dst, Xmm src);  // dst = src,  64-bit
226         void vmovd(Operand dst, Xmm src);  // dst = src,  32-bit
227         void vmovd(Xmm dst, Operand src);  // dst = src,  32-bit
228 
229         void vpinsrd(Xmm dst, Xmm src, Operand y, int imm);  // dst = src; dst[imm] = y, 32-bit
230         void vpinsrw(Xmm dst, Xmm src, Operand y, int imm);  // dst = src; dst[imm] = y, 16-bit
231         void vpinsrb(Xmm dst, Xmm src, Operand y, int imm);  // dst = src; dst[imm] = y,  8-bit
232 
233         void vextracti128(Operand dst, Ymm src, int imm);    // dst = src[imm], 128-bit
234         void vpextrd     (Operand dst, Xmm src, int imm);    // dst = src[imm],  32-bit
235         void vpextrw     (Operand dst, Xmm src, int imm);    // dst = src[imm],  16-bit
236         void vpextrb     (Operand dst, Xmm src, int imm);    // dst = src[imm],   8-bit
237 
238         // if (mask & 0x8000'0000) {
239         //     dst = base[scale*ix];
240         // }
241         // mask = 0;
242         void vgatherdps(Ymm dst, Scale scale, Ymm ix, GP64 base, Ymm mask);
243 
244 
245         void label(Label*);
246 
247         void jmp(Label*);
248         void je (Label*);
249         void jne(Label*);
250         void jl (Label*);
251         void jc (Label*);
252 
253         void add (Operand dst, int imm);
254         void sub (Operand dst, int imm);
255         void cmp (Operand dst, int imm);
256         void mov (Operand dst, int imm);
257         void movb(Operand dst, int imm);
258 
259         void add (Operand dst, GP64 x);
260         void sub (Operand dst, GP64 x);
261         void cmp (Operand dst, GP64 x);
262         void mov (Operand dst, GP64 x);
263         void movb(Operand dst, GP64 x);
264 
265         void add (GP64 dst, Operand x);
266         void sub (GP64 dst, Operand x);
267         void cmp (GP64 dst, Operand x);
268         void mov (GP64 dst, Operand x);
269         void movb(GP64 dst, Operand x);
270 
271         // Disambiguators... choice is arbitrary (but generates different code!).
add(GP64 dst,GP64 x)272         void add (GP64 dst, GP64 x) { this->add (Operand(dst), x); }
sub(GP64 dst,GP64 x)273         void sub (GP64 dst, GP64 x) { this->sub (Operand(dst), x); }
cmp(GP64 dst,GP64 x)274         void cmp (GP64 dst, GP64 x) { this->cmp (Operand(dst), x); }
mov(GP64 dst,GP64 x)275         void mov (GP64 dst, GP64 x) { this->mov (Operand(dst), x); }
movb(GP64 dst,GP64 x)276         void movb(GP64 dst, GP64 x) { this->movb(Operand(dst), x); }
277 
278         void movzbq(GP64 dst, Operand x);  // dst = x, uint8_t  -> int
279         void movzwq(GP64 dst, Operand x);  // dst = x, uint16_t -> int
280 
281         // aarch64
282 
283         // d = op(n,m)
284         using DOpNM = void(V d, V n, V m);
285         DOpNM  and16b, orr16b, eor16b, bic16b, bsl16b,
286                add4s,  sub4s,  mul4s,
287               cmeq4s, cmgt4s,
288                        sub8h,  mul8h,
289               fadd4s, fsub4s, fmul4s, fdiv4s, fmin4s, fmax4s,
290               fcmeq4s, fcmgt4s, fcmge4s,
291               tbl,
292               uzp14s, uzp24s,
293               zip14s, zip24s;
294 
295         // TODO: there are also float ==,<,<=,>,>= instructions with an immediate 0.0f,
296         // and the register comparison > and >= can also compare absolute values.  Interesting.
297 
298         // d += n*m
299         void fmla4s(V d, V n, V m);
300 
301         // d -= n*m
302         void fmls4s(V d, V n, V m);
303 
304         // d = op(n,imm)
305         using DOpNImm = void(V d, V n, int imm);
306         DOpNImm sli4s,
307                 shl4s, sshr4s, ushr4s,
308                                ushr8h;
309 
310         // d = op(n)
311         using DOpN = void(V d, V n);
312         DOpN not16b,    // d = ~n
313              fneg4s,    // d = -n
314              fsqrt4s,   // d = sqrtf(n)
315              scvtf4s,   // int -> float
316              fcvtzs4s,  // truncate float -> int
317              fcvtns4s,  // round float -> int  (nearest even)
318              frintp4s,  // round float -> int as float, toward plus infinity  (ceil)
319              frintm4s,  // round float -> int as float, toward minus infinity (floor)
320              fcvtn,     // f32 -> f16 in low half
321              fcvtl,     // f16 in low half -> f32
322              xtns2h,    // u32 -> u16
323              xtnh2b,    // u16 -> u8
324              uxtlb2h,   // u8 -> u16    (TODO: this is a special case of ushll.8h)
325              uxtlh2s,   // u16 -> u32   (TODO: this is a special case of ushll.4s)
326              uminv4s;   // dst[0] = min(n[0],n[1],n[2],n[3]), n as unsigned
327 
328         void brk (int imm16);
329         void ret (X);
330         void add (X d, X n, int imm12);
331         void sub (X d, X n, int imm12);
332         void subs(X d, X n, int imm12);  // subtract setting condition flags
333 
334         enum Shift { LSL,LSR,ASR,ROR };
335         void add (X d, X n, X m, Shift=LSL, int imm6=0);  // d=n+Shift(m,imm6), for Shift != ROR.
336 
337         // There's another encoding for unconditional branches that can jump further,
338         // but this one encoded as b.al is simple to implement and should be fine.
b(Label * l)339         void b  (Label* l) { this->b(Condition::al, l); }
bne(Label * l)340         void bne(Label* l) { this->b(Condition::ne, l); }
blt(Label * l)341         void blt(Label* l) { this->b(Condition::lt, l); }
342 
343         // "cmp ..." is just an assembler mnemonic for "subs xzr, ..."!
cmp(X n,int imm12)344         void cmp(X n, int imm12) { this->subs(xzr, n, imm12); }
345 
346         // Compare and branch if zero/non-zero, as if
347         //      cmp(t,0)
348         //      beq/bne(l)
349         // but without setting condition flags.
350         void cbz (X t, Label* l);
351         void cbnz(X t, Label* l);
352 
353         // TODO: there are ldur variants with unscaled imm, useful?
354         void ldrd(X dst, X src, int imm12=0);  // 64-bit dst = *(src+imm12*8)
355         void ldrs(X dst, X src, int imm12=0);  // 32-bit dst = *(src+imm12*4)
356         void ldrh(X dst, X src, int imm12=0);  // 16-bit dst = *(src+imm12*2)
357         void ldrb(X dst, X src, int imm12=0);  //  8-bit dst = *(src+imm12)
358 
359         void ldrq(V dst, Label*);  // 128-bit PC-relative load
360 
361         void ldrq(V dst, X src, int imm12=0);  // 128-bit dst = *(src+imm12*16)
362         void ldrd(V dst, X src, int imm12=0);  //  64-bit dst = *(src+imm12*8)
363         void ldrs(V dst, X src, int imm12=0);  //  32-bit dst = *(src+imm12*4)
364         void ldrh(V dst, X src, int imm12=0);  //  16-bit dst = *(src+imm12*2)
365         void ldrb(V dst, X src, int imm12=0);  //   8-bit dst = *(src+imm12)
366 
367         void strs(X src, X dst, int imm12=0);  // 32-bit *(dst+imm12*4) = src
368 
369         void strq(V src, X dst, int imm12=0);  // 128-bit *(dst+imm12*16) = src
370         void strd(V src, X dst, int imm12=0);  //  64-bit *(dst+imm12*8)  = src
371         void strs(V src, X dst, int imm12=0);  //  32-bit *(dst+imm12*4)  = src
372         void strh(V src, X dst, int imm12=0);  //  16-bit *(dst+imm12*2)  = src
373         void strb(V src, X dst, int imm12=0);  //   8-bit *(dst+imm12)    = src
374 
375         void movs(X dst, V src, int lane);  // dst = 32-bit src[lane]
376         void inss(V dst, X src, int lane);  // dst[lane] = 32-bit src
377 
378         void dup4s  (V dst, X src);  // Each 32-bit lane = src
379 
380         void ld1r4s (V dst, X src);  // Each 32-bit lane = *src
381         void ld1r8h (V dst, X src);  // Each 16-bit lane = *src
382         void ld1r16b(V dst, X src);  // Each  8-bit lane = *src
383 
384         void ld24s(V dst, X src);  // deinterleave(dst,dst+1)             = 256-bit *src
385         void ld44s(V dst, X src);  // deinterleave(dst,dst+1,dst+2,dst+3) = 512-bit *src
386         void st24s(V src, X dst);  // 256-bit *dst = interleave_32bit_lanes(src,src+1)
387         void st44s(V src, X dst);  // 512-bit *dst = interleave_32bit_lanes(src,src+1,src+2,src+3)
388 
389         void ld24s(V dst, X src, int lane);  // Load 2 32-bit values into given lane of dst..dst+1
390         void ld44s(V dst, X src, int lane);  // Load 4 32-bit values into given lane of dst..dst+3
391 
392     private:
393         uint8_t* fCode;
394         size_t   fSize;
395 
396         // x86-64
397         enum W { W0, W1 };      // Are the lanes 64-bit (W1) or default (W0)?  Intel Vol 2A 2.3.5.5
398         enum L { L128, L256 };  // Is this a 128- or 256-bit operation?        Intel Vol 2A 2.3.6.2
399 
400         // Helpers for vector instructions.
401         void op(int prefix, int map, int opcode, int dst, int x, Operand y, W,L);
402         void op(int p, int m, int o, Ymm d, Ymm x, Operand y, W w=W0) { op(p,m,o, d,x,y,w,L256); }
403         void op(int p, int m, int o, Ymm d,        Operand y, W w=W0) { op(p,m,o, d,0,y,w,L256); }
404         void op(int p, int m, int o, Xmm d, Xmm x, Operand y, W w=W0) { op(p,m,o, d,x,y,w,L128); }
405         void op(int p, int m, int o, Xmm d,        Operand y, W w=W0) { op(p,m,o, d,0,y,w,L128); }
406 
407         // Helpers for GP64 instructions.
408         void op(int opcode, Operand dst, GP64 x);
409         void op(int opcode, int opcode_ext, Operand dst, int imm);
410 
411         void jump(uint8_t condition, Label*);
412         int disp32(Label*);
413         void imm_byte_after_operand(const Operand&, int byte);
414 
415         // aarch64
416 
417         // Opcode for 3-arguments ops is split between hi and lo:
418         //    [11 bits hi] [5 bits m] [6 bits lo] [5 bits n] [5 bits d]
419         void op(uint32_t hi, V m, uint32_t lo, V n, V d);
420 
421         // 0,1,2-argument ops, with or without an immediate:
422         //    [ 22 bits op ] [5 bits n] [5 bits d]
423         // Any immediate falls in the middle somewhere overlapping with either op, n, or both.
424         void op(uint32_t op22, V n, V d, int imm=0);
425         void op(uint32_t op22, X n, V d, int imm=0) { this->op(op22,(V)n,   d,imm); }
426         void op(uint32_t op22, V n, X d, int imm=0) { this->op(op22,   n,(V)d,imm); }
427         void op(uint32_t op22, X n, X d, int imm=0) { this->op(op22,(V)n,(V)d,imm); }
428         void op(uint32_t op22,           int imm=0) { this->op(op22,(V)0,(V)0,imm); }
429         // (1-argument ops don't seem to have a consistent convention of passing as n or d.)
430 
431 
432         // Order matters... value is 4-bit encoding for condition code.
433         enum class Condition { eq,ne,cs,cc,mi,pl,vs,vc,hi,ls,ge,lt,gt,le,al };
434         void b(Condition, Label*);
435         int disp19(Label*);
436     };
437 
438     // Order matters a little: Ops <=store128 are treated as having side effects.
// X-macro listing every skvm op, in enumerator order.  The order is load-bearing:
// the range predicates on Op (side effects, varying memory, always-varying,
// trace) compare against the positions of specific ops in this list.
#define SKVM_OPS(M)                                   \
    M(assert_true)                                    \
    M(trace_line)  M(trace_var)   M(trace_enter)      \
    M(trace_exit)  M(trace_scope)                     \
    M(store8)  M(store16)  M(store32)                 \
    M(store64) M(store128)                            \
    M(load8)   M(load16)   M(load32)                  \
    M(load64)  M(load128)                             \
    M(index)                                          \
    M(gather8) M(gather16) M(gather32)                \
    M(uniform32)                                      \
    M(array32)                                        \
    M(splat)                                          \
    M(add_f32) M(add_i32)                             \
    M(sub_f32) M(sub_i32)                             \
    M(mul_f32) M(mul_i32)                             \
    M(div_f32)                                        \
    M(min_f32) M(max_f32)                             \
    M(fma_f32) M(fms_f32) M(fnma_f32)                 \
    M(sqrt_f32)                                       \
    M(shl_i32) M(shr_i32) M(sra_i32)                  \
    M(ceil) M(floor) M(trunc) M(round)                \
    M(to_fp16) M(from_fp16)                           \
    M(to_f32)                                         \
    M(neq_f32) M(eq_f32) M(eq_i32)                    \
    M(gte_f32) M(gt_f32) M(gt_i32)                    \
    M(bit_and) M(bit_or) M(bit_xor) M(bit_clear)      \
    M(select)                                         \
    M(duplicate)
// End of SKVM_OPS

// One enumerator per entry of SKVM_OPS, numbered from 0 in list order.
enum class Op : int {
#define M(name) name,
    SKVM_OPS(M)
#undef M
};
472 
has_side_effect(Op op)473     static inline bool has_side_effect(Op op) {
474         return op <= Op::store128;
475     }
touches_varying_memory(Op op)476     static inline bool touches_varying_memory(Op op) {
477         return Op::store8 <= op && op <= Op::load128;
478     }
is_always_varying(Op op)479     static inline bool is_always_varying(Op op) {
480         return Op::store8 <= op && op <= Op::index;
481     }
is_trace(Op op)482     static inline bool is_trace(Op op) {
483         return Op::trace_line <= op && op <= Op::trace_scope;
484     }
485 
486     using Val = int;
487     // We reserve an impossible Val ID as a sentinel
488     // NA meaning none, n/a, null, nil, etc.
489     static const Val NA = -1;
490 
491     // Ptr and UPtr are an index into the registers args[]. The two styles of using args are
492     // varyings and uniforms. Varyings use Ptr, have a stride associated with them, and are
493     // evaluated every time through the loop. Uniforms use UPtr, don't have a stride, and are
494     // usually hoisted above the loop.
// Handle for a program argument: ix is an index into args[].
struct Ptr {
    int ix;
};
496     struct UPtr : public Ptr {};
497 
498     bool operator!=(Ptr a, Ptr b);
499 
500     struct I32 {
501         Builder* builder = nullptr;
502         Val      id      = NA;
503         explicit operator bool() const { return id != NA; }
504         Builder* operator->()    const { return builder; }
505     };
506 
507     struct F32 {
508         Builder* builder = nullptr;
509         Val      id      = NA;
510         explicit operator bool() const { return id != NA; }
511         Builder* operator->()    const { return builder; }
512     };
513 
514     struct Color {
515         F32 r,g,b,a;
516         explicit operator bool() const { return r && g && b && a; }
517         Builder* operator->()    const { return a.operator->(); }
518     };
519 
520     struct HSLA {
521         F32 h,s,l,a;
522         explicit operator bool() const { return h && s && l && a; }
523         Builder* operator->()    const { return a.operator->(); }
524     };
525 
526     struct Coord {
527         F32 x,y;
528         explicit operator bool() const { return x && y; }
529         Builder* operator->()    const { return x.operator->(); }
530     };
531 
532     struct Uniform {
533         UPtr ptr;
534         int offset;
535     };
536     struct Uniforms {
537         UPtr             base;
538         std::vector<int> buf;
539 
UniformsUniforms540         Uniforms(UPtr ptr, int init) : base(ptr), buf(init) {}
541 
pushUniforms542         Uniform push(int val) {
543             buf.push_back(val);
544             return {base, (int)( sizeof(int)*(buf.size() - 1) )};
545         }
546 
pushFUniforms547         Uniform pushF(float val) {
548             int bits;
549             memcpy(&bits, &val, sizeof(int));
550             return this->push(bits);
551         }
552 
pushPtrUniforms553         Uniform pushPtr(const void* ptr) {
554             // Jam the pointer into 1 or 2 ints.
555             int ints[sizeof(ptr) / sizeof(int)];
556             memcpy(ints, &ptr, sizeof(ptr));
557             for (int bits : ints) {
558                 buf.push_back(bits);
559             }
560             return {base, (int)( sizeof(int)*(buf.size() - SK_ARRAY_COUNT(ints)) )};
561         }
562 
pushArrayUniforms563         Uniform pushArray(int32_t a[]) {
564             return this->pushPtr(a);
565         }
566 
pushArrayFUniforms567         Uniform pushArrayF(float a[]) {
568             return this->pushPtr(a);
569         }
570     };
571 
// Describes how a pixel's channels are packed: the encoding, then each
// channel's bit count, then each channel's shift.
struct PixelFormat {
    enum {
        UNORM,
        SRGB,
        FLOAT,
    } encoding;

    int r_bits;
    int g_bits;
    int b_bits;
    int a_bits;

    int r_shift;
    int g_shift;
    int b_shift;
    int a_shift;
};
577     PixelFormat SkColorType_to_PixelFormat(SkColorType);
578 
579     SK_BEGIN_REQUIRE_DENSE
580     struct Instruction {
581         Op  op;              // v* = op(x,y,z,w,immA,immB), where * == index of this Instruction.
582         Val x,y,z,w;         // Enough arguments for Op::store128.
583         int immA,immB,immC;  // Immediate bit pattern, shift count, pointer index, byte offset, etc.
584     };
585     SK_END_REQUIRE_DENSE
586 
587     bool operator==(const Instruction&, const Instruction&);
588     struct InstructionHash {
589         uint32_t operator()(const Instruction&, uint32_t seed=0) const;
590     };
591 
592     struct OptimizedInstruction {
593         Op op;
594         Val x,y,z,w;
595         int immA,immB,immC;
596 
597         Val  death;
598         bool can_hoist;
599     };
600 
// Optional capabilities a Builder may be told about; both default to off.
struct Features {
    bool fma{false};
    bool fp16{false};
};
605 
// Callback interface with one pure-virtual method per kind of trace event.
// NOTE(review): presumably each method is invoked by the matching trace_* op
// (trace_line -> line, trace_var -> var, ...) -- confirm in the interpreter.
class TraceHook {
public:
    virtual ~TraceHook() = default;

    // A source line was reached.
    virtual void line(int lineNum) = 0;

    // The variable in `slot` now holds `val`.
    virtual void var(int slot, int32_t val) = 0;

    // Function number fnIdx was entered / exited.
    virtual void enter(int fnIdx) = 0;
    virtual void exit(int fnIdx) = 0;

    // Scope depth changed by `delta`.
    virtual void scope(int delta) = 0;
};
615 
616     class Builder {
617     public:
618         Builder(bool createDuplicates = false);
619         Builder(Features, bool createDuplicates = false);
620 
621         Program done(const char* debug_name,
622                      bool allow_jit,
623                      std::unique_ptr<viz::Visualizer> visualizer) const;
624         Program done(const char* debug_name = nullptr,
625                      bool allow_jit=true) const;
626 
627         // Mostly for debugging, tests, etc.
program()628         std::vector<Instruction> program() const { return fProgram; }
629         std::vector<OptimizedInstruction> optimize(viz::Visualizer* visualizer = nullptr) const;
630 
631         // Returns a trace-hook ID which must be passed to the trace opcodes.
632         int attachTraceHook(TraceHook*);
633 
634         // Convenience arg() wrappers for most common strides, sizeof(T) and 0.
635         template <typename T>
varying()636         Ptr varying() { return this->arg(sizeof(T)); }
varying(int stride)637         Ptr varying(int stride) { SkASSERT(stride > 0); return this->arg(stride); }
uniform()638         UPtr uniform() { Ptr p = this->arg(0); return UPtr{{p.ix}}; }
639 
640         // TODO: allow uniform (i.e. Ptr) offsets to store* and load*?
641         // TODO: sign extension (signed types) for <32-bit loads?
642         // TODO: unsigned integer operations where relevant (just comparisons?)?
643 
644         // Assert cond is true, printing debug when not.
645         void assert_true(I32 cond, I32 debug);
assert_true(I32 cond,F32 debug)646         void assert_true(I32 cond, F32 debug) { assert_true(cond, pun_to_I32(debug)); }
assert_true(I32 cond)647         void assert_true(I32 cond)            { assert_true(cond, cond); }
648 
649         // Insert debug traces into the instruction stream
650         bool mergeMasks(I32& mask, I32& traceMask);
651         void trace_line (int traceHookID, I32 mask, I32 traceMask, int line);
652         void trace_var  (int traceHookID, I32 mask, I32 traceMask, int slot, I32 val);
653         void trace_enter(int traceHookID, I32 mask, I32 traceMask, int fnIdx);
654         void trace_exit (int traceHookID, I32 mask, I32 traceMask, int fnIdx);
655         void trace_scope(int traceHookID, I32 mask, I32 traceMask, int delta);
656 
657         // Store {8,16,32,64,128}-bit varying.
658         void store8  (Ptr ptr, I32 val);
659         void store16 (Ptr ptr, I32 val);
660         void store32 (Ptr ptr, I32 val);
storeF(Ptr ptr,F32 val)661         void storeF  (Ptr ptr, F32 val) { store32(ptr, pun_to_I32(val)); }
662         void store64 (Ptr ptr, I32 lo, I32 hi);              // *ptr = lo|(hi<<32)
663         void store128(Ptr ptr, I32 x, I32 y, I32 z, I32 w);  // *ptr = x|(y<<32)|(z<<64)|(w<<96)
664 
665         // Returns varying {n, n-1, n-2, ..., 1}, where n is the argument to Program::eval().
666         I32 index();
667 
668         // Load {8,16,32,64,128}-bit varying.
669         I32 load8  (Ptr ptr);
670         I32 load16 (Ptr ptr);
671         I32 load32 (Ptr ptr);
loadF(Ptr ptr)672         F32 loadF  (Ptr ptr) { return pun_to_F32(load32(ptr)); }
673         I32 load64 (Ptr ptr, int lane);  // Load 32-bit lane 0-1 of  64-bit value.
674         I32 load128(Ptr ptr, int lane);  // Load 32-bit lane 0-3 of 128-bit value.
675 
676         // Load i32/f32 uniform with byte-count offset.
677         I32 uniform32(UPtr ptr, int offset);
uniformF(UPtr ptr,int offset)678         F32 uniformF (UPtr ptr, int offset) { return pun_to_F32(uniform32(ptr,offset)); }
679 
680         // Load i32/f32 uniform with byte-count offset and a C-style array index. The address of
681         // the element is (*(ptr + byte-count offset))[index].
682         I32 array32  (UPtr ptr, int offset, int index);
arrayF(UPtr ptr,int offset,int index)683         F32 arrayF   (UPtr ptr, int offset, int index) {
684             return pun_to_F32(array32(ptr, offset, index));
685         }
686 
687         // Push and load this color as a uniform.
688         Color uniformColor(SkColor4f, Uniforms*);
689 
690         // Gather u8,u16,i32 with varying element-count index from *(ptr + byte-count offset).
691         I32 gather8 (UPtr ptr, int offset, I32 index);
692         I32 gather16(UPtr ptr, int offset, I32 index);
693         I32 gather32(UPtr ptr, int offset, I32 index);
gatherF(UPtr ptr,int offset,I32 index)694         F32 gatherF (UPtr ptr, int offset, I32 index) {
695             return pun_to_F32(gather32(ptr, offset, index));
696         }
697 
698         // Convenience methods for working with skvm::Uniform(s).
uniform32(Uniform u)699         I32 uniform32(Uniform u)            { return this->uniform32(u.ptr, u.offset); }
uniformF(Uniform u)700         F32 uniformF (Uniform u)            { return this->uniformF (u.ptr, u.offset); }
gather8(Uniform u,I32 index)701         I32 gather8  (Uniform u, I32 index) { return this->gather8  (u.ptr, u.offset, index); }
gather16(Uniform u,I32 index)702         I32 gather16 (Uniform u, I32 index) { return this->gather16 (u.ptr, u.offset, index); }
gather32(Uniform u,I32 index)703         I32 gather32 (Uniform u, I32 index) { return this->gather32 (u.ptr, u.offset, index); }
gatherF(Uniform u,I32 index)704         F32 gatherF  (Uniform u, I32 index) { return this->gatherF  (u.ptr, u.offset, index); }
705 
706         // Convenience methods for working with array pointers in skvm::Uniforms. Index is an
707         // array index and not a byte offset. The array pointer is stored at u.
array32(Uniform a,int index)708         I32 array32  (Uniform a, int index) { return this->array32  (a.ptr, a.offset, index); }
arrayF(Uniform a,int index)709         F32 arrayF   (Uniform a, int index) { return this->arrayF   (a.ptr, a.offset, index); }
710 
711         // Load an immediate constant.
712         I32 splat(int      n);
splat(unsigned u)713         I32 splat(unsigned u) { return splat((int)u); }
splat(float f)714         F32 splat(float    f) {
715             int bits;
716             memcpy(&bits, &f, 4);
717             return pun_to_F32(splat(bits));
718         }
719 
720         // Some operations make sense with immediate arguments,
721         // so we provide overloads inline to make that seamless.
722         //
723         // We omit overloads that may indicate a bug or performance issue.
724         // In general it does not make sense to pass immediates to unary operations,
725         // and even sometimes not for binary operations, e.g.
726         //
727         //   div(x, y)    -- normal every day divide
728         //   div(3.0f, y) -- yep, makes sense
729         //   div(x, 3.0f) -- omitted as a reminder you probably want mul(x, 1/3.0f).
730         //
731         // You can of course always splat() to override these opinions.
732 
733         // float math, comparisons, etc.
734         F32 add(F32, F32);
add(F32 x,float y)735         F32 add(F32 x, float y) { return add(x, splat(y)); }
add(float x,F32 y)736         F32 add(float x, F32 y) { return add(splat(x), y); }
737 
738         F32 sub(F32, F32);
sub(F32 x,float y)739         F32 sub(F32 x, float y) { return sub(x, splat(y)); }
sub(float x,F32 y)740         F32 sub(float x, F32 y) { return sub(splat(x), y); }
741 
742         F32 mul(F32, F32);
mul(F32 x,float y)743         F32 mul(F32 x, float y) { return mul(x, splat(y)); }
mul(float x,F32 y)744         F32 mul(float x, F32 y) { return mul(splat(x), y); }
745 
746         // mul(), but allowing optimizations not strictly legal under IEEE-754 rules.
747         F32 fast_mul(F32, F32);
fast_mul(F32 x,float y)748         F32 fast_mul(F32 x, float y) { return fast_mul(x, splat(y)); }
fast_mul(float x,F32 y)749         F32 fast_mul(float x, F32 y) { return fast_mul(splat(x), y); }
750 
751         F32 div(F32, F32);
div(float x,F32 y)752         F32 div(float x, F32 y) { return div(splat(x), y); }
753 
754         F32 min(F32, F32);
min(F32 x,float y)755         F32 min(F32 x, float y) { return min(x, splat(y)); }
min(float x,F32 y)756         F32 min(float x, F32 y) { return min(splat(x), y); }
757 
758         F32 max(F32, F32);
max(F32 x,float y)759         F32 max(F32 x, float y) { return max(x, splat(y)); }
max(float x,F32 y)760         F32 max(float x, F32 y) { return max(splat(x), y); }
761 
762         // TODO: remove mad()?  It's just sugar.
mad(F32 x,F32 y,F32 z)763         F32 mad(F32   x, F32   y, F32   z) { return add(mul(x,y), z); }
mad(F32 x,F32 y,float z)764         F32 mad(F32   x, F32   y, float z) { return mad(      x ,       y , splat(z)); }
mad(F32 x,float y,F32 z)765         F32 mad(F32   x, float y, F32   z) { return mad(      x , splat(y),       z ); }
mad(F32 x,float y,float z)766         F32 mad(F32   x, float y, float z) { return mad(      x , splat(y), splat(z)); }
mad(float x,F32 y,F32 z)767         F32 mad(float x, F32   y, F32   z) { return mad(splat(x),       y ,       z ); }
mad(float x,F32 y,float z)768         F32 mad(float x, F32   y, float z) { return mad(splat(x),       y , splat(z)); }
mad(float x,float y,F32 z)769         F32 mad(float x, float y, F32   z) { return mad(splat(x), splat(y),       z ); }
770 
771         F32        sqrt(F32);
772         F32 approx_log2(F32);
773         F32 approx_pow2(F32);
approx_log(F32 x)774         F32 approx_log (F32 x) { return mul(0.69314718f, approx_log2(x)); }
approx_exp(F32 x)775         F32 approx_exp (F32 x) { return approx_pow2(mul(x, 1.4426950408889634074f)); }
776 
777         F32 approx_powf(F32 base, F32 exp);
approx_powf(F32 base,float exp)778         F32 approx_powf(F32 base, float exp) { return approx_powf(base, splat(exp)); }
approx_powf(float base,F32 exp)779         F32 approx_powf(float base, F32 exp) { return approx_powf(splat(base), exp); }
780 
781 
782         F32 approx_sin(F32 radians);
approx_cos(F32 radians)783         F32 approx_cos(F32 radians) { return approx_sin(add(radians, SK_ScalarPI/2)); }
784         F32 approx_tan(F32 radians);
785 
786         F32 approx_asin(F32 x);
approx_acos(F32 x)787         F32 approx_acos(F32 x) { return sub(SK_ScalarPI/2, approx_asin(x)); }
788         F32 approx_atan(F32 x);
789         F32 approx_atan2(F32 y, F32 x);
790 
791         F32 lerp(F32   lo, F32   hi, F32   t);
lerp(F32 lo,F32 hi,float t)792         F32 lerp(F32   lo, F32   hi, float t) { return lerp(      lo ,       hi , splat(t)); }
lerp(F32 lo,float hi,float t)793         F32 lerp(F32   lo, float hi, float t) { return lerp(      lo , splat(hi), splat(t)); }
lerp(F32 lo,float hi,F32 t)794         F32 lerp(F32   lo, float hi, F32   t) { return lerp(      lo , splat(hi),       t ); }
lerp(float lo,F32 hi,F32 t)795         F32 lerp(float lo, F32   hi, F32   t) { return lerp(splat(lo),       hi ,       t ); }
lerp(float lo,F32 hi,float t)796         F32 lerp(float lo, F32   hi, float t) { return lerp(splat(lo),       hi , splat(t)); }
lerp(float lo,float hi,F32 t)797         F32 lerp(float lo, float hi, F32   t) { return lerp(splat(lo), splat(hi),       t ); }
798 
clamp(F32 x,F32 lo,F32 hi)799         F32 clamp(F32   x, F32   lo, F32   hi) { return max(lo, min(x, hi)); }
clamp(F32 x,F32 lo,float hi)800         F32 clamp(F32   x, F32   lo, float hi) { return clamp(      x ,       lo , splat(hi)); }
clamp(F32 x,float lo,float hi)801         F32 clamp(F32   x, float lo, float hi) { return clamp(      x , splat(lo), splat(hi)); }
clamp(F32 x,float lo,F32 hi)802         F32 clamp(F32   x, float lo, F32   hi) { return clamp(      x , splat(lo),       hi ); }
clamp(float x,F32 lo,F32 hi)803         F32 clamp(float x, F32   lo, F32   hi) { return clamp(splat(x),       lo ,       hi ); }
clamp(float x,F32 lo,float hi)804         F32 clamp(float x, F32   lo, float hi) { return clamp(splat(x),       lo , splat(hi)); }
clamp(float x,float lo,F32 hi)805         F32 clamp(float x, float lo, F32   hi) { return clamp(splat(x), splat(lo),       hi ); }
806 
clamp01(F32 x)807         F32 clamp01(F32 x) { return clamp(x, 0.0f, 1.0f); }
808 
abs(F32 x)809         F32    abs(F32 x) { return pun_to_F32(bit_and(pun_to_I32(x), 0x7fff'ffff)); }
810         F32  fract(F32 x) { return sub(x, floor(x)); }
811         F32   ceil(F32);
812         F32  floor(F32);
813         I32 is_NaN   (F32 x) { return neq(x,x); }
814         I32 is_finite(F32 x) { return lt(bit_and(pun_to_I32(x), 0x7f80'0000), 0x7f80'0000); }
815 
816         I32 trunc(F32 x);
817         I32 round(F32 x);  // Round to int using current rounding mode (as if lrintf()).
818         I32 pun_to_I32(F32 x) { return {x.builder, x.id}; }
819 
820         I32   to_fp16(F32 x);
821         F32 from_fp16(I32 x);
822 
823         I32 eq(F32, F32);
824         I32 eq(F32 x, float y) { return eq(x, splat(y)); }
825         I32 eq(float x, F32 y) { return eq(splat(x), y); }
826 
827         I32 neq(F32, F32);
828         I32 neq(F32 x, float y) { return neq(x, splat(y)); }
829         I32 neq(float x, F32 y) { return neq(splat(x), y); }
830 
831         I32 lt(F32, F32);
832         I32 lt(F32 x, float y) { return lt(x, splat(y)); }
833         I32 lt(float x, F32 y) { return lt(splat(x), y); }
834 
835         I32 lte(F32, F32);
836         I32 lte(F32 x, float y) { return lte(x, splat(y)); }
837         I32 lte(float x, F32 y) { return lte(splat(x), y); }
838 
839         I32 gt(F32, F32);
840         I32 gt(F32 x, float y) { return gt(x, splat(y)); }
841         I32 gt(float x, F32 y) { return gt(splat(x), y); }
842 
843         I32 gte(F32, F32);
844         I32 gte(F32 x, float y) { return gte(x, splat(y)); }
845         I32 gte(float x, F32 y) { return gte(splat(x), y); }
846 
847         // int math, comparisons, etc.
848         I32 add(I32, I32);
849         I32 add(I32 x, int y) { return add(x, splat(y)); }
850         I32 add(int x, I32 y) { return add(splat(x), y); }
851 
852         I32 sub(I32, I32);
853         I32 sub(I32 x, int y) { return sub(x, splat(y)); }
854         I32 sub(int x, I32 y) { return sub(splat(x), y); }
855 
856         I32 mul(I32, I32);
857         I32 mul(I32 x, int y) { return mul(x, splat(y)); }
858         I32 mul(int x, I32 y) { return mul(splat(x), y); }
859 
860         I32 shl(I32 x, int bits);
861         I32 shr(I32 x, int bits);
862         I32 sra(I32 x, int bits);
863 
864         I32 eq(I32, I32);
865         I32 eq(I32 x, int y) { return eq(x, splat(y)); }
866         I32 eq(int x, I32 y) { return eq(splat(x), y); }
867 
868         I32 neq(I32, I32);
869         I32 neq(I32 x, int y) { return neq(x, splat(y)); }
870         I32 neq(int x, I32 y) { return neq(splat(x), y); }
871 
872         I32 lt(I32, I32);
873         I32 lt(I32 x, int y) { return lt(x, splat(y)); }
874         I32 lt(int x, I32 y) { return lt(splat(x), y); }
875 
876         I32 lte(I32, I32);
877         I32 lte(I32 x, int y) { return lte(x, splat(y)); }
878         I32 lte(int x, I32 y) { return lte(splat(x), y); }
879 
880         I32 gt(I32, I32);
881         I32 gt(I32 x, int y) { return gt(x, splat(y)); }
882         I32 gt(int x, I32 y) { return gt(splat(x), y); }
883 
884         I32 gte(I32, I32);
885         I32 gte(I32 x, int y) { return gte(x, splat(y)); }
886         I32 gte(int x, I32 y) { return gte(splat(x), y); }
887 
888         F32 to_F32(I32 x);
889         F32 pun_to_F32(I32 x) { return {x.builder, x.id}; }
890 
891         // Bitwise operations.
892         I32 bit_and(I32, I32);
893         I32 bit_and(I32 x, int y) { return bit_and(x, splat(y)); }
894         I32 bit_and(int x, I32 y) { return bit_and(splat(x), y); }
895 
896         I32 bit_or(I32, I32);
897         I32 bit_or(I32 x, int y) { return bit_or(x, splat(y)); }
898         I32 bit_or(int x, I32 y) { return bit_or(splat(x), y); }
899 
900         I32 bit_xor(I32, I32);
901         I32 bit_xor(I32 x, int y) { return bit_xor(x, splat(y)); }
902         I32 bit_xor(int x, I32 y) { return bit_xor(splat(x), y); }
903 
904         I32 bit_clear(I32, I32);
905         I32 bit_clear(I32 x, int y) { return bit_clear(x, splat(y)); }
906         I32 bit_clear(int x, I32 y) { return bit_clear(splat(x), y); }
907 
908         I32 min(I32 x, I32 y) { return select(lte(x,y), x, y); }
909         I32 min(I32 x, int y) { return min(x, splat(y)); }
910         I32 min(int x, I32 y) { return min(splat(x), y); }
911 
912         I32 max(I32 x, I32 y) { return select(gte(x,y), x, y); }
913         I32 max(I32 x, int y) { return max(x, splat(y)); }
914         I32 max(int x, I32 y) { return max(splat(x), y); }
915 
916         I32 select(I32 cond, I32 t, I32 f);  // cond ? t : f
917         I32 select(I32 cond, int t, I32 f) { return select(cond, splat(t),       f ); }
918         I32 select(I32 cond, I32 t, int f) { return select(cond,       t , splat(f)); }
919         I32 select(I32 cond, int t, int f) { return select(cond, splat(t), splat(f)); }
920 
921         F32 select(I32 cond, F32 t, F32 f) {
922             return pun_to_F32(select(cond, pun_to_I32(t)
923                                          , pun_to_I32(f)));
924         }
925         F32 select(I32 cond, float t, F32   f) { return select(cond, splat(t),       f ); }
926         F32 select(I32 cond, F32   t, float f) { return select(cond,       t , splat(f)); }
927         F32 select(I32 cond, float t, float f) { return select(cond, splat(t), splat(f)); }
928 
929         I32 extract(I32 x, int bits, I32 z);   // (x>>bits) & z
930         I32 extract(I32 x, int bits, int z) { return extract(x, bits, splat(z)); }
931         I32 extract(int x, int bits, I32 z) { return extract(splat(x), bits, z); }
932 
933         I32 pack(I32 x, I32 y, int bits);   // x | (y<<bits)
934         I32 pack(I32 x, int y, int bits) { return pack(x, splat(y), bits); }
935         I32 pack(int x, I32 y, int bits) { return pack(splat(x), y, bits); }
936 
937 
938         // Common idioms used in several places, worth centralizing for consistency.
939         F32 from_unorm(int bits, I32);   // E.g. from_unorm(8, x) -> x * (1/255.0f)
940         I32   to_unorm(int bits, F32);   // E.g.   to_unorm(8, x) -> round(x * 255)
941 
942         Color   load(PixelFormat, Ptr ptr);
943         void   store(PixelFormat, Ptr ptr, Color);
944         Color gather(PixelFormat, UPtr ptr, int offset, I32 index);
945         Color gather(PixelFormat f, Uniform u, I32 index) {
946             return gather(f, u.ptr, u.offset, index);
947         }
948 
949         void   premul(F32* r, F32* g, F32* b, F32 a);
950         void unpremul(F32* r, F32* g, F32* b, F32 a);
951 
952         Color   premul(Color c) {   this->premul(&c.r, &c.g, &c.b, c.a); return c; }
953         Color unpremul(Color c) { this->unpremul(&c.r, &c.g, &c.b, c.a); return c; }
954 
955         Color lerp(Color lo, Color hi, F32 t);
956         Color blend(SkBlendMode, Color src, Color dst);
957 
958         Color clamp01(Color c) {
959             return { clamp01(c.r), clamp01(c.g), clamp01(c.b), clamp01(c.a) };
960         }
961 
962         HSLA  to_hsla(Color);
963         Color to_rgba(HSLA);
964 
965         void dump(SkWStream* = nullptr) const;
966 
967         uint64_t hash() const;
968 
969         Val push(Instruction);
970 
971         bool allImm() const { return true; }
972 
973         template <typename T, typename... Rest>
974         bool allImm(Val id, T* imm, Rest... rest) const {
975             if (fProgram[id].op == Op::splat) {
976                 static_assert(sizeof(T) == 4);
977                 memcpy(imm, &fProgram[id].immA, 4);
978                 return this->allImm(rest...);
979             }
980             return false;
981         }
982 
983         bool allUniform() const { return true; }
984 
985         template <typename... Rest>
986         bool allUniform(Val id, Uniform* uni, Rest... rest) const {
987             if (fProgram[id].op == Op::uniform32) {
988                 uni->ptr.ix = fProgram[id].immA;
989                 uni->offset = fProgram[id].immB;
990                 return this->allUniform(rest...);
991             }
992             return false;
993         }
994 
995     private:
996         // Declare an argument with given stride (use stride=0 for uniforms).
997         Ptr arg(int stride);
998 
999         Val push(
1000                 Op op, Val x=NA, Val y=NA, Val z=NA, Val w=NA, int immA=0, int immB=0, int immC=0) {
1001             return this->push(Instruction{op, x,y,z,w, immA,immB,immC});
1002         }
1003 
1004         template <typename T>
1005         bool isImm(Val id, T want) const {
1006             T imm = 0;
1007             return this->allImm(id, &imm) && imm == want;
1008         }
1009 
1010         SkTHashMap<Instruction, Val, InstructionHash> fIndex;
1011         std::vector<Instruction>                      fProgram;
1012         std::vector<TraceHook*>                       fTraceHooks;
1013         std::vector<int>                              fStrides;
1014         const Features                                fFeatures;
1015         bool                                          fCreateDuplicates;
1016     };
1017 
1018     // Optimization passes and data structures normally used by Builder::optimize(),
1019     // extracted here so they can be unit tested.
1020     std::vector<Instruction> eliminate_dead_code(std::vector<Instruction>,
1021                                                  viz::Visualizer* visualizer = nullptr);
1022     std::vector<OptimizedInstruction> finalize(std::vector<Instruction>,
1023                                                viz::Visualizer* visualizer = nullptr);
1024 
1025     using Reg = int;
1026 
1027     // d = op(x,y,z,w, immA,immB)
1028     struct InterpreterInstruction {
1029         Op  op;
1030         Reg d,x,y,z,w;
1031         int immA,immB,immC;
1032     };
1033 
1034     class Program {
1035     public:
1036         Program(const std::vector<OptimizedInstruction>& instructions,
1037                 std::unique_ptr<viz::Visualizer> visualizer,
1038                 const std::vector<int>& strides,
1039                 const std::vector<TraceHook*>& traceHooks,
1040                 const char* debug_name, bool allow_jit);
1041 
1042         Program();
1043         ~Program();
1044 
1045         Program(Program&&);
1046         Program& operator=(Program&&);
1047 
1048         Program(const Program&) = delete;
1049         Program& operator=(const Program&) = delete;
1050 
1051         void eval(int n, void* args[]) const;
1052 
1053         template <typename... T>
1054         void eval(int n, T*... arg) const {
1055             SkASSERT(sizeof...(arg) == this->nargs());
1056             // This nullptr isn't important except that it makes args[] non-empty if you pass none.
1057             void* args[] = { (void*)arg..., nullptr };
1058             this->eval(n, args);
1059         }
1060 
1061         std::vector<InterpreterInstruction> instructions() const;
1062         int  nargs() const;
1063         int  nregs() const;
1064         int  loop () const;
1065         bool empty() const;
1066 
1067         bool hasJIT() const;         // Has this Program been JITted?
1068         bool hasTraceHooks() const;  // Is this program instrumented for debugging?
1069 
1070         void visualize(SkWStream* output, const char* code) const;
1071         void dump(SkWStream* = nullptr) const;
1072         void disassemble(SkWStream* = nullptr) const;
1073         viz::Visualizer* visualizer();
1074 
1075     private:
1076         void setupInterpreter(const std::vector<OptimizedInstruction>&);
1077         void setupJIT        (const std::vector<OptimizedInstruction>&, const char* debug_name);
1078         void setupLLVM       (const std::vector<OptimizedInstruction>&, const char* debug_name);
1079 
1080         bool jit(const std::vector<OptimizedInstruction>&,
1081                  int* stack_hint, uint32_t* registers_used,
1082                  Assembler*) const;
1083 
1084         void waitForLLVM() const;
1085         void dropJIT();
1086 
1087         struct Impl;
1088         std::unique_ptr<Impl> fImpl;
1089     };
1090 
1091     // TODO: control flow
1092     // TODO: 64-bit values?
1093 
1094 #define SI static inline
1095 
1096     SI I32 operator+(I32 x, I32 y) { return x->add(x,y); }
1097     SI I32 operator+(I32 x, int y) { return x->add(x,y); }
1098     SI I32 operator+(int x, I32 y) { return y->add(x,y); }
1099 
1100     SI I32 operator-(I32 x, I32 y) { return x->sub(x,y); }
1101     SI I32 operator-(I32 x, int y) { return x->sub(x,y); }
1102     SI I32 operator-(int x, I32 y) { return y->sub(x,y); }
1103 
1104     SI I32 operator*(I32 x, I32 y) { return x->mul(x,y); }
1105     SI I32 operator*(I32 x, int y) { return x->mul(x,y); }
1106     SI I32 operator*(int x, I32 y) { return y->mul(x,y); }
1107 
min(I32 x,I32 y)1108     SI I32 min(I32 x, I32 y) { return x->min(x,y); }
min(I32 x,int y)1109     SI I32 min(I32 x, int y) { return x->min(x,y); }
min(int x,I32 y)1110     SI I32 min(int x, I32 y) { return y->min(x,y); }
1111 
max(I32 x,I32 y)1112     SI I32 max(I32 x, I32 y) { return x->max(x,y); }
max(I32 x,int y)1113     SI I32 max(I32 x, int y) { return x->max(x,y); }
max(int x,I32 y)1114     SI I32 max(int x, I32 y) { return y->max(x,y); }
1115 
1116     SI I32 operator==(I32 x, I32 y) { return x->eq(x,y); }
1117     SI I32 operator==(I32 x, int y) { return x->eq(x,y); }
1118     SI I32 operator==(int x, I32 y) { return y->eq(x,y); }
1119 
1120     SI I32 operator!=(I32 x, I32 y) { return x->neq(x,y); }
1121     SI I32 operator!=(I32 x, int y) { return x->neq(x,y); }
1122     SI I32 operator!=(int x, I32 y) { return y->neq(x,y); }
1123 
1124     SI I32 operator< (I32 x, I32 y) { return x->lt(x,y); }
1125     SI I32 operator< (I32 x, int y) { return x->lt(x,y); }
1126     SI I32 operator< (int x, I32 y) { return y->lt(x,y); }
1127 
1128     SI I32 operator<=(I32 x, I32 y) { return x->lte(x,y); }
1129     SI I32 operator<=(I32 x, int y) { return x->lte(x,y); }
1130     SI I32 operator<=(int x, I32 y) { return y->lte(x,y); }
1131 
1132     SI I32 operator> (I32 x, I32 y) { return x->gt(x,y); }
1133     SI I32 operator> (I32 x, int y) { return x->gt(x,y); }
1134     SI I32 operator> (int x, I32 y) { return y->gt(x,y); }
1135 
1136     SI I32 operator>=(I32 x, I32 y) { return x->gte(x,y); }
1137     SI I32 operator>=(I32 x, int y) { return x->gte(x,y); }
1138     SI I32 operator>=(int x, I32 y) { return y->gte(x,y); }
1139 
1140 
1141     SI F32 operator+(F32   x, F32   y) { return x->add(x,y); }
1142     SI F32 operator+(F32   x, float y) { return x->add(x,y); }
1143     SI F32 operator+(float x, F32   y) { return y->add(x,y); }
1144 
1145     SI F32 operator-(F32   x, F32   y) { return x->sub(x,y); }
1146     SI F32 operator-(F32   x, float y) { return x->sub(x,y); }
1147     SI F32 operator-(float x, F32   y) { return y->sub(x,y); }
1148 
1149     SI F32 operator*(F32   x, F32   y) { return x->mul(x,y); }
1150     SI F32 operator*(F32   x, float y) { return x->mul(x,y); }
1151     SI F32 operator*(float x, F32   y) { return y->mul(x,y); }
1152 
fast_mul(F32 x,F32 y)1153     SI F32 fast_mul(F32   x, F32   y) { return x->fast_mul(x,y); }
fast_mul(F32 x,float y)1154     SI F32 fast_mul(F32   x, float y) { return x->fast_mul(x,y); }
fast_mul(float x,F32 y)1155     SI F32 fast_mul(float x, F32   y) { return y->fast_mul(x,y); }
1156 
1157     SI F32 operator/(F32   x, F32  y) { return x->div(x,y); }
1158     SI F32 operator/(float x, F32  y) { return y->div(x,y); }
1159 
min(F32 x,F32 y)1160     SI F32 min(F32   x, F32   y) { return x->min(x,y); }
min(F32 x,float y)1161     SI F32 min(F32   x, float y) { return x->min(x,y); }
min(float x,F32 y)1162     SI F32 min(float x, F32   y) { return y->min(x,y); }
1163 
max(F32 x,F32 y)1164     SI F32 max(F32   x, F32   y) { return x->max(x,y); }
max(F32 x,float y)1165     SI F32 max(F32   x, float y) { return x->max(x,y); }
max(float x,F32 y)1166     SI F32 max(float x, F32   y) { return y->max(x,y); }
1167 
1168     SI I32 operator==(F32   x, F32   y) { return x->eq(x,y); }
1169     SI I32 operator==(F32   x, float y) { return x->eq(x,y); }
1170     SI I32 operator==(float x, F32   y) { return y->eq(x,y); }
1171 
1172     SI I32 operator!=(F32   x, F32   y) { return x->neq(x,y); }
1173     SI I32 operator!=(F32   x, float y) { return x->neq(x,y); }
1174     SI I32 operator!=(float x, F32   y) { return y->neq(x,y); }
1175 
1176     SI I32 operator< (F32   x, F32   y) { return x->lt(x,y); }
1177     SI I32 operator< (F32   x, float y) { return x->lt(x,y); }
1178     SI I32 operator< (float x, F32   y) { return y->lt(x,y); }
1179 
1180     SI I32 operator<=(F32   x, F32   y) { return x->lte(x,y); }
1181     SI I32 operator<=(F32   x, float y) { return x->lte(x,y); }
1182     SI I32 operator<=(float x, F32   y) { return y->lte(x,y); }
1183 
1184     SI I32 operator> (F32   x, F32   y) { return x->gt(x,y); }
1185     SI I32 operator> (F32   x, float y) { return x->gt(x,y); }
1186     SI I32 operator> (float x, F32   y) { return y->gt(x,y); }
1187 
1188     SI I32 operator>=(F32   x, F32   y) { return x->gte(x,y); }
1189     SI I32 operator>=(F32   x, float y) { return x->gte(x,y); }
1190     SI I32 operator>=(float x, F32   y) { return y->gte(x,y); }
1191 
1192     SI I32& operator+=(I32& x, I32 y) { return (x = x + y); }
1193     SI I32& operator+=(I32& x, int y) { return (x = x + y); }
1194 
1195     SI I32& operator-=(I32& x, I32 y) { return (x = x - y); }
1196     SI I32& operator-=(I32& x, int y) { return (x = x - y); }
1197 
1198     SI I32& operator*=(I32& x, I32 y) { return (x = x * y); }
1199     SI I32& operator*=(I32& x, int y) { return (x = x * y); }
1200 
1201     SI F32& operator+=(F32& x, F32   y) { return (x = x + y); }
1202     SI F32& operator+=(F32& x, float y) { return (x = x + y); }
1203 
1204     SI F32& operator-=(F32& x, F32   y) { return (x = x - y); }
1205     SI F32& operator-=(F32& x, float y) { return (x = x - y); }
1206 
1207     SI F32& operator*=(F32& x, F32   y) { return (x = x * y); }
1208     SI F32& operator*=(F32& x, float y) { return (x = x * y); }
1209 
1210     SI F32& operator/=(F32& x, F32   y) { return (x = x / y); }
1211 
assert_true(I32 cond,I32 debug)1212     SI void assert_true(I32 cond, I32 debug) { cond->assert_true(cond,debug); }
assert_true(I32 cond,F32 debug)1213     SI void assert_true(I32 cond, F32 debug) { cond->assert_true(cond,debug); }
assert_true(I32 cond)1214     SI void assert_true(I32 cond)            { cond->assert_true(cond); }
1215 
store8(Ptr ptr,I32 val)1216     SI void store8  (Ptr ptr, I32 val)                    { val->store8  (ptr, val); }
store16(Ptr ptr,I32 val)1217     SI void store16 (Ptr ptr, I32 val)                    { val->store16 (ptr, val); }
store32(Ptr ptr,I32 val)1218     SI void store32 (Ptr ptr, I32 val)                    { val->store32 (ptr, val); }
storeF(Ptr ptr,F32 val)1219     SI void storeF  (Ptr ptr, F32 val)                    { val->storeF  (ptr, val); }
store64(Ptr ptr,I32 lo,I32 hi)1220     SI void store64 (Ptr ptr, I32 lo, I32 hi)             { lo ->store64 (ptr, lo,hi); }
store128(Ptr ptr,I32 x,I32 y,I32 z,I32 w)1221     SI void store128(Ptr ptr, I32 x, I32 y, I32 z, I32 w) { x  ->store128(ptr, x,y,z,w); }
1222 
gather8(UPtr ptr,int off,I32 ix)1223     SI I32 gather8 (UPtr ptr, int off, I32 ix) { return ix->gather8 (ptr, off, ix); }
gather16(UPtr ptr,int off,I32 ix)1224     SI I32 gather16(UPtr ptr, int off, I32 ix) { return ix->gather16(ptr, off, ix); }
gather32(UPtr ptr,int off,I32 ix)1225     SI I32 gather32(UPtr ptr, int off, I32 ix) { return ix->gather32(ptr, off, ix); }
gatherF(UPtr ptr,int off,I32 ix)1226     SI F32 gatherF (UPtr ptr, int off, I32 ix) { return ix->gatherF (ptr, off, ix); }
1227 
gather8(Uniform u,I32 ix)1228     SI I32 gather8 (Uniform u, I32 ix) { return ix->gather8 (u, ix); }
gather16(Uniform u,I32 ix)1229     SI I32 gather16(Uniform u, I32 ix) { return ix->gather16(u, ix); }
gather32(Uniform u,I32 ix)1230     SI I32 gather32(Uniform u, I32 ix) { return ix->gather32(u, ix); }
gatherF(Uniform u,I32 ix)1231     SI F32 gatherF (Uniform u, I32 ix) { return ix->gatherF (u, ix); }
1232 
sqrt(F32 x)1233     SI F32        sqrt(F32 x) { return x->       sqrt(x); }
approx_log2(F32 x)1234     SI F32 approx_log2(F32 x) { return x->approx_log2(x); }
approx_pow2(F32 x)1235     SI F32 approx_pow2(F32 x) { return x->approx_pow2(x); }
approx_log(F32 x)1236     SI F32 approx_log (F32 x) { return x->approx_log (x); }
approx_exp(F32 x)1237     SI F32 approx_exp (F32 x) { return x->approx_exp (x); }
1238 
approx_powf(F32 base,F32 exp)1239     SI F32 approx_powf(F32   base, F32   exp) { return base->approx_powf(base, exp); }
approx_powf(F32 base,float exp)1240     SI F32 approx_powf(F32   base, float exp) { return base->approx_powf(base, exp); }
approx_powf(float base,F32 exp)1241     SI F32 approx_powf(float base, F32   exp) { return  exp->approx_powf(base, exp); }
1242 
approx_sin(F32 radians)1243     SI F32 approx_sin(F32 radians) { return radians->approx_sin(radians); }
approx_cos(F32 radians)1244     SI F32 approx_cos(F32 radians) { return radians->approx_cos(radians); }
approx_tan(F32 radians)1245     SI F32 approx_tan(F32 radians) { return radians->approx_tan(radians); }
1246 
approx_asin(F32 x)1247     SI F32 approx_asin(F32 x) { return x->approx_asin(x); }
approx_acos(F32 x)1248     SI F32 approx_acos(F32 x) { return x->approx_acos(x); }
approx_atan(F32 x)1249     SI F32 approx_atan(F32 x) { return x->approx_atan(x); }
approx_atan2(F32 y,F32 x)1250     SI F32 approx_atan2(F32 y, F32 x) { return x->approx_atan2(y, x); }
1251 
clamp01(F32 x)1252     SI F32   clamp01(F32 x) { return x->  clamp01(x); }
abs(F32 x)1253     SI F32       abs(F32 x) { return x->      abs(x); }
ceil(F32 x)1254     SI F32      ceil(F32 x) { return x->     ceil(x); }
fract(F32 x)1255     SI F32     fract(F32 x) { return x->    fract(x); }
floor(F32 x)1256     SI F32     floor(F32 x) { return x->    floor(x); }
is_NaN(F32 x)1257     SI I32    is_NaN(F32 x) { return x->   is_NaN(x); }
is_finite(F32 x)1258     SI I32 is_finite(F32 x) { return x->is_finite(x); }
1259 
trunc(F32 x)1260     SI I32      trunc(F32 x) { return x->      trunc(x); }
round(F32 x)1261     SI I32      round(F32 x) { return x->      round(x); }
pun_to_I32(F32 x)1262     SI I32 pun_to_I32(F32 x) { return x-> pun_to_I32(x); }
pun_to_F32(I32 x)1263     SI F32 pun_to_F32(I32 x) { return x-> pun_to_F32(x); }
to_F32(I32 x)1264     SI F32     to_F32(I32 x) { return x->     to_F32(x); }
to_fp16(F32 x)1265     SI I32    to_fp16(F32 x) { return x->    to_fp16(x); }
from_fp16(I32 x)1266     SI F32  from_fp16(I32 x) { return x->  from_fp16(x); }
1267 
lerp(F32 lo,F32 hi,F32 t)1268     SI F32 lerp(F32   lo, F32   hi, F32   t) { return lo->lerp(lo,hi,t); }
lerp(F32 lo,F32 hi,float t)1269     SI F32 lerp(F32   lo, F32   hi, float t) { return lo->lerp(lo,hi,t); }
lerp(F32 lo,float hi,F32 t)1270     SI F32 lerp(F32   lo, float hi, F32   t) { return lo->lerp(lo,hi,t); }
lerp(F32 lo,float hi,float t)1271     SI F32 lerp(F32   lo, float hi, float t) { return lo->lerp(lo,hi,t); }
lerp(float lo,F32 hi,F32 t)1272     SI F32 lerp(float lo, F32   hi, F32   t) { return hi->lerp(lo,hi,t); }
lerp(float lo,F32 hi,float t)1273     SI F32 lerp(float lo, F32   hi, float t) { return hi->lerp(lo,hi,t); }
lerp(float lo,float hi,F32 t)1274     SI F32 lerp(float lo, float hi, F32   t) { return  t->lerp(lo,hi,t); }
1275 
clamp(F32 x,F32 lo,F32 hi)1276     SI F32 clamp(F32   x, F32   lo, F32   hi) { return  x->clamp(x,lo,hi); }
clamp(F32 x,F32 lo,float hi)1277     SI F32 clamp(F32   x, F32   lo, float hi) { return  x->clamp(x,lo,hi); }
clamp(F32 x,float lo,F32 hi)1278     SI F32 clamp(F32   x, float lo, F32   hi) { return  x->clamp(x,lo,hi); }
clamp(F32 x,float lo,float hi)1279     SI F32 clamp(F32   x, float lo, float hi) { return  x->clamp(x,lo,hi); }
clamp(float x,F32 lo,F32 hi)1280     SI F32 clamp(float x, F32   lo, F32   hi) { return lo->clamp(x,lo,hi); }
clamp(float x,F32 lo,float hi)1281     SI F32 clamp(float x, F32   lo, float hi) { return lo->clamp(x,lo,hi); }
clamp(float x,float lo,F32 hi)1282     SI F32 clamp(float x, float lo, F32   hi) { return hi->clamp(x,lo,hi); }
1283 
1284     SI I32 operator<<(I32 x, int bits) { return x->shl(x, bits); }
shl(I32 x,int bits)1285     SI I32        shl(I32 x, int bits) { return x->shl(x, bits); }
shr(I32 x,int bits)1286     SI I32        shr(I32 x, int bits) { return x->shr(x, bits); }
sra(I32 x,int bits)1287     SI I32        sra(I32 x, int bits) { return x->sra(x, bits); }
1288 
1289     SI I32 operator&(I32 x, I32 y) { return x->bit_and(x,y); }
1290     SI I32 operator&(I32 x, int y) { return x->bit_and(x,y); }
1291     SI I32 operator&(int x, I32 y) { return y->bit_and(x,y); }
1292 
1293     SI I32 operator|(I32 x, I32 y) { return x->bit_or (x,y); }
1294     SI I32 operator|(I32 x, int y) { return x->bit_or (x,y); }
1295     SI I32 operator|(int x, I32 y) { return y->bit_or (x,y); }
1296 
1297     SI I32 operator^(I32 x, I32 y) { return x->bit_xor(x,y); }
1298     SI I32 operator^(I32 x, int y) { return x->bit_xor(x,y); }
1299     SI I32 operator^(int x, I32 y) { return y->bit_xor(x,y); }
1300 
1301     SI I32& operator&=(I32& x, I32 y) { return (x = x & y); }
1302     SI I32& operator&=(I32& x, int y) { return (x = x & y); }
1303     SI I32& operator|=(I32& x, I32 y) { return (x = x | y); }
1304     SI I32& operator|=(I32& x, int y) { return (x = x | y); }
1305     SI I32& operator^=(I32& x, I32 y) { return (x = x ^ y); }
1306     SI I32& operator^=(I32& x, int y) { return (x = x ^ y); }
1307 
bit_clear(I32 x,I32 y)1308     SI I32 bit_clear(I32 x, I32 y) { return x->bit_clear(x,y); }
bit_clear(I32 x,int y)1309     SI I32 bit_clear(I32 x, int y) { return x->bit_clear(x,y); }
bit_clear(int x,I32 y)1310     SI I32 bit_clear(int x, I32 y) { return y->bit_clear(x,y); }
1311 
select(I32 c,I32 t,I32 f)1312     SI I32 select(I32 c, I32 t, I32 f) { return c->select(c,          t ,          f ); }
select(I32 c,I32 t,int f)1313     SI I32 select(I32 c, I32 t, int f) { return c->select(c,          t , c->splat(f)); }
select(I32 c,int t,I32 f)1314     SI I32 select(I32 c, int t, I32 f) { return c->select(c, c->splat(t),          f ); }
select(I32 c,int t,int f)1315     SI I32 select(I32 c, int t, int f) { return c->select(c, c->splat(t), c->splat(f)); }
1316 
select(I32 c,F32 t,F32 f)1317     SI F32 select(I32 c, F32   t, F32   f) { return c->select(c,          t ,          f ); }
select(I32 c,F32 t,float f)1318     SI F32 select(I32 c, F32   t, float f) { return c->select(c,          t , c->splat(f)); }
select(I32 c,float t,F32 f)1319     SI F32 select(I32 c, float t, F32   f) { return c->select(c, c->splat(t),          f ); }
select(I32 c,float t,float f)1320     SI F32 select(I32 c, float t, float f) { return c->select(c, c->splat(t), c->splat(f)); }
1321 
extract(I32 x,int bits,I32 z)1322     SI I32 extract(I32 x, int bits, I32 z) { return x->extract(x,bits,z); }
extract(I32 x,int bits,int z)1323     SI I32 extract(I32 x, int bits, int z) { return x->extract(x,bits,z); }
extract(int x,int bits,I32 z)1324     SI I32 extract(int x, int bits, I32 z) { return z->extract(x,bits,z); }
1325 
pack(I32 x,I32 y,int bits)1326     SI I32 pack(I32 x, I32 y, int bits) { return x->pack   (x,y,bits); }
pack(I32 x,int y,int bits)1327     SI I32 pack(I32 x, int y, int bits) { return x->pack   (x,y,bits); }
pack(int x,I32 y,int bits)1328     SI I32 pack(int x, I32 y, int bits) { return y->pack   (x,y,bits); }
1329 
1330     SI I32 operator~(I32 x) { return ~0 ^ x; }
1331     SI I32 operator-(I32 x) { return  0 - x; }
1332     SI F32 operator-(F32 x) { return 0.0f - x; }
1333 
from_unorm(int bits,I32 x)1334     SI F32 from_unorm(int bits, I32 x) { return x->from_unorm(bits,x); }
to_unorm(int bits,F32 x)1335     SI I32   to_unorm(int bits, F32 x) { return x->  to_unorm(bits,x); }
1336 
store(PixelFormat f,Ptr p,Color c)1337     SI void store(PixelFormat f, Ptr p, Color c) { return c->store(f,p,c); }
1338 
gather(PixelFormat f,UPtr p,int off,I32 ix)1339     SI Color gather(PixelFormat f, UPtr p, int off, I32 ix) { return ix->gather(f,p,off,ix); }
gather(PixelFormat f,Uniform u,I32 ix)1340     SI Color gather(PixelFormat f, Uniform u     , I32 ix)  { return ix->gather(f,u,ix); }
1341 
premul(F32 * r,F32 * g,F32 * b,F32 a)1342     SI void   premul(F32* r, F32* g, F32* b, F32 a) { a->  premul(r,g,b,a); }
unpremul(F32 * r,F32 * g,F32 * b,F32 a)1343     SI void unpremul(F32* r, F32* g, F32* b, F32 a) { a->unpremul(r,g,b,a); }
1344 
premul(Color c)1345     SI Color   premul(Color c) { return c->  premul(c); }
unpremul(Color c)1346     SI Color unpremul(Color c) { return c->unpremul(c); }
1347 
lerp(Color lo,Color hi,F32 t)1348     SI Color lerp(Color lo, Color hi, F32 t) { return t->lerp(lo,hi,t); }
1349 
blend(SkBlendMode m,Color s,Color d)1350     SI Color blend(SkBlendMode m, Color s, Color d) { return s->blend(m,s,d); }
1351 
clamp01(Color c)1352     SI Color clamp01(Color c) { return c->clamp01(c); }
1353 
to_hsla(Color c)1354     SI HSLA  to_hsla(Color c) { return c->to_hsla(c); }
to_rgba(HSLA c)1355     SI Color to_rgba(HSLA  c) { return c->to_rgba(c); }
1356 
1357     // Evaluate polynomials: ax^n + bx^(n-1) + ... for n >= 1
1358     template <typename F32_or_float, typename... Rest>
poly(F32 x,F32_or_float a,float b,Rest...rest)1359     SI F32 poly(F32 x, F32_or_float a, float b, Rest... rest) {
1360         if constexpr (sizeof...(rest) == 0) {
1361             return x*a+b;
1362         } else {
1363             return poly(x, x*a+b, rest...);
1364         }
1365     }
1366 #undef SI
1367 }  // namespace skvm
1368 
1369 #endif//SkVM_DEFINED
1370