/*
 * Copyright 2019 Google LLC
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
7 
8 #ifndef SkVM_DEFINED
9 #define SkVM_DEFINED
10 
11 #include "include/core/SkBlendMode.h"
12 #include "include/core/SkColor.h"
13 #include "include/core/SkColorType.h"
14 #include "include/core/SkSpan.h"
15 #include "include/private/base/SkMacros.h"
16 #include "include/private/base/SkTArray.h"
17 #include "src/core/SkTHash.h"
18 #include "src/core/SkVM_fwd.h"
19 #include <vector>      // std::vector
20 
21 class SkWStream;
22 
// Decide whether the JIT backend is compiled in.  SKVM_JIT is defined only when the build
// opts in with SKVM_JIT_WHEN_POSSIBLE, the build is not an iOS build, and the target is:
//   - x86-64 on Windows, Linux, or an Apple platform, or
//   - aarch64 on Android or an Apple platform.
#if defined(SKVM_JIT_WHEN_POSSIBLE) && !defined(SK_BUILD_FOR_IOS)
    #if defined(__x86_64__) || defined(_M_X64)
        #if defined(_WIN32) || defined(__linux) || defined(__APPLE__)
            #define SKVM_JIT
        #endif
    #endif
    #if defined(__aarch64__)
        #if defined(__ANDROID__) || defined(__APPLE__)
            #define SKVM_JIT
        #endif
    #endif
#endif

// Manual kill switch: change the 0 to 1 to undefine SKVM_JIT regardless of the logic above.
#if 0
    #undef SKVM_JIT
#endif
39 
40 namespace skvm {
41 
42     namespace viz {
43         class Visualizer;
44     }
45 
46     class Assembler {
47     public:
48         explicit Assembler(void* buf);
49 
50         size_t size() const;
51 
52         // Order matters... GP64, Xmm, Ymm values match 4-bit register encoding for each.
53         enum GP64 {
54             rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi,
55             r8 , r9 , r10, r11, r12, r13, r14, r15,
56         };
57         enum Xmm {
58             xmm0, xmm1, xmm2 , xmm3 , xmm4 , xmm5 , xmm6 , xmm7 ,
59             xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,
60         };
61         enum Ymm {
62             ymm0, ymm1, ymm2 , ymm3 , ymm4 , ymm5 , ymm6 , ymm7 ,
63             ymm8, ymm9, ymm10, ymm11, ymm12, ymm13, ymm14, ymm15,
64         };
65 
66         // X and V values match 5-bit encoding for each (nothing tricky).
67         enum X {
68             x0 , x1 , x2 , x3 , x4 , x5 , x6 , x7 ,
69             x8 , x9 , x10, x11, x12, x13, x14, x15,
70             x16, x17, x18, x19, x20, x21, x22, x23,
71             x24, x25, x26, x27, x28, x29, x30, xzr, sp=xzr,
72         };
73         enum V {
74             v0 , v1 , v2 , v3 , v4 , v5 , v6 , v7 ,
75             v8 , v9 , v10, v11, v12, v13, v14, v15,
76             v16, v17, v18, v19, v20, v21, v22, v23,
77             v24, v25, v26, v27, v28, v29, v30, v31,
78         };
79 
80         void bytes(const void*, int);
81         void byte(uint8_t);
82         void word(uint32_t);
83 
84         struct Label {
85             int                                      offset = 0;
86             enum { NotYetSet, ARMDisp19, X86Disp32 } kind = NotYetSet;
87             SkSTArray<2, int>                        references;
88         };
89 
90         // x86-64
91 
92         void align(int mod);
93 
94         void int3();
95         void vzeroupper();
96         void ret();
97 
98         // Mem represents a value at base + disp + scale*index,
99         // or simply at base + disp if index=rsp.
100         enum Scale { ONE, TWO, FOUR, EIGHT };
101         struct Mem {
102             GP64  base;
103             int   disp  = 0;
104             GP64  index = rsp;
105             Scale scale = ONE;
106         };
107 
108         struct Operand {
109             union {
110                 int    reg;
111                 Mem    mem;
112                 Label* label;
113             };
114             enum { REG, MEM, LABEL } kind;
115 
OperandOperand116             Operand(GP64   r) : reg  (r), kind(REG  ) {}
OperandOperand117             Operand(Xmm    r) : reg  (r), kind(REG  ) {}
OperandOperand118             Operand(Ymm    r) : reg  (r), kind(REG  ) {}
OperandOperand119             Operand(Mem    m) : mem  (m), kind(MEM  ) {}
OperandOperand120             Operand(Label* l) : label(l), kind(LABEL) {}
121         };
122 
123         void vpand (Ymm dst, Ymm x, Operand y);
124         void vpandn(Ymm dst, Ymm x, Operand y);
125         void vpor  (Ymm dst, Ymm x, Operand y);
126         void vpxor (Ymm dst, Ymm x, Operand y);
127 
128         void vpaddd (Ymm dst, Ymm x, Operand y);
129         void vpsubd (Ymm dst, Ymm x, Operand y);
130         void vpmulld(Ymm dst, Ymm x, Operand y);
131 
132         void vpaddw   (Ymm dst, Ymm x, Operand y);
133         void vpsubw   (Ymm dst, Ymm x, Operand y);
134         void vpmullw  (Ymm dst, Ymm x, Operand y);
135 
136         void vpabsw   (Ymm dst, Operand x);
137         void vpavgw   (Ymm dst, Ymm x, Operand y);  // dst = (x+y+1)>>1, unsigned.
138         void vpmulhrsw(Ymm dst, Ymm x, Operand y);  // dst = (x*y + (1<<14)) >> 15, signed.
139         void vpminsw  (Ymm dst, Ymm x, Operand y);
140         void vpminuw  (Ymm dst, Ymm x, Operand y);
141         void vpmaxsw  (Ymm dst, Ymm x, Operand y);
142         void vpmaxuw  (Ymm dst, Ymm x, Operand y);
143 
144         void vaddps(Ymm dst, Ymm x, Operand y);
145         void vsubps(Ymm dst, Ymm x, Operand y);
146         void vmulps(Ymm dst, Ymm x, Operand y);
147         void vdivps(Ymm dst, Ymm x, Operand y);
148         void vminps(Ymm dst, Ymm x, Operand y);
149         void vmaxps(Ymm dst, Ymm x, Operand y);
150 
151         void vsqrtps(Ymm dst, Operand x);
152 
153         void vfmadd132ps(Ymm dst, Ymm x, Operand y);
154         void vfmadd213ps(Ymm dst, Ymm x, Operand y);
155         void vfmadd231ps(Ymm dst, Ymm x, Operand y);
156 
157         void vfmsub132ps(Ymm dst, Ymm x, Operand y);
158         void vfmsub213ps(Ymm dst, Ymm x, Operand y);
159         void vfmsub231ps(Ymm dst, Ymm x, Operand y);
160 
161         void vfnmadd132ps(Ymm dst, Ymm x, Operand y);
162         void vfnmadd213ps(Ymm dst, Ymm x, Operand y);
163         void vfnmadd231ps(Ymm dst, Ymm x, Operand y);
164 
165         void vpackusdw(Ymm dst, Ymm x, Operand y);
166         void vpackuswb(Ymm dst, Ymm x, Operand y);
167 
168         void vpunpckldq(Ymm dst, Ymm x, Operand y);
169         void vpunpckhdq(Ymm dst, Ymm x, Operand y);
170 
171         void vpcmpeqd(Ymm dst, Ymm x, Operand y);
172         void vpcmpgtd(Ymm dst, Ymm x, Operand y);
173         void vpcmpeqw(Ymm dst, Ymm x, Operand y);
174         void vpcmpgtw(Ymm dst, Ymm x, Operand y);
175 
176         void vcmpps   (Ymm dst, Ymm x, Operand y, int imm);
vcmpeqps(Ymm dst,Ymm x,Operand y)177         void vcmpeqps (Ymm dst, Ymm x, Operand y) { this->vcmpps(dst,x,y,0); }
vcmpltps(Ymm dst,Ymm x,Operand y)178         void vcmpltps (Ymm dst, Ymm x, Operand y) { this->vcmpps(dst,x,y,1); }
vcmpleps(Ymm dst,Ymm x,Operand y)179         void vcmpleps (Ymm dst, Ymm x, Operand y) { this->vcmpps(dst,x,y,2); }
vcmpneqps(Ymm dst,Ymm x,Operand y)180         void vcmpneqps(Ymm dst, Ymm x, Operand y) { this->vcmpps(dst,x,y,4); }
181 
182         // Sadly, the x parameter cannot be a general Operand for these shifts.
183         void vpslld(Ymm dst, Ymm x, int imm);
184         void vpsrld(Ymm dst, Ymm x, int imm);
185         void vpsrad(Ymm dst, Ymm x, int imm);
186 
187         void vpsllw(Ymm dst, Ymm x, int imm);
188         void vpsrlw(Ymm dst, Ymm x, int imm);
189         void vpsraw(Ymm dst, Ymm x, int imm);
190 
191         void vpermq    (Ymm dst, Operand x, int imm);
192         void vperm2f128(Ymm dst, Ymm x, Operand y, int imm);
193         void vpermps   (Ymm dst, Ymm ix, Operand src);        // dst[i] = src[ix[i]]
194 
195         enum Rounding { NEAREST, FLOOR, CEIL, TRUNC, CURRENT };
196         void vroundps(Ymm dst, Operand x, Rounding);
197 
198         void vmovdqa(Ymm dst, Operand x);
199         void vmovups(Ymm dst, Operand x);
200         void vmovups(Xmm dst, Operand x);
201         void vmovups(Operand dst, Ymm x);
202         void vmovups(Operand dst, Xmm x);
203 
204         void vcvtdq2ps (Ymm dst, Operand x);
205         void vcvttps2dq(Ymm dst, Operand x);
206         void vcvtps2dq (Ymm dst, Operand x);
207 
208         void vcvtps2ph(Operand dst, Ymm x, Rounding);
209         void vcvtph2ps(Ymm dst, Operand x);
210 
211         void vpblendvb(Ymm dst, Ymm x, Operand y, Ymm z);
212 
213         void vpshufb(Ymm dst, Ymm x, Operand y);
214 
215         void vptest(Ymm x, Operand y);
216 
217         void vbroadcastss(Ymm dst, Operand y);
218 
219         void vpmovzxwd(Ymm dst, Operand src);   // dst = src, 128-bit, uint16_t -> int
220         void vpmovzxbd(Ymm dst, Operand src);   // dst = src,  64-bit, uint8_t  -> int
221 
222         void vmovq(Operand dst, Xmm src);  // dst = src,  64-bit
223         void vmovd(Operand dst, Xmm src);  // dst = src,  32-bit
224         void vmovd(Xmm dst, Operand src);  // dst = src,  32-bit
225 
226         void vpinsrd(Xmm dst, Xmm src, Operand y, int imm);  // dst = src; dst[imm] = y, 32-bit
227         void vpinsrw(Xmm dst, Xmm src, Operand y, int imm);  // dst = src; dst[imm] = y, 16-bit
228         void vpinsrb(Xmm dst, Xmm src, Operand y, int imm);  // dst = src; dst[imm] = y,  8-bit
229 
230         void vextracti128(Operand dst, Ymm src, int imm);    // dst = src[imm], 128-bit
231         void vpextrd     (Operand dst, Xmm src, int imm);    // dst = src[imm],  32-bit
232         void vpextrw     (Operand dst, Xmm src, int imm);    // dst = src[imm],  16-bit
233         void vpextrb     (Operand dst, Xmm src, int imm);    // dst = src[imm],   8-bit
234 
235         // if (mask & 0x8000'0000) {
236         //     dst = base[scale*ix];
237         // }
238         // mask = 0;
239         void vgatherdps(Ymm dst, Scale scale, Ymm ix, GP64 base, Ymm mask);
240 
241 
242         void label(Label*);
243 
244         void jmp(Label*);
245         void je (Label*);
246         void jne(Label*);
247         void jl (Label*);
248         void jc (Label*);
249 
250         void add (Operand dst, int imm);
251         void sub (Operand dst, int imm);
252         void cmp (Operand dst, int imm);
253         void mov (Operand dst, int imm);
254         void movb(Operand dst, int imm);
255 
256         void add (Operand dst, GP64 x);
257         void sub (Operand dst, GP64 x);
258         void cmp (Operand dst, GP64 x);
259         void mov (Operand dst, GP64 x);
260         void movb(Operand dst, GP64 x);
261 
262         void add (GP64 dst, Operand x);
263         void sub (GP64 dst, Operand x);
264         void cmp (GP64 dst, Operand x);
265         void mov (GP64 dst, Operand x);
266         void movb(GP64 dst, Operand x);
267 
268         // Disambiguators... choice is arbitrary (but generates different code!).
add(GP64 dst,GP64 x)269         void add (GP64 dst, GP64 x) { this->add (Operand(dst), x); }
sub(GP64 dst,GP64 x)270         void sub (GP64 dst, GP64 x) { this->sub (Operand(dst), x); }
cmp(GP64 dst,GP64 x)271         void cmp (GP64 dst, GP64 x) { this->cmp (Operand(dst), x); }
mov(GP64 dst,GP64 x)272         void mov (GP64 dst, GP64 x) { this->mov (Operand(dst), x); }
movb(GP64 dst,GP64 x)273         void movb(GP64 dst, GP64 x) { this->movb(Operand(dst), x); }
274 
275         void movzbq(GP64 dst, Operand x);  // dst = x, uint8_t  -> int
276         void movzwq(GP64 dst, Operand x);  // dst = x, uint16_t -> int
277 
278         // aarch64
279 
280         // d = op(n,m)
281         using DOpNM = void(V d, V n, V m);
282         DOpNM  and16b, orr16b, eor16b, bic16b, bsl16b,
283                add4s,  sub4s,  mul4s,
284               cmeq4s, cmgt4s,
285                        sub8h,  mul8h,
286               fadd4s, fsub4s, fmul4s, fdiv4s, fmin4s, fmax4s,
287               fcmeq4s, fcmgt4s, fcmge4s,
288               tbl,
289               uzp14s, uzp24s,
290               zip14s, zip24s;
291 
292         // TODO: there are also float ==,<,<=,>,>= instructions with an immediate 0.0f,
293         // and the register comparison > and >= can also compare absolute values.  Interesting.
294 
295         // d += n*m
296         void fmla4s(V d, V n, V m);
297 
298         // d -= n*m
299         void fmls4s(V d, V n, V m);
300 
301         // d = op(n,imm)
302         using DOpNImm = void(V d, V n, int imm);
303         DOpNImm sli4s,
304                 shl4s, sshr4s, ushr4s,
305                                ushr8h;
306 
307         // d = op(n)
308         using DOpN = void(V d, V n);
309         DOpN not16b,    // d = ~n
310              fneg4s,    // d = -n
311              fsqrt4s,   // d = sqrtf(n)
312              scvtf4s,   // int -> float
313              fcvtzs4s,  // truncate float -> int
314              fcvtns4s,  // round float -> int  (nearest even)
315              frintp4s,  // round float -> int as float, toward plus infinity  (ceil)
316              frintm4s,  // round float -> int as float, toward minus infinity (floor)
317              fcvtn,     // f32 -> f16 in low half
318              fcvtl,     // f16 in low half -> f32
319              xtns2h,    // u32 -> u16
320              xtnh2b,    // u16 -> u8
321              uxtlb2h,   // u8 -> u16    (TODO: this is a special case of ushll.8h)
322              uxtlh2s,   // u16 -> u32   (TODO: this is a special case of ushll.4s)
323              uminv4s;   // dst[0] = min(n[0],n[1],n[2],n[3]), n as unsigned
324 
325         void brk (int imm16);
326         void ret (X);
327         void add (X d, X n, int imm12);
328         void sub (X d, X n, int imm12);
329         void subs(X d, X n, int imm12);  // subtract setting condition flags
330 
331         enum Shift { LSL,LSR,ASR,ROR };
332         void add (X d, X n, X m, Shift=LSL, int imm6=0);  // d=n+Shift(m,imm6), for Shift != ROR.
333 
334         // There's another encoding for unconditional branches that can jump further,
335         // but this one encoded as b.al is simple to implement and should be fine.
b(Label * l)336         void b  (Label* l) { this->b(Condition::al, l); }
bne(Label * l)337         void bne(Label* l) { this->b(Condition::ne, l); }
blt(Label * l)338         void blt(Label* l) { this->b(Condition::lt, l); }
339 
340         // "cmp ..." is just an assembler mnemonic for "subs xzr, ..."!
cmp(X n,int imm12)341         void cmp(X n, int imm12) { this->subs(xzr, n, imm12); }
342 
343         // Compare and branch if zero/non-zero, as if
344         //      cmp(t,0)
345         //      beq/bne(l)
346         // but without setting condition flags.
347         void cbz (X t, Label* l);
348         void cbnz(X t, Label* l);
349 
350         // TODO: there are ldur variants with unscaled imm, useful?
351         void ldrd(X dst, X src, int imm12=0);  // 64-bit dst = *(src+imm12*8)
352         void ldrs(X dst, X src, int imm12=0);  // 32-bit dst = *(src+imm12*4)
353         void ldrh(X dst, X src, int imm12=0);  // 16-bit dst = *(src+imm12*2)
354         void ldrb(X dst, X src, int imm12=0);  //  8-bit dst = *(src+imm12)
355 
356         void ldrq(V dst, Label*);  // 128-bit PC-relative load
357 
358         void ldrq(V dst, X src, int imm12=0);  // 128-bit dst = *(src+imm12*16)
359         void ldrd(V dst, X src, int imm12=0);  //  64-bit dst = *(src+imm12*8)
360         void ldrs(V dst, X src, int imm12=0);  //  32-bit dst = *(src+imm12*4)
361         void ldrh(V dst, X src, int imm12=0);  //  16-bit dst = *(src+imm12*2)
362         void ldrb(V dst, X src, int imm12=0);  //   8-bit dst = *(src+imm12)
363 
364         void strs(X src, X dst, int imm12=0);  // 32-bit *(dst+imm12*4) = src
365 
366         void strq(V src, X dst, int imm12=0);  // 128-bit *(dst+imm12*16) = src
367         void strd(V src, X dst, int imm12=0);  //  64-bit *(dst+imm12*8)  = src
368         void strs(V src, X dst, int imm12=0);  //  32-bit *(dst+imm12*4)  = src
369         void strh(V src, X dst, int imm12=0);  //  16-bit *(dst+imm12*2)  = src
370         void strb(V src, X dst, int imm12=0);  //   8-bit *(dst+imm12)    = src
371 
372         void movs(X dst, V src, int lane);  // dst = 32-bit src[lane]
373         void inss(V dst, X src, int lane);  // dst[lane] = 32-bit src
374 
375         void dup4s  (V dst, X src);  // Each 32-bit lane = src
376 
377         void ld1r4s (V dst, X src);  // Each 32-bit lane = *src
378         void ld1r8h (V dst, X src);  // Each 16-bit lane = *src
379         void ld1r16b(V dst, X src);  // Each  8-bit lane = *src
380 
381         void ld24s(V dst, X src);  // deinterleave(dst,dst+1)             = 256-bit *src
382         void ld44s(V dst, X src);  // deinterleave(dst,dst+1,dst+2,dst+3) = 512-bit *src
383         void st24s(V src, X dst);  // 256-bit *dst = interleave_32bit_lanes(src,src+1)
384         void st44s(V src, X dst);  // 512-bit *dst = interleave_32bit_lanes(src,src+1,src+2,src+3)
385 
386         void ld24s(V dst, X src, int lane);  // Load 2 32-bit values into given lane of dst..dst+1
387         void ld44s(V dst, X src, int lane);  // Load 4 32-bit values into given lane of dst..dst+3
388 
389     private:
390         uint8_t* fCode;
391         size_t   fSize;
392 
393         // x86-64
394         enum W { W0, W1 };      // Are the lanes 64-bit (W1) or default (W0)?  Intel Vol 2A 2.3.5.5
395         enum L { L128, L256 };  // Is this a 128- or 256-bit operation?        Intel Vol 2A 2.3.6.2
396 
397         // Helpers for vector instructions.
398         void op(int prefix, int map, int opcode, int dst, int x, Operand y, W,L);
399         void op(int p, int m, int o, Ymm d, Ymm x, Operand y, W w=W0) { op(p,m,o, d,x,y,w,L256); }
400         void op(int p, int m, int o, Ymm d,        Operand y, W w=W0) { op(p,m,o, d,0,y,w,L256); }
401         void op(int p, int m, int o, Xmm d, Xmm x, Operand y, W w=W0) { op(p,m,o, d,x,y,w,L128); }
402         void op(int p, int m, int o, Xmm d,        Operand y, W w=W0) { op(p,m,o, d,0,y,w,L128); }
403 
404         // Helpers for GP64 instructions.
405         void op(int opcode, Operand dst, GP64 x);
406         void op(int opcode, int opcode_ext, Operand dst, int imm);
407 
408         void jump(uint8_t condition, Label*);
409         int disp32(Label*);
410         void imm_byte_after_operand(const Operand&, int byte);
411 
412         // aarch64
413 
414         // Opcode for 3-arguments ops is split between hi and lo:
415         //    [11 bits hi] [5 bits m] [6 bits lo] [5 bits n] [5 bits d]
416         void op(uint32_t hi, V m, uint32_t lo, V n, V d);
417 
418         // 0,1,2-argument ops, with or without an immediate:
419         //    [ 22 bits op ] [5 bits n] [5 bits d]
420         // Any immediate falls in the middle somewhere overlapping with either op, n, or both.
421         void op(uint32_t op22, V n, V d, int imm=0);
422         void op(uint32_t op22, X n, V d, int imm=0) { this->op(op22,(V)n,   d,imm); }
423         void op(uint32_t op22, V n, X d, int imm=0) { this->op(op22,   n,(V)d,imm); }
424         void op(uint32_t op22, X n, X d, int imm=0) { this->op(op22,(V)n,(V)d,imm); }
425         void op(uint32_t op22,           int imm=0) { this->op(op22,(V)0,(V)0,imm); }
426         // (1-argument ops don't seem to have a consistent convention of passing as n or d.)
427 
428 
429         // Order matters... value is 4-bit encoding for condition code.
430         enum class Condition { eq,ne,cs,cc,mi,pl,vs,vc,hi,ls,ge,lt,gt,le,al };
431         void b(Condition, Label*);
432         int disp19(Label*);
433     };
434 
435     // Order matters a little: Ops <=store128 are treated as having side effects.
436     #define SKVM_OPS(M)                                              \
437         M(assert_true)                                               \
438         M(trace_line) M(trace_var)                                   \
439         M(trace_enter) M(trace_exit) M(trace_scope)                  \
440         M(store8)   M(store16)   M(store32) M(store64) M(store128)   \
441         M(load8)    M(load16)    M(load32)  M(load64) M(load128)     \
442         M(index)                                                     \
443         M(gather8)  M(gather16)  M(gather32)                         \
444                                  M(uniform32)                        \
445                                  M(array32)                          \
446         M(splat)                                                     \
447         M(add_f32) M(add_i32)                                        \
448         M(sub_f32) M(sub_i32)                                        \
449         M(mul_f32) M(mul_i32)                                        \
450         M(div_f32)                                                   \
451         M(min_f32) M(max_f32)                                        \
452         M(fma_f32) M(fms_f32) M(fnma_f32)                            \
453         M(sqrt_f32)                                                  \
454         M(shl_i32) M(shr_i32) M(sra_i32)                             \
455         M(ceil) M(floor) M(trunc) M(round) M(to_fp16) M(from_fp16)   \
456         M(to_f32)                                                    \
457         M(neq_f32) M(eq_f32) M(eq_i32)                               \
458         M(gte_f32) M(gt_f32) M(gt_i32)                               \
459         M(bit_and)     M(bit_or)     M(bit_xor)     M(bit_clear)     \
460         M(select)                                                    \
461         M(duplicate)
462     // End of SKVM_OPS
463 
464     enum class Op : int {
465     #define M(op) op,
466         SKVM_OPS(M)
467     #undef M
468     };
469 
has_side_effect(Op op)470     static inline bool has_side_effect(Op op) {
471         return op <= Op::store128;
472     }
touches_varying_memory(Op op)473     static inline bool touches_varying_memory(Op op) {
474         return Op::store8 <= op && op <= Op::load128;
475     }
is_always_varying(Op op)476     static inline bool is_always_varying(Op op) {
477         return Op::store8 <= op && op <= Op::index;
478     }
is_trace(Op op)479     static inline bool is_trace(Op op) {
480         return Op::trace_line <= op && op <= Op::trace_scope;
481     }
482 
483     using Val = int;
484     // We reserve an impossible Val ID as a sentinel
485     // NA meaning none, n/a, null, nil, etc.
486     static const Val NA = -1;
487 
488     // Ptr and UPtr are an index into the registers args[]. The two styles of using args are
489     // varyings and uniforms. Varyings use Ptr, have a stride associated with them, and are
490     // evaluated everytime through the loop. Uniforms use UPtr, don't have a stride, and are
491     // usually hoisted above the loop.
492     struct Ptr { int ix; };
493     struct UPtr : public Ptr {};
494 
495     bool operator!=(Ptr a, Ptr b);
496 
497     struct I32 {
498         Builder* builder = nullptr;
499         Val      id      = NA;
500         explicit operator bool() const { return id != NA; }
501         Builder* operator->()    const { return builder; }
502     };
503 
504     struct F32 {
505         Builder* builder = nullptr;
506         Val      id      = NA;
507         explicit operator bool() const { return id != NA; }
508         Builder* operator->()    const { return builder; }
509     };
510 
511     struct Color {
512         F32 r,g,b,a;
513         explicit operator bool() const { return r && g && b && a; }
514         Builder* operator->()    const { return a.operator->(); }
515     };
516 
517     struct HSLA {
518         F32 h,s,l,a;
519         explicit operator bool() const { return h && s && l && a; }
520         Builder* operator->()    const { return a.operator->(); }
521     };
522 
523     struct Coord {
524         F32 x,y;
525         explicit operator bool() const { return x && y; }
526         Builder* operator->()    const { return x.operator->(); }
527     };
528 
529     struct Uniform {
530         UPtr ptr;
531         int offset;
532     };
533     struct Uniforms {
534         UPtr             base;
535         std::vector<int> buf;
536 
UniformsUniforms537         Uniforms(UPtr ptr, int init) : base(ptr), buf(init) {}
538 
pushUniforms539         Uniform push(int val) {
540             buf.push_back(val);
541             return {base, (int)( sizeof(int)*(buf.size() - 1) )};
542         }
543 
pushFUniforms544         Uniform pushF(float val) {
545             int bits;
546             memcpy(&bits, &val, sizeof(int));
547             return this->push(bits);
548         }
549 
pushPtrUniforms550         Uniform pushPtr(const void* ptr) {
551             // Jam the pointer into 1 or 2 ints.
552             int ints[sizeof(ptr) / sizeof(int)];
553             memcpy(ints, &ptr, sizeof(ptr));
554             for (int bits : ints) {
555                 buf.push_back(bits);
556             }
557             return {base, (int)( sizeof(int)*(buf.size() - std::size(ints)) )};
558         }
559 
pushArrayUniforms560         Uniform pushArray(int32_t a[]) {
561             return this->pushPtr(a);
562         }
563 
pushArrayFUniforms564         Uniform pushArrayF(float a[]) {
565             return this->pushPtr(a);
566         }
567     };
568 
569     struct PixelFormat {
570         enum { UNORM, SRGB, FLOAT, XRNG } encoding;
571         int r_bits,  g_bits,  b_bits,  a_bits,
572             r_shift, g_shift, b_shift, a_shift;
573     };
574     PixelFormat SkColorType_to_PixelFormat(SkColorType);
575 
576     SK_BEGIN_REQUIRE_DENSE
577     struct Instruction {
578         Op  op;              // v* = op(x,y,z,w,immA,immB), where * == index of this Instruction.
579         Val x,y,z,w;         // Enough arguments for Op::store128.
580         int immA,immB,immC;  // Immediate bit pattern, shift count, pointer index, byte offset, etc.
581     };
582     SK_END_REQUIRE_DENSE
583 
584     bool operator==(const Instruction&, const Instruction&);
585     struct InstructionHash {
586         uint32_t operator()(const Instruction&, uint32_t seed=0) const;
587     };
588 
589     struct OptimizedInstruction {
590         Op op;
591         Val x,y,z,w;
592         int immA,immB,immC;
593 
594         Val  death;
595         bool can_hoist;
596     };
597 
598     struct Features {
599         bool fma   = false;
600         bool fp16  = false;
601     };
602 
603     class TraceHook {
604     public:
605         virtual ~TraceHook() = default;
606         virtual void line(int lineNum) = 0;
607         virtual void var(int slot, int32_t val) = 0;
608         virtual void enter(int fnIdx) = 0;
609         virtual void exit(int fnIdx) = 0;
610         virtual void scope(int delta) = 0;
611     };
612 
613     class Builder {
614     public:
615         Builder(bool createDuplicates = false);
616         Builder(Features, bool createDuplicates = false);
617 
618         Program done(const char* debug_name,
619                      bool allow_jit,
620                      std::unique_ptr<viz::Visualizer> visualizer) const;
621         Program done(const char* debug_name = nullptr,
622                      bool allow_jit=true) const;
623 
624         // Mostly for debugging, tests, etc.
program()625         std::vector<Instruction> program() const { return fProgram; }
626         std::vector<OptimizedInstruction> optimize(viz::Visualizer* visualizer = nullptr) const;
627 
628         // Returns a trace-hook ID which must be passed to the trace opcodes.
629         int attachTraceHook(TraceHook*);
630 
631         // Convenience arg() wrappers for most common strides, sizeof(T) and 0.
632         template <typename T>
varying()633         Ptr varying() { return this->arg(sizeof(T)); }
varying(int stride)634         Ptr varying(int stride) { SkASSERT(stride > 0); return this->arg(stride); }
uniform()635         UPtr uniform() { Ptr p = this->arg(0); return UPtr{{p.ix}}; }
636 
637         // TODO: allow uniform (i.e. Ptr) offsets to store* and load*?
638         // TODO: sign extension (signed types) for <32-bit loads?
639         // TODO: unsigned integer operations where relevant (just comparisons?)?
640 
641         // Assert cond is true, printing debug when not.
642         void assert_true(I32 cond, I32 debug);
assert_true(I32 cond,F32 debug)643         void assert_true(I32 cond, F32 debug) { assert_true(cond, pun_to_I32(debug)); }
assert_true(I32 cond)644         void assert_true(I32 cond)            { assert_true(cond, cond); }
645 
646         // Insert debug traces into the instruction stream
647         bool mergeMasks(I32& mask, I32& traceMask);
648         void trace_line (int traceHookID, I32 mask, I32 traceMask, int line);
649         void trace_var  (int traceHookID, I32 mask, I32 traceMask, int slot, I32 val);
650         void trace_enter(int traceHookID, I32 mask, I32 traceMask, int fnIdx);
651         void trace_exit (int traceHookID, I32 mask, I32 traceMask, int fnIdx);
652         void trace_scope(int traceHookID, I32 mask, I32 traceMask, int delta);
653 
654         // Store {8,16,32,64,128}-bit varying.
655         void store8  (Ptr ptr, I32 val);
656         void store16 (Ptr ptr, I32 val);
657         void store32 (Ptr ptr, I32 val);
storeF(Ptr ptr,F32 val)658         void storeF  (Ptr ptr, F32 val) { store32(ptr, pun_to_I32(val)); }
659         void store64 (Ptr ptr, I32 lo, I32 hi);              // *ptr = lo|(hi<<32)
660         void store128(Ptr ptr, I32 x, I32 y, I32 z, I32 w);  // *ptr = x|(y<<32)|(z<<64)|(w<<96)
661 
662         // Returns varying {n, n-1, n-2, ..., 1}, where n is the argument to Program::eval().
663         I32 index();
664 
665         // Load {8,16,32,64,128}-bit varying.
666         I32 load8  (Ptr ptr);
667         I32 load16 (Ptr ptr);
668         I32 load32 (Ptr ptr);
loadF(Ptr ptr)669         F32 loadF  (Ptr ptr) { return pun_to_F32(load32(ptr)); }
670         I32 load64 (Ptr ptr, int lane);  // Load 32-bit lane 0-1 of  64-bit value.
671         I32 load128(Ptr ptr, int lane);  // Load 32-bit lane 0-3 of 128-bit value.
672 
673         // Load i32/f32 uniform with byte-count offset.
674         I32 uniform32(UPtr ptr, int offset);
uniformF(UPtr ptr,int offset)675         F32 uniformF (UPtr ptr, int offset) { return pun_to_F32(uniform32(ptr,offset)); }
676 
677         // Load i32/f32 uniform with byte-count offset and an c-style array index. The address of
678         // the element is (*(ptr + byte-count offset))[index].
679         I32 array32  (UPtr ptr, int offset, int index);
arrayF(UPtr ptr,int offset,int index)680         F32 arrayF   (UPtr ptr, int offset, int index) {
681             return pun_to_F32(array32(ptr, offset, index));
682         }
683 
684         // Push and load this color as a uniform.
685         Color uniformColor(SkColor4f, Uniforms*);
686 
687         // Gather u8,u16,i32 with varying element-count index from *(ptr + byte-count offset).
688         I32 gather8 (UPtr ptr, int offset, I32 index);
689         I32 gather16(UPtr ptr, int offset, I32 index);
690         I32 gather32(UPtr ptr, int offset, I32 index);
gatherF(UPtr ptr,int offset,I32 index)691         F32 gatherF (UPtr ptr, int offset, I32 index) {
692             return pun_to_F32(gather32(ptr, offset, index));
693         }
694 
695         // Convenience methods for working with skvm::Uniform(s).
uniform32(Uniform u)696         I32 uniform32(Uniform u)            { return this->uniform32(u.ptr, u.offset); }
uniformF(Uniform u)697         F32 uniformF (Uniform u)            { return this->uniformF (u.ptr, u.offset); }
gather8(Uniform u,I32 index)698         I32 gather8  (Uniform u, I32 index) { return this->gather8  (u.ptr, u.offset, index); }
gather16(Uniform u,I32 index)699         I32 gather16 (Uniform u, I32 index) { return this->gather16 (u.ptr, u.offset, index); }
gather32(Uniform u,I32 index)700         I32 gather32 (Uniform u, I32 index) { return this->gather32 (u.ptr, u.offset, index); }
gatherF(Uniform u,I32 index)701         F32 gatherF  (Uniform u, I32 index) { return this->gatherF  (u.ptr, u.offset, index); }
702 
703         // Convenience methods for working with array pointers in skvm::Uniforms. Index is an
704         // array index and not a byte offset. The array pointer is stored at u.
array32(Uniform a,int index)705         I32 array32  (Uniform a, int index) { return this->array32  (a.ptr, a.offset, index); }
arrayF(Uniform a,int index)706         F32 arrayF   (Uniform a, int index) { return this->arrayF   (a.ptr, a.offset, index); }
707 
708         // Load an immediate constant.
709         I32 splat(int      n);
splat(unsigned u)710         I32 splat(unsigned u) { return splat((int)u); }
splat(float f)711         F32 splat(float    f) {
712             int bits;
713             memcpy(&bits, &f, 4);
714             return pun_to_F32(splat(bits));
715         }
716 
717         // Some operations make sense with immediate arguments,
718         // so we provide overloads inline to make that seamless.
719         //
720         // We omit overloads that may indicate a bug or performance issue.
721         // In general it does not make sense to pass immediates to unary operations,
722         // and even sometimes not for binary operations, e.g.
723         //
724         //   div(x, y)    -- normal every day divide
725         //   div(3.0f, y) -- yep, makes sense
726         //   div(x, 3.0f) -- omitted as a reminder you probably want mul(x, 1/3.0f).
727         //
728         // You can of course always splat() to override these opinions.
729 
730         // float math, comparisons, etc.
731         F32 add(F32, F32);
add(F32 x,float y)732         F32 add(F32 x, float y) { return add(x, splat(y)); }
add(float x,F32 y)733         F32 add(float x, F32 y) { return add(splat(x), y); }
734 
735         F32 sub(F32, F32);
sub(F32 x,float y)736         F32 sub(F32 x, float y) { return sub(x, splat(y)); }
sub(float x,F32 y)737         F32 sub(float x, F32 y) { return sub(splat(x), y); }
738 
739         F32 mul(F32, F32);
mul(F32 x,float y)740         F32 mul(F32 x, float y) { return mul(x, splat(y)); }
mul(float x,F32 y)741         F32 mul(float x, F32 y) { return mul(splat(x), y); }
742 
743         // mul(), but allowing optimizations not strictly legal under IEEE-754 rules.
744         F32 fast_mul(F32, F32);
fast_mul(F32 x,float y)745         F32 fast_mul(F32 x, float y) { return fast_mul(x, splat(y)); }
fast_mul(float x,F32 y)746         F32 fast_mul(float x, F32 y) { return fast_mul(splat(x), y); }
747 
748         F32 div(F32, F32);
div(float x,F32 y)749         F32 div(float x, F32 y) { return div(splat(x), y); }
750 
751         F32 min(F32, F32);
min(F32 x,float y)752         F32 min(F32 x, float y) { return min(x, splat(y)); }
min(float x,F32 y)753         F32 min(float x, F32 y) { return min(splat(x), y); }
754 
755         F32 max(F32, F32);
max(F32 x,float y)756         F32 max(F32 x, float y) { return max(x, splat(y)); }
max(float x,F32 y)757         F32 max(float x, F32 y) { return max(splat(x), y); }
758 
759         // TODO: remove mad()?  It's just sugar.
mad(F32 x,F32 y,F32 z)760         F32 mad(F32   x, F32   y, F32   z) { return add(mul(x,y), z); }
mad(F32 x,F32 y,float z)761         F32 mad(F32   x, F32   y, float z) { return mad(      x ,       y , splat(z)); }
mad(F32 x,float y,F32 z)762         F32 mad(F32   x, float y, F32   z) { return mad(      x , splat(y),       z ); }
mad(F32 x,float y,float z)763         F32 mad(F32   x, float y, float z) { return mad(      x , splat(y), splat(z)); }
mad(float x,F32 y,F32 z)764         F32 mad(float x, F32   y, F32   z) { return mad(splat(x),       y ,       z ); }
mad(float x,F32 y,float z)765         F32 mad(float x, F32   y, float z) { return mad(splat(x),       y , splat(z)); }
mad(float x,float y,F32 z)766         F32 mad(float x, float y, F32   z) { return mad(splat(x), splat(y),       z ); }
767 
768         F32        sqrt(F32);
769         F32 approx_log2(F32);
770         F32 approx_pow2(F32);
approx_log(F32 x)771         F32 approx_log (F32 x) { return mul(0.69314718f, approx_log2(x)); }
approx_exp(F32 x)772         F32 approx_exp (F32 x) { return approx_pow2(mul(x, 1.4426950408889634074f)); }
773 
774         F32 approx_powf(F32 base, F32 exp);
approx_powf(F32 base,float exp)775         F32 approx_powf(F32 base, float exp) { return approx_powf(base, splat(exp)); }
approx_powf(float base,F32 exp)776         F32 approx_powf(float base, F32 exp) { return approx_powf(splat(base), exp); }
777 
778 
779         F32 approx_sin(F32 radians);
approx_cos(F32 radians)780         F32 approx_cos(F32 radians) { return approx_sin(add(radians, SK_ScalarPI/2)); }
781         F32 approx_tan(F32 radians);
782 
783         F32 approx_asin(F32 x);
approx_acos(F32 x)784         F32 approx_acos(F32 x) { return sub(SK_ScalarPI/2, approx_asin(x)); }
785         F32 approx_atan(F32 x);
786         F32 approx_atan2(F32 y, F32 x);
787 
788         F32 lerp(F32   lo, F32   hi, F32   t);
lerp(F32 lo,F32 hi,float t)789         F32 lerp(F32   lo, F32   hi, float t) { return lerp(      lo ,       hi , splat(t)); }
lerp(F32 lo,float hi,float t)790         F32 lerp(F32   lo, float hi, float t) { return lerp(      lo , splat(hi), splat(t)); }
lerp(F32 lo,float hi,F32 t)791         F32 lerp(F32   lo, float hi, F32   t) { return lerp(      lo , splat(hi),       t ); }
lerp(float lo,F32 hi,F32 t)792         F32 lerp(float lo, F32   hi, F32   t) { return lerp(splat(lo),       hi ,       t ); }
lerp(float lo,F32 hi,float t)793         F32 lerp(float lo, F32   hi, float t) { return lerp(splat(lo),       hi , splat(t)); }
lerp(float lo,float hi,F32 t)794         F32 lerp(float lo, float hi, F32   t) { return lerp(splat(lo), splat(hi),       t ); }
795 
clamp(F32 x,F32 lo,F32 hi)796         F32 clamp(F32   x, F32   lo, F32   hi) { return max(lo, min(x, hi)); }
clamp(F32 x,F32 lo,float hi)797         F32 clamp(F32   x, F32   lo, float hi) { return clamp(      x ,       lo , splat(hi)); }
clamp(F32 x,float lo,float hi)798         F32 clamp(F32   x, float lo, float hi) { return clamp(      x , splat(lo), splat(hi)); }
clamp(F32 x,float lo,F32 hi)799         F32 clamp(F32   x, float lo, F32   hi) { return clamp(      x , splat(lo),       hi ); }
clamp(float x,F32 lo,F32 hi)800         F32 clamp(float x, F32   lo, F32   hi) { return clamp(splat(x),       lo ,       hi ); }
clamp(float x,F32 lo,float hi)801         F32 clamp(float x, F32   lo, float hi) { return clamp(splat(x),       lo , splat(hi)); }
clamp(float x,float lo,F32 hi)802         F32 clamp(float x, float lo, F32   hi) { return clamp(splat(x), splat(lo),       hi ); }
803 
clamp01(F32 x)804         F32 clamp01(F32 x) { return clamp(x, 0.0f, 1.0f); }
805 
abs(F32 x)806         F32    abs(F32 x) { return pun_to_F32(bit_and(pun_to_I32(x), 0x7fff'ffff)); }
807         F32  fract(F32 x) { return sub(x, floor(x)); }
808         F32   ceil(F32);
809         F32  floor(F32);
810         I32 is_NaN   (F32 x) { return neq(x,x); }
811         I32 is_finite(F32 x) { return lt(bit_and(pun_to_I32(x), 0x7f80'0000), 0x7f80'0000); }
812 
813         I32 trunc(F32 x);
814         I32 round(F32 x);  // Round to int using current rounding mode (as if lrintf()).
815         I32 pun_to_I32(F32 x) { return {x.builder, x.id}; }
816 
817         I32   to_fp16(F32 x);
818         F32 from_fp16(I32 x);
819 
820         I32 eq(F32, F32);
821         I32 eq(F32 x, float y) { return eq(x, splat(y)); }
822         I32 eq(float x, F32 y) { return eq(splat(x), y); }
823 
824         I32 neq(F32, F32);
825         I32 neq(F32 x, float y) { return neq(x, splat(y)); }
826         I32 neq(float x, F32 y) { return neq(splat(x), y); }
827 
828         I32 lt(F32, F32);
829         I32 lt(F32 x, float y) { return lt(x, splat(y)); }
830         I32 lt(float x, F32 y) { return lt(splat(x), y); }
831 
832         I32 lte(F32, F32);
833         I32 lte(F32 x, float y) { return lte(x, splat(y)); }
834         I32 lte(float x, F32 y) { return lte(splat(x), y); }
835 
836         I32 gt(F32, F32);
837         I32 gt(F32 x, float y) { return gt(x, splat(y)); }
838         I32 gt(float x, F32 y) { return gt(splat(x), y); }
839 
840         I32 gte(F32, F32);
841         I32 gte(F32 x, float y) { return gte(x, splat(y)); }
842         I32 gte(float x, F32 y) { return gte(splat(x), y); }
843 
844         // int math, comparisons, etc.
845         I32 add(I32, I32);
846         I32 add(I32 x, int y) { return add(x, splat(y)); }
847         I32 add(int x, I32 y) { return add(splat(x), y); }
848 
849         I32 sub(I32, I32);
850         I32 sub(I32 x, int y) { return sub(x, splat(y)); }
851         I32 sub(int x, I32 y) { return sub(splat(x), y); }
852 
853         I32 mul(I32, I32);
854         I32 mul(I32 x, int y) { return mul(x, splat(y)); }
855         I32 mul(int x, I32 y) { return mul(splat(x), y); }
856 
857         I32 shl(I32 x, int bits);
858         I32 shr(I32 x, int bits);
859         I32 sra(I32 x, int bits);
860 
861         I32 eq(I32, I32);
862         I32 eq(I32 x, int y) { return eq(x, splat(y)); }
863         I32 eq(int x, I32 y) { return eq(splat(x), y); }
864 
865         I32 neq(I32, I32);
866         I32 neq(I32 x, int y) { return neq(x, splat(y)); }
867         I32 neq(int x, I32 y) { return neq(splat(x), y); }
868 
869         I32 lt(I32, I32);
870         I32 lt(I32 x, int y) { return lt(x, splat(y)); }
871         I32 lt(int x, I32 y) { return lt(splat(x), y); }
872 
873         I32 lte(I32, I32);
874         I32 lte(I32 x, int y) { return lte(x, splat(y)); }
875         I32 lte(int x, I32 y) { return lte(splat(x), y); }
876 
877         I32 gt(I32, I32);
878         I32 gt(I32 x, int y) { return gt(x, splat(y)); }
879         I32 gt(int x, I32 y) { return gt(splat(x), y); }
880 
881         I32 gte(I32, I32);
882         I32 gte(I32 x, int y) { return gte(x, splat(y)); }
883         I32 gte(int x, I32 y) { return gte(splat(x), y); }
884 
885         F32 to_F32(I32 x);
886         F32 pun_to_F32(I32 x) { return {x.builder, x.id}; }
887 
888         // Bitwise operations.
889         I32 bit_and(I32, I32);
890         I32 bit_and(I32 x, int y) { return bit_and(x, splat(y)); }
891         I32 bit_and(int x, I32 y) { return bit_and(splat(x), y); }
892 
893         I32 bit_or(I32, I32);
894         I32 bit_or(I32 x, int y) { return bit_or(x, splat(y)); }
895         I32 bit_or(int x, I32 y) { return bit_or(splat(x), y); }
896 
897         I32 bit_xor(I32, I32);
898         I32 bit_xor(I32 x, int y) { return bit_xor(x, splat(y)); }
899         I32 bit_xor(int x, I32 y) { return bit_xor(splat(x), y); }
900 
901         I32 bit_clear(I32, I32);
902         I32 bit_clear(I32 x, int y) { return bit_clear(x, splat(y)); }
903         I32 bit_clear(int x, I32 y) { return bit_clear(splat(x), y); }
904 
905         I32 min(I32 x, I32 y) { return select(lte(x,y), x, y); }
906         I32 min(I32 x, int y) { return min(x, splat(y)); }
907         I32 min(int x, I32 y) { return min(splat(x), y); }
908 
909         I32 max(I32 x, I32 y) { return select(gte(x,y), x, y); }
910         I32 max(I32 x, int y) { return max(x, splat(y)); }
911         I32 max(int x, I32 y) { return max(splat(x), y); }
912 
913         I32 select(I32 cond, I32 t, I32 f);  // cond ? t : f
914         I32 select(I32 cond, int t, I32 f) { return select(cond, splat(t),       f ); }
915         I32 select(I32 cond, I32 t, int f) { return select(cond,       t , splat(f)); }
916         I32 select(I32 cond, int t, int f) { return select(cond, splat(t), splat(f)); }
917 
918         F32 select(I32 cond, F32 t, F32 f) {
919             return pun_to_F32(select(cond, pun_to_I32(t)
920                                          , pun_to_I32(f)));
921         }
922         F32 select(I32 cond, float t, F32   f) { return select(cond, splat(t),       f ); }
923         F32 select(I32 cond, F32   t, float f) { return select(cond,       t , splat(f)); }
924         F32 select(I32 cond, float t, float f) { return select(cond, splat(t), splat(f)); }
925 
926         I32 extract(I32 x, int bits, I32 z);   // (x>>bits) & z
927         I32 extract(I32 x, int bits, int z) { return extract(x, bits, splat(z)); }
928         I32 extract(int x, int bits, I32 z) { return extract(splat(x), bits, z); }
929 
930         I32 pack(I32 x, I32 y, int bits);   // x | (y<<bits)
931         I32 pack(I32 x, int y, int bits) { return pack(x, splat(y), bits); }
932         I32 pack(int x, I32 y, int bits) { return pack(splat(x), y, bits); }
933 
934 
935         // Common idioms used in several places, worth centralizing for consistency.
936         F32 from_unorm(int bits, I32);   // E.g. from_unorm(8, x) -> x * (1/255.0f)
937         I32   to_unorm(int bits, F32);   // E.g.   to_unorm(8, x) -> round(x * 255)
938 
939         Color   load(PixelFormat, Ptr ptr);
940         void   store(PixelFormat, Ptr ptr, Color);
941         Color gather(PixelFormat, UPtr ptr, int offset, I32 index);
942         Color gather(PixelFormat f, Uniform u, I32 index) {
943             return gather(f, u.ptr, u.offset, index);
944         }
945 
946         void   premul(F32* r, F32* g, F32* b, F32 a);
947         void unpremul(F32* r, F32* g, F32* b, F32 a);
948 
949         Color   premul(Color c) {   this->premul(&c.r, &c.g, &c.b, c.a); return c; }
950         Color unpremul(Color c) { this->unpremul(&c.r, &c.g, &c.b, c.a); return c; }
951 
952         Color lerp(Color lo, Color hi, F32 t);
953         Color blend(SkBlendMode, Color src, Color dst);
954 
955         Color clamp01(Color c) {
956             return { clamp01(c.r), clamp01(c.g), clamp01(c.b), clamp01(c.a) };
957         }
958 
959         HSLA  to_hsla(Color);
960         Color to_rgba(HSLA);
961 
962         void dump(SkWStream* = nullptr) const;
963 
964         uint64_t hash() const;
965 
966         Val push(Instruction);
967 
968         bool allImm() const { return true; }
969 
970         template <typename T, typename... Rest>
971         bool allImm(Val id, T* imm, Rest... rest) const {
972             if (fProgram[id].op == Op::splat) {
973                 static_assert(sizeof(T) == 4);
974                 memcpy(imm, &fProgram[id].immA, 4);
975                 return this->allImm(rest...);
976             }
977             return false;
978         }
979 
980         bool allUniform() const { return true; }
981 
982         template <typename... Rest>
983         bool allUniform(Val id, Uniform* uni, Rest... rest) const {
984             if (fProgram[id].op == Op::uniform32) {
985                 uni->ptr.ix = fProgram[id].immA;
986                 uni->offset = fProgram[id].immB;
987                 return this->allUniform(rest...);
988             }
989             return false;
990         }
991 
992     private:
993         // Declare an argument with given stride (use stride=0 for uniforms).
994         Ptr arg(int stride);
995 
996         Val push(
997                 Op op, Val x=NA, Val y=NA, Val z=NA, Val w=NA, int immA=0, int immB=0, int immC=0) {
998             return this->push(Instruction{op, x,y,z,w, immA,immB,immC});
999         }
1000 
1001         template <typename T>
1002         bool isImm(Val id, T want) const {
1003             T imm = 0;
1004             return this->allImm(id, &imm) && imm == want;
1005         }
1006 
1007         // `canonicalizeIdOrder` and has two rules:
1008         // - Immediate values go last; that is, `x + 1` is preferred over `1 + x`.
1009         // - If both/neither of x and y are immediate, lower IDs go before higher IDs.
1010         // Canonicalizing the IDs helps with opcode deduplication. Putting immediates in a
1011         // consistent position makes it easier to detect no-op arithmetic like `x + 0`.
1012         template <typename F32_or_I32>
1013         void canonicalizeIdOrder(F32_or_I32& x, F32_or_I32& y);
1014 
1015         // If the passed in ID is a bit-not, return the value being bit-notted. Otherwise, NA.
1016         Val holdsBitNot(Val id);
1017 
1018         SkTHashMap<Instruction, Val, InstructionHash> fIndex;
1019         std::vector<Instruction>                      fProgram;
1020         std::vector<TraceHook*>                       fTraceHooks;
1021         std::vector<int>                              fStrides;
1022         const Features                                fFeatures;
1023         bool                                          fCreateDuplicates;
1024     };
1025 
1026     // Optimization passes and data structures normally used by Builder::optimize(),
1027     // extracted here so they can be unit tested.
1028     std::vector<Instruction> eliminate_dead_code(std::vector<Instruction>,
1029                                                  viz::Visualizer* visualizer = nullptr);
1030     std::vector<OptimizedInstruction> finalize(std::vector<Instruction>,
1031                                                viz::Visualizer* visualizer = nullptr);
1032 
1033     using Reg = int;
1034 
1035     // d = op(x,y,z,w, immA,immB)
1036     struct InterpreterInstruction {
1037         Op  op;
1038         Reg d,x,y,z,w;
1039         int immA,immB,immC;
1040     };
1041 
1042     class Program {
1043     public:
1044         Program(const std::vector<OptimizedInstruction>& instructions,
1045                 std::unique_ptr<viz::Visualizer> visualizer,
1046                 const std::vector<int>& strides,
1047                 const std::vector<TraceHook*>& traceHooks,
1048                 const char* debug_name, bool allow_jit);
1049 
1050         Program();
1051         ~Program();
1052 
1053         Program(Program&&);
1054         Program& operator=(Program&&);
1055 
1056         Program(const Program&) = delete;
1057         Program& operator=(const Program&) = delete;
1058 
1059         void eval(int n, void* args[]) const;
1060 
1061         template <typename... T>
1062         void eval(int n, T*... arg) const {
1063             SkASSERT(sizeof...(arg) == this->nargs());
1064             // This nullptr isn't important except that it makes args[] non-empty if you pass none.
1065             void* args[] = { (void*)arg..., nullptr };
1066             this->eval(n, args);
1067         }
1068 
1069         std::vector<InterpreterInstruction> instructions() const;
1070         int  nargs() const;
1071         int  nregs() const;
1072         int  loop () const;
1073         bool empty() const;
1074 
1075         bool hasJIT() const;         // Has this Program been JITted?
1076         bool hasTraceHooks() const;  // Is this program instrumented for debugging?
1077 
1078         void visualize(SkWStream* output) const;
1079         void dump(SkWStream* = nullptr) const;
1080         void disassemble(SkWStream* = nullptr) const;
1081         viz::Visualizer* visualizer();
1082 
1083     private:
1084         void setupInterpreter(const std::vector<OptimizedInstruction>&);
1085         void setupJIT        (const std::vector<OptimizedInstruction>&, const char* debug_name);
1086 
1087         bool jit(const std::vector<OptimizedInstruction>&,
1088                  int* stack_hint, uint32_t* registers_used,
1089                  Assembler*) const;
1090 
1091         void dropJIT();
1092 
1093         struct Impl;
1094         std::unique_ptr<Impl> fImpl;
1095     };
1096 
1097     // TODO: control flow
1098     // TODO: 64-bit values?
1099 
1100 #define SI static inline
1101 
1102     SI I32 operator+(I32 x, I32 y) { return x->add(x,y); }
1103     SI I32 operator+(I32 x, int y) { return x->add(x,y); }
1104     SI I32 operator+(int x, I32 y) { return y->add(x,y); }
1105 
1106     SI I32 operator-(I32 x, I32 y) { return x->sub(x,y); }
1107     SI I32 operator-(I32 x, int y) { return x->sub(x,y); }
1108     SI I32 operator-(int x, I32 y) { return y->sub(x,y); }
1109 
1110     SI I32 operator*(I32 x, I32 y) { return x->mul(x,y); }
1111     SI I32 operator*(I32 x, int y) { return x->mul(x,y); }
1112     SI I32 operator*(int x, I32 y) { return y->mul(x,y); }
1113 
min(I32 x,I32 y)1114     SI I32 min(I32 x, I32 y) { return x->min(x,y); }
min(I32 x,int y)1115     SI I32 min(I32 x, int y) { return x->min(x,y); }
min(int x,I32 y)1116     SI I32 min(int x, I32 y) { return y->min(x,y); }
1117 
max(I32 x,I32 y)1118     SI I32 max(I32 x, I32 y) { return x->max(x,y); }
max(I32 x,int y)1119     SI I32 max(I32 x, int y) { return x->max(x,y); }
max(int x,I32 y)1120     SI I32 max(int x, I32 y) { return y->max(x,y); }
1121 
1122     SI I32 operator==(I32 x, I32 y) { return x->eq(x,y); }
1123     SI I32 operator==(I32 x, int y) { return x->eq(x,y); }
1124     SI I32 operator==(int x, I32 y) { return y->eq(x,y); }
1125 
1126     SI I32 operator!=(I32 x, I32 y) { return x->neq(x,y); }
1127     SI I32 operator!=(I32 x, int y) { return x->neq(x,y); }
1128     SI I32 operator!=(int x, I32 y) { return y->neq(x,y); }
1129 
1130     SI I32 operator< (I32 x, I32 y) { return x->lt(x,y); }
1131     SI I32 operator< (I32 x, int y) { return x->lt(x,y); }
1132     SI I32 operator< (int x, I32 y) { return y->lt(x,y); }
1133 
1134     SI I32 operator<=(I32 x, I32 y) { return x->lte(x,y); }
1135     SI I32 operator<=(I32 x, int y) { return x->lte(x,y); }
1136     SI I32 operator<=(int x, I32 y) { return y->lte(x,y); }
1137 
1138     SI I32 operator> (I32 x, I32 y) { return x->gt(x,y); }
1139     SI I32 operator> (I32 x, int y) { return x->gt(x,y); }
1140     SI I32 operator> (int x, I32 y) { return y->gt(x,y); }
1141 
1142     SI I32 operator>=(I32 x, I32 y) { return x->gte(x,y); }
1143     SI I32 operator>=(I32 x, int y) { return x->gte(x,y); }
1144     SI I32 operator>=(int x, I32 y) { return y->gte(x,y); }
1145 
1146 
1147     SI F32 operator+(F32   x, F32   y) { return x->add(x,y); }
1148     SI F32 operator+(F32   x, float y) { return x->add(x,y); }
1149     SI F32 operator+(float x, F32   y) { return y->add(x,y); }
1150 
1151     SI F32 operator-(F32   x, F32   y) { return x->sub(x,y); }
1152     SI F32 operator-(F32   x, float y) { return x->sub(x,y); }
1153     SI F32 operator-(float x, F32   y) { return y->sub(x,y); }
1154 
1155     SI F32 operator*(F32   x, F32   y) { return x->mul(x,y); }
1156     SI F32 operator*(F32   x, float y) { return x->mul(x,y); }
1157     SI F32 operator*(float x, F32   y) { return y->mul(x,y); }
1158 
fast_mul(F32 x,F32 y)1159     SI F32 fast_mul(F32   x, F32   y) { return x->fast_mul(x,y); }
fast_mul(F32 x,float y)1160     SI F32 fast_mul(F32   x, float y) { return x->fast_mul(x,y); }
fast_mul(float x,F32 y)1161     SI F32 fast_mul(float x, F32   y) { return y->fast_mul(x,y); }
1162 
1163     SI F32 operator/(F32   x, F32  y) { return x->div(x,y); }
1164     SI F32 operator/(float x, F32  y) { return y->div(x,y); }
1165 
min(F32 x,F32 y)1166     SI F32 min(F32   x, F32   y) { return x->min(x,y); }
min(F32 x,float y)1167     SI F32 min(F32   x, float y) { return x->min(x,y); }
min(float x,F32 y)1168     SI F32 min(float x, F32   y) { return y->min(x,y); }
1169 
max(F32 x,F32 y)1170     SI F32 max(F32   x, F32   y) { return x->max(x,y); }
max(F32 x,float y)1171     SI F32 max(F32   x, float y) { return x->max(x,y); }
max(float x,F32 y)1172     SI F32 max(float x, F32   y) { return y->max(x,y); }
1173 
1174     SI I32 operator==(F32   x, F32   y) { return x->eq(x,y); }
1175     SI I32 operator==(F32   x, float y) { return x->eq(x,y); }
1176     SI I32 operator==(float x, F32   y) { return y->eq(x,y); }
1177 
1178     SI I32 operator!=(F32   x, F32   y) { return x->neq(x,y); }
1179     SI I32 operator!=(F32   x, float y) { return x->neq(x,y); }
1180     SI I32 operator!=(float x, F32   y) { return y->neq(x,y); }
1181 
1182     SI I32 operator< (F32   x, F32   y) { return x->lt(x,y); }
1183     SI I32 operator< (F32   x, float y) { return x->lt(x,y); }
1184     SI I32 operator< (float x, F32   y) { return y->lt(x,y); }
1185 
1186     SI I32 operator<=(F32   x, F32   y) { return x->lte(x,y); }
1187     SI I32 operator<=(F32   x, float y) { return x->lte(x,y); }
1188     SI I32 operator<=(float x, F32   y) { return y->lte(x,y); }
1189 
1190     SI I32 operator> (F32   x, F32   y) { return x->gt(x,y); }
1191     SI I32 operator> (F32   x, float y) { return x->gt(x,y); }
1192     SI I32 operator> (float x, F32   y) { return y->gt(x,y); }
1193 
1194     SI I32 operator>=(F32   x, F32   y) { return x->gte(x,y); }
1195     SI I32 operator>=(F32   x, float y) { return x->gte(x,y); }
1196     SI I32 operator>=(float x, F32   y) { return y->gte(x,y); }
1197 
1198     SI I32& operator+=(I32& x, I32 y) { return (x = x + y); }
1199     SI I32& operator+=(I32& x, int y) { return (x = x + y); }
1200 
1201     SI I32& operator-=(I32& x, I32 y) { return (x = x - y); }
1202     SI I32& operator-=(I32& x, int y) { return (x = x - y); }
1203 
1204     SI I32& operator*=(I32& x, I32 y) { return (x = x * y); }
1205     SI I32& operator*=(I32& x, int y) { return (x = x * y); }
1206 
1207     SI F32& operator+=(F32& x, F32   y) { return (x = x + y); }
1208     SI F32& operator+=(F32& x, float y) { return (x = x + y); }
1209 
1210     SI F32& operator-=(F32& x, F32   y) { return (x = x - y); }
1211     SI F32& operator-=(F32& x, float y) { return (x = x - y); }
1212 
1213     SI F32& operator*=(F32& x, F32   y) { return (x = x * y); }
1214     SI F32& operator*=(F32& x, float y) { return (x = x * y); }
1215 
1216     SI F32& operator/=(F32& x, F32   y) { return (x = x / y); }
1217 
assert_true(I32 cond,I32 debug)1218     SI void assert_true(I32 cond, I32 debug) { cond->assert_true(cond,debug); }
assert_true(I32 cond,F32 debug)1219     SI void assert_true(I32 cond, F32 debug) { cond->assert_true(cond,debug); }
assert_true(I32 cond)1220     SI void assert_true(I32 cond)            { cond->assert_true(cond); }
1221 
store8(Ptr ptr,I32 val)1222     SI void store8  (Ptr ptr, I32 val)                    { val->store8  (ptr, val); }
store16(Ptr ptr,I32 val)1223     SI void store16 (Ptr ptr, I32 val)                    { val->store16 (ptr, val); }
store32(Ptr ptr,I32 val)1224     SI void store32 (Ptr ptr, I32 val)                    { val->store32 (ptr, val); }
storeF(Ptr ptr,F32 val)1225     SI void storeF  (Ptr ptr, F32 val)                    { val->storeF  (ptr, val); }
store64(Ptr ptr,I32 lo,I32 hi)1226     SI void store64 (Ptr ptr, I32 lo, I32 hi)             { lo ->store64 (ptr, lo,hi); }
store128(Ptr ptr,I32 x,I32 y,I32 z,I32 w)1227     SI void store128(Ptr ptr, I32 x, I32 y, I32 z, I32 w) { x  ->store128(ptr, x,y,z,w); }
1228 
gather8(UPtr ptr,int off,I32 ix)1229     SI I32 gather8 (UPtr ptr, int off, I32 ix) { return ix->gather8 (ptr, off, ix); }
gather16(UPtr ptr,int off,I32 ix)1230     SI I32 gather16(UPtr ptr, int off, I32 ix) { return ix->gather16(ptr, off, ix); }
gather32(UPtr ptr,int off,I32 ix)1231     SI I32 gather32(UPtr ptr, int off, I32 ix) { return ix->gather32(ptr, off, ix); }
gatherF(UPtr ptr,int off,I32 ix)1232     SI F32 gatherF (UPtr ptr, int off, I32 ix) { return ix->gatherF (ptr, off, ix); }
1233 
gather8(Uniform u,I32 ix)1234     SI I32 gather8 (Uniform u, I32 ix) { return ix->gather8 (u, ix); }
gather16(Uniform u,I32 ix)1235     SI I32 gather16(Uniform u, I32 ix) { return ix->gather16(u, ix); }
gather32(Uniform u,I32 ix)1236     SI I32 gather32(Uniform u, I32 ix) { return ix->gather32(u, ix); }
gatherF(Uniform u,I32 ix)1237     SI F32 gatherF (Uniform u, I32 ix) { return ix->gatherF (u, ix); }
1238 
sqrt(F32 x)1239     SI F32        sqrt(F32 x) { return x->       sqrt(x); }
approx_log2(F32 x)1240     SI F32 approx_log2(F32 x) { return x->approx_log2(x); }
approx_pow2(F32 x)1241     SI F32 approx_pow2(F32 x) { return x->approx_pow2(x); }
approx_log(F32 x)1242     SI F32 approx_log (F32 x) { return x->approx_log (x); }
approx_exp(F32 x)1243     SI F32 approx_exp (F32 x) { return x->approx_exp (x); }
1244 
approx_powf(F32 base,F32 exp)1245     SI F32 approx_powf(F32   base, F32   exp) { return base->approx_powf(base, exp); }
approx_powf(F32 base,float exp)1246     SI F32 approx_powf(F32   base, float exp) { return base->approx_powf(base, exp); }
approx_powf(float base,F32 exp)1247     SI F32 approx_powf(float base, F32   exp) { return  exp->approx_powf(base, exp); }
1248 
approx_sin(F32 radians)1249     SI F32 approx_sin(F32 radians) { return radians->approx_sin(radians); }
approx_cos(F32 radians)1250     SI F32 approx_cos(F32 radians) { return radians->approx_cos(radians); }
approx_tan(F32 radians)1251     SI F32 approx_tan(F32 radians) { return radians->approx_tan(radians); }
1252 
approx_asin(F32 x)1253     SI F32 approx_asin(F32 x) { return x->approx_asin(x); }
approx_acos(F32 x)1254     SI F32 approx_acos(F32 x) { return x->approx_acos(x); }
approx_atan(F32 x)1255     SI F32 approx_atan(F32 x) { return x->approx_atan(x); }
approx_atan2(F32 y,F32 x)1256     SI F32 approx_atan2(F32 y, F32 x) { return x->approx_atan2(y, x); }
1257 
clamp01(F32 x)1258     SI F32   clamp01(F32 x) { return x->  clamp01(x); }
abs(F32 x)1259     SI F32       abs(F32 x) { return x->      abs(x); }
ceil(F32 x)1260     SI F32      ceil(F32 x) { return x->     ceil(x); }
fract(F32 x)1261     SI F32     fract(F32 x) { return x->    fract(x); }
floor(F32 x)1262     SI F32     floor(F32 x) { return x->    floor(x); }
is_NaN(F32 x)1263     SI I32    is_NaN(F32 x) { return x->   is_NaN(x); }
is_finite(F32 x)1264     SI I32 is_finite(F32 x) { return x->is_finite(x); }
1265 
trunc(F32 x)1266     SI I32      trunc(F32 x) { return x->      trunc(x); }
round(F32 x)1267     SI I32      round(F32 x) { return x->      round(x); }
pun_to_I32(F32 x)1268     SI I32 pun_to_I32(F32 x) { return x-> pun_to_I32(x); }
pun_to_F32(I32 x)1269     SI F32 pun_to_F32(I32 x) { return x-> pun_to_F32(x); }
to_F32(I32 x)1270     SI F32     to_F32(I32 x) { return x->     to_F32(x); }
to_fp16(F32 x)1271     SI I32    to_fp16(F32 x) { return x->    to_fp16(x); }
from_fp16(I32 x)1272     SI F32  from_fp16(I32 x) { return x->  from_fp16(x); }
1273 
lerp(F32 lo,F32 hi,F32 t)1274     SI F32 lerp(F32   lo, F32   hi, F32   t) { return lo->lerp(lo,hi,t); }
lerp(F32 lo,F32 hi,float t)1275     SI F32 lerp(F32   lo, F32   hi, float t) { return lo->lerp(lo,hi,t); }
lerp(F32 lo,float hi,F32 t)1276     SI F32 lerp(F32   lo, float hi, F32   t) { return lo->lerp(lo,hi,t); }
lerp(F32 lo,float hi,float t)1277     SI F32 lerp(F32   lo, float hi, float t) { return lo->lerp(lo,hi,t); }
lerp(float lo,F32 hi,F32 t)1278     SI F32 lerp(float lo, F32   hi, F32   t) { return hi->lerp(lo,hi,t); }
lerp(float lo,F32 hi,float t)1279     SI F32 lerp(float lo, F32   hi, float t) { return hi->lerp(lo,hi,t); }
lerp(float lo,float hi,F32 t)1280     SI F32 lerp(float lo, float hi, F32   t) { return  t->lerp(lo,hi,t); }
1281 
clamp(F32 x,F32 lo,F32 hi)1282     SI F32 clamp(F32   x, F32   lo, F32   hi) { return  x->clamp(x,lo,hi); }
clamp(F32 x,F32 lo,float hi)1283     SI F32 clamp(F32   x, F32   lo, float hi) { return  x->clamp(x,lo,hi); }
clamp(F32 x,float lo,F32 hi)1284     SI F32 clamp(F32   x, float lo, F32   hi) { return  x->clamp(x,lo,hi); }
clamp(F32 x,float lo,float hi)1285     SI F32 clamp(F32   x, float lo, float hi) { return  x->clamp(x,lo,hi); }
clamp(float x,F32 lo,F32 hi)1286     SI F32 clamp(float x, F32   lo, F32   hi) { return lo->clamp(x,lo,hi); }
clamp(float x,F32 lo,float hi)1287     SI F32 clamp(float x, F32   lo, float hi) { return lo->clamp(x,lo,hi); }
clamp(float x,float lo,F32 hi)1288     SI F32 clamp(float x, float lo, F32   hi) { return hi->clamp(x,lo,hi); }
1289 
1290     SI I32 operator<<(I32 x, int bits) { return x->shl(x, bits); }
shl(I32 x,int bits)1291     SI I32        shl(I32 x, int bits) { return x->shl(x, bits); }
shr(I32 x,int bits)1292     SI I32        shr(I32 x, int bits) { return x->shr(x, bits); }
sra(I32 x,int bits)1293     SI I32        sra(I32 x, int bits) { return x->sra(x, bits); }
1294 
1295     SI I32 operator&(I32 x, I32 y) { return x->bit_and(x,y); }
1296     SI I32 operator&(I32 x, int y) { return x->bit_and(x,y); }
1297     SI I32 operator&(int x, I32 y) { return y->bit_and(x,y); }
1298 
1299     SI I32 operator|(I32 x, I32 y) { return x->bit_or (x,y); }
1300     SI I32 operator|(I32 x, int y) { return x->bit_or (x,y); }
1301     SI I32 operator|(int x, I32 y) { return y->bit_or (x,y); }
1302 
1303     SI I32 operator^(I32 x, I32 y) { return x->bit_xor(x,y); }
1304     SI I32 operator^(I32 x, int y) { return x->bit_xor(x,y); }
1305     SI I32 operator^(int x, I32 y) { return y->bit_xor(x,y); }
1306 
1307     SI I32& operator&=(I32& x, I32 y) { return (x = x & y); }
1308     SI I32& operator&=(I32& x, int y) { return (x = x & y); }
1309     SI I32& operator|=(I32& x, I32 y) { return (x = x | y); }
1310     SI I32& operator|=(I32& x, int y) { return (x = x | y); }
1311     SI I32& operator^=(I32& x, I32 y) { return (x = x ^ y); }
1312     SI I32& operator^=(I32& x, int y) { return (x = x ^ y); }
1313 
bit_clear(I32 x,I32 y)1314     SI I32 bit_clear(I32 x, I32 y) { return x->bit_clear(x,y); }
bit_clear(I32 x,int y)1315     SI I32 bit_clear(I32 x, int y) { return x->bit_clear(x,y); }
bit_clear(int x,I32 y)1316     SI I32 bit_clear(int x, I32 y) { return y->bit_clear(x,y); }
1317 
select(I32 c,I32 t,I32 f)1318     SI I32 select(I32 c, I32 t, I32 f) { return c->select(c,          t ,          f ); }
select(I32 c,I32 t,int f)1319     SI I32 select(I32 c, I32 t, int f) { return c->select(c,          t , c->splat(f)); }
select(I32 c,int t,I32 f)1320     SI I32 select(I32 c, int t, I32 f) { return c->select(c, c->splat(t),          f ); }
select(I32 c,int t,int f)1321     SI I32 select(I32 c, int t, int f) { return c->select(c, c->splat(t), c->splat(f)); }
1322 
select(I32 c,F32 t,F32 f)1323     SI F32 select(I32 c, F32   t, F32   f) { return c->select(c,          t ,          f ); }
select(I32 c,F32 t,float f)1324     SI F32 select(I32 c, F32   t, float f) { return c->select(c,          t , c->splat(f)); }
select(I32 c,float t,F32 f)1325     SI F32 select(I32 c, float t, F32   f) { return c->select(c, c->splat(t),          f ); }
select(I32 c,float t,float f)1326     SI F32 select(I32 c, float t, float f) { return c->select(c, c->splat(t), c->splat(f)); }
1327 
extract(I32 x,int bits,I32 z)1328     SI I32 extract(I32 x, int bits, I32 z) { return x->extract(x,bits,z); }
extract(I32 x,int bits,int z)1329     SI I32 extract(I32 x, int bits, int z) { return x->extract(x,bits,z); }
extract(int x,int bits,I32 z)1330     SI I32 extract(int x, int bits, I32 z) { return z->extract(x,bits,z); }
1331 
pack(I32 x,I32 y,int bits)1332     SI I32 pack(I32 x, I32 y, int bits) { return x->pack   (x,y,bits); }
pack(I32 x,int y,int bits)1333     SI I32 pack(I32 x, int y, int bits) { return x->pack   (x,y,bits); }
pack(int x,I32 y,int bits)1334     SI I32 pack(int x, I32 y, int bits) { return y->pack   (x,y,bits); }
1335 
1336     SI I32 operator~(I32 x) { return ~0 ^ x; }
1337     SI I32 operator-(I32 x) { return  0 - x; }
1338     SI F32 operator-(F32 x) { return 0.0f - x; }
1339 
from_unorm(int bits,I32 x)1340     SI F32 from_unorm(int bits, I32 x) { return x->from_unorm(bits,x); }
to_unorm(int bits,F32 x)1341     SI I32   to_unorm(int bits, F32 x) { return x->  to_unorm(bits,x); }
1342 
store(PixelFormat f,Ptr p,Color c)1343     SI void store(PixelFormat f, Ptr p, Color c) { return c->store(f,p,c); }
1344 
gather(PixelFormat f,UPtr p,int off,I32 ix)1345     SI Color gather(PixelFormat f, UPtr p, int off, I32 ix) { return ix->gather(f,p,off,ix); }
gather(PixelFormat f,Uniform u,I32 ix)1346     SI Color gather(PixelFormat f, Uniform u     , I32 ix)  { return ix->gather(f,u,ix); }
1347 
premul(F32 * r,F32 * g,F32 * b,F32 a)1348     SI void   premul(F32* r, F32* g, F32* b, F32 a) { a->  premul(r,g,b,a); }
unpremul(F32 * r,F32 * g,F32 * b,F32 a)1349     SI void unpremul(F32* r, F32* g, F32* b, F32 a) { a->unpremul(r,g,b,a); }
1350 
premul(Color c)1351     SI Color   premul(Color c) { return c->  premul(c); }
unpremul(Color c)1352     SI Color unpremul(Color c) { return c->unpremul(c); }
1353 
lerp(Color lo,Color hi,F32 t)1354     SI Color lerp(Color lo, Color hi, F32 t) { return t->lerp(lo,hi,t); }
1355 
blend(SkBlendMode m,Color s,Color d)1356     SI Color blend(SkBlendMode m, Color s, Color d) { return s->blend(m,s,d); }
1357 
clamp01(Color c)1358     SI Color clamp01(Color c) { return c->clamp01(c); }
1359 
to_hsla(Color c)1360     SI HSLA  to_hsla(Color c) { return c->to_hsla(c); }
to_rgba(HSLA c)1361     SI Color to_rgba(HSLA  c) { return c->to_rgba(c); }
1362 
1363     // Evaluate polynomials: ax^n + bx^(n-1) + ... for n >= 1
1364     template <typename F32_or_float, typename... Rest>
poly(F32 x,F32_or_float a,float b,Rest...rest)1365     SI F32 poly(F32 x, F32_or_float a, float b, Rest... rest) {
1366         if constexpr (sizeof...(rest) == 0) {
1367             return x*a+b;
1368         } else {
1369             return poly(x, x*a+b, rest...);
1370         }
1371     }
1372 #undef SI
1373 }  // namespace skvm
1374 
1375 #endif//SkVM_DEFINED
1376