1 /* 2 * Copyright 2019 Google LLC 3 * 4 * Use of this source code is governed by a BSD-style license that can be 5 * found in the LICENSE file. 6 */ 7 8 #ifndef SkVM_DEFINED 9 #define SkVM_DEFINED 10 11 #include "include/core/SkBlendMode.h" 12 #include "include/core/SkColor.h" 13 #include "include/core/SkSpan.h" 14 #include "include/private/SkMacros.h" 15 #include "include/private/SkTArray.h" 16 #include "include/private/SkTHash.h" 17 #include "src/core/SkVM_fwd.h" 18 #include <vector> // std::vector 19 20 class SkWStream; 21 22 #if defined(SKVM_JIT_WHEN_POSSIBLE) && !defined(SK_BUILD_FOR_IOS) 23 #if defined(__x86_64__) || defined(_M_X64) 24 #if defined(_WIN32) || defined(__linux) || defined(__APPLE__) 25 #define SKVM_JIT 26 #endif 27 #endif 28 #if defined(__aarch64__) 29 #if defined(__ANDROID__) || defined(__APPLE__) 30 #define SKVM_JIT 31 #endif 32 #endif 33 #endif 34 35 #if 0 36 #define SKVM_LLVM 37 #endif 38 39 #if 0 40 #undef SKVM_JIT 41 #endif 42 43 namespace skvm { 44 45 namespace viz { 46 class Visualizer; 47 } 48 49 class Assembler { 50 public: 51 explicit Assembler(void* buf); 52 53 size_t size() const; 54 55 // Order matters... GP64, Xmm, Ymm values match 4-bit register encoding for each. 56 enum GP64 { 57 rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi, 58 r8 , r9 , r10, r11, r12, r13, r14, r15, 59 }; 60 enum Xmm { 61 xmm0, xmm1, xmm2 , xmm3 , xmm4 , xmm5 , xmm6 , xmm7 , 62 xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, 63 }; 64 enum Ymm { 65 ymm0, ymm1, ymm2 , ymm3 , ymm4 , ymm5 , ymm6 , ymm7 , 66 ymm8, ymm9, ymm10, ymm11, ymm12, ymm13, ymm14, ymm15, 67 }; 68 69 // X and V values match 5-bit encoding for each (nothing tricky). 70 enum X { 71 x0 , x1 , x2 , x3 , x4 , x5 , x6 , x7 , 72 x8 , x9 , x10, x11, x12, x13, x14, x15, 73 x16, x17, x18, x19, x20, x21, x22, x23, 74 x24, x25, x26, x27, x28, x29, x30, xzr, sp=xzr, 75 }; 76 enum V { 77 v0 , v1 , v2 , v3 , v4 , v5 , v6 , v7 , 78 v8 , v9 , v10, v11, v12, v13, v14, v15, 79 v16, v17, v18, v19, v20, v21, v22, v23, 80 v24, v25, v26, v27, v28, v29, v30, v31, 81 }; 82 83 void bytes(const void*, int); 84 void byte(uint8_t); 85 void word(uint32_t); 86 87 struct Label { 88 int offset = 0; 89 enum { NotYetSet, ARMDisp19, X86Disp32 } kind = NotYetSet; 90 SkSTArray<2, int> references; 91 }; 92 93 // x86-64 94 95 void align(int mod); 96 97 void int3(); 98 void vzeroupper(); 99 void ret(); 100 101 // Mem represents a value at base + disp + scale*index, 102 // or simply at base + disp if index=rsp. 103 enum Scale { ONE, TWO, FOUR, EIGHT }; 104 struct Mem { 105 GP64 base; 106 int disp = 0; 107 GP64 index = rsp; 108 Scale scale = ONE; 109 }; 110 111 struct Operand { 112 union { 113 int reg; 114 Mem mem; 115 Label* label; 116 }; 117 enum { REG, MEM, LABEL } kind; 118 OperandOperand119 Operand(GP64 r) : reg (r), kind(REG ) {} OperandOperand120 Operand(Xmm r) : reg (r), kind(REG ) {} OperandOperand121 Operand(Ymm r) : reg (r), kind(REG ) {} OperandOperand122 Operand(Mem m) : mem (m), kind(MEM ) {} OperandOperand123 Operand(Label* l) : label(l), kind(LABEL) {} 124 }; 125 126 void vpand (Ymm dst, Ymm x, Operand y); 127 void vpandn(Ymm dst, Ymm x, Operand y); 128 void vpor (Ymm dst, Ymm x, Operand y); 129 void vpxor (Ymm dst, Ymm x, Operand y); 130 131 void vpaddd (Ymm dst, Ymm x, Operand y); 132 void vpsubd (Ymm dst, Ymm x, Operand y); 133 void vpmulld(Ymm dst, Ymm x, Operand y); 134 135 void vpaddw (Ymm dst, Ymm x, Operand y); 136 void vpsubw (Ymm dst, Ymm x, Operand y); 137 void vpmullw (Ymm dst, Ymm x, Operand y); 138 139 void vpabsw (Ymm dst, Operand x); 140 void vpavgw (Ymm dst, Ymm x, Operand y); // dst = (x+y+1)>>1, unsigned. 141 void vpmulhrsw(Ymm dst, Ymm x, Operand y); // dst = (x*y + (1<<14)) >> 15, signed. 142 void vpminsw (Ymm dst, Ymm x, Operand y); 143 void vpminuw (Ymm dst, Ymm x, Operand y); 144 void vpmaxsw (Ymm dst, Ymm x, Operand y); 145 void vpmaxuw (Ymm dst, Ymm x, Operand y); 146 147 void vaddps(Ymm dst, Ymm x, Operand y); 148 void vsubps(Ymm dst, Ymm x, Operand y); 149 void vmulps(Ymm dst, Ymm x, Operand y); 150 void vdivps(Ymm dst, Ymm x, Operand y); 151 void vminps(Ymm dst, Ymm x, Operand y); 152 void vmaxps(Ymm dst, Ymm x, Operand y); 153 154 void vsqrtps(Ymm dst, Operand x); 155 156 void vfmadd132ps(Ymm dst, Ymm x, Operand y); 157 void vfmadd213ps(Ymm dst, Ymm x, Operand y); 158 void vfmadd231ps(Ymm dst, Ymm x, Operand y); 159 160 void vfmsub132ps(Ymm dst, Ymm x, Operand y); 161 void vfmsub213ps(Ymm dst, Ymm x, Operand y); 162 void vfmsub231ps(Ymm dst, Ymm x, Operand y); 163 164 void vfnmadd132ps(Ymm dst, Ymm x, Operand y); 165 void vfnmadd213ps(Ymm dst, Ymm x, Operand y); 166 void vfnmadd231ps(Ymm dst, Ymm x, Operand y); 167 168 void vpackusdw(Ymm dst, Ymm x, Operand y); 169 void vpackuswb(Ymm dst, Ymm x, Operand y); 170 171 void vpunpckldq(Ymm dst, Ymm x, Operand y); 172 void vpunpckhdq(Ymm dst, Ymm x, Operand y); 173 174 void vpcmpeqd(Ymm dst, Ymm x, Operand y); 175 void vpcmpgtd(Ymm dst, Ymm x, Operand y); 176 void vpcmpeqw(Ymm dst, Ymm x, Operand y); 177 void vpcmpgtw(Ymm dst, Ymm x, Operand y); 178 179 void vcmpps (Ymm dst, Ymm x, Operand y, int imm); vcmpeqps(Ymm dst,Ymm x,Operand y)180 void vcmpeqps (Ymm dst, Ymm x, Operand y) { this->vcmpps(dst,x,y,0); } vcmpltps(Ymm dst,Ymm x,Operand y)181 void vcmpltps (Ymm dst, Ymm x, Operand y) { this->vcmpps(dst,x,y,1); } vcmpleps(Ymm dst,Ymm x,Operand y)182 void vcmpleps (Ymm dst, Ymm x, Operand y) { this->vcmpps(dst,x,y,2); } vcmpneqps(Ymm dst,Ymm x,Operand y)183 void vcmpneqps(Ymm dst, Ymm x, Operand y) { this->vcmpps(dst,x,y,4); } 184 185 // Sadly, the x parameter cannot be a general Operand for these shifts. 186 void vpslld(Ymm dst, Ymm x, int imm); 187 void vpsrld(Ymm dst, Ymm x, int imm); 188 void vpsrad(Ymm dst, Ymm x, int imm); 189 190 void vpsllw(Ymm dst, Ymm x, int imm); 191 void vpsrlw(Ymm dst, Ymm x, int imm); 192 void vpsraw(Ymm dst, Ymm x, int imm); 193 194 void vpermq (Ymm dst, Operand x, int imm); 195 void vperm2f128(Ymm dst, Ymm x, Operand y, int imm); 196 void vpermps (Ymm dst, Ymm ix, Operand src); // dst[i] = src[ix[i]] 197 198 enum Rounding { NEAREST, FLOOR, CEIL, TRUNC, CURRENT }; 199 void vroundps(Ymm dst, Operand x, Rounding); 200 201 void vmovdqa(Ymm dst, Operand x); 202 void vmovups(Ymm dst, Operand x); 203 void vmovups(Xmm dst, Operand x); 204 void vmovups(Operand dst, Ymm x); 205 void vmovups(Operand dst, Xmm x); 206 207 void vcvtdq2ps (Ymm dst, Operand x); 208 void vcvttps2dq(Ymm dst, Operand x); 209 void vcvtps2dq (Ymm dst, Operand x); 210 211 void vcvtps2ph(Operand dst, Ymm x, Rounding); 212 void vcvtph2ps(Ymm dst, Operand x); 213 214 void vpblendvb(Ymm dst, Ymm x, Operand y, Ymm z); 215 216 void vpshufb(Ymm dst, Ymm x, Operand y); 217 218 void vptest(Ymm x, Operand y); 219 220 void vbroadcastss(Ymm dst, Operand y); 221 222 void vpmovzxwd(Ymm dst, Operand src); // dst = src, 128-bit, uint16_t -> int 223 void vpmovzxbd(Ymm dst, Operand src); // dst = src, 64-bit, uint8_t -> int 224 225 void vmovq(Operand dst, Xmm src); // dst = src, 64-bit 226 void vmovd(Operand dst, Xmm src); // dst = src, 32-bit 227 void vmovd(Xmm dst, Operand src); // dst = src, 32-bit 228 229 void vpinsrd(Xmm dst, Xmm src, Operand y, int imm); // dst = src; dst[imm] = y, 32-bit 230 void vpinsrw(Xmm dst, Xmm src, Operand y, int imm); // dst = src; dst[imm] = y, 16-bit 231 void vpinsrb(Xmm dst, Xmm src, Operand y, int imm); // dst = src; dst[imm] = y, 8-bit 232 233 void vextracti128(Operand dst, Ymm src, int imm); // dst = src[imm], 128-bit 234 void vpextrd (Operand dst, Xmm src, int imm); // dst = src[imm], 32-bit 235 void vpextrw (Operand dst, Xmm src, int imm); // dst = src[imm], 16-bit 236 void vpextrb (Operand dst, Xmm src, int imm); // dst = src[imm], 8-bit 237 238 // if (mask & 0x8000'0000) { 239 // dst = base[scale*ix]; 240 // } 241 // mask = 0; 242 void vgatherdps(Ymm dst, Scale scale, Ymm ix, GP64 base, Ymm mask); 243 244 245 void label(Label*); 246 247 void jmp(Label*); 248 void je (Label*); 249 void jne(Label*); 250 void jl (Label*); 251 void jc (Label*); 252 253 void add (Operand dst, int imm); 254 void sub (Operand dst, int imm); 255 void cmp (Operand dst, int imm); 256 void mov (Operand dst, int imm); 257 void movb(Operand dst, int imm); 258 259 void add (Operand dst, GP64 x); 260 void sub (Operand dst, GP64 x); 261 void cmp (Operand dst, GP64 x); 262 void mov (Operand dst, GP64 x); 263 void movb(Operand dst, GP64 x); 264 265 void add (GP64 dst, Operand x); 266 void sub (GP64 dst, Operand x); 267 void cmp (GP64 dst, Operand x); 268 void mov (GP64 dst, Operand x); 269 void movb(GP64 dst, Operand x); 270 271 // Disambiguators... choice is arbitrary (but generates different code!). add(GP64 dst,GP64 x)272 void add (GP64 dst, GP64 x) { this->add (Operand(dst), x); } sub(GP64 dst,GP64 x)273 void sub (GP64 dst, GP64 x) { this->sub (Operand(dst), x); } cmp(GP64 dst,GP64 x)274 void cmp (GP64 dst, GP64 x) { this->cmp (Operand(dst), x); } mov(GP64 dst,GP64 x)275 void mov (GP64 dst, GP64 x) { this->mov (Operand(dst), x); } movb(GP64 dst,GP64 x)276 void movb(GP64 dst, GP64 x) { this->movb(Operand(dst), x); } 277 278 void movzbq(GP64 dst, Operand x); // dst = x, uint8_t -> int 279 void movzwq(GP64 dst, Operand x); // dst = x, uint16_t -> int 280 281 // aarch64 282 283 // d = op(n,m) 284 using DOpNM = void(V d, V n, V m); 285 DOpNM and16b, orr16b, eor16b, bic16b, bsl16b, 286 add4s, sub4s, mul4s, 287 cmeq4s, cmgt4s, 288 sub8h, mul8h, 289 fadd4s, fsub4s, fmul4s, fdiv4s, fmin4s, fmax4s, 290 fcmeq4s, fcmgt4s, fcmge4s, 291 tbl, 292 uzp14s, uzp24s, 293 zip14s, zip24s; 294 295 // TODO: there are also float ==,<,<=,>,>= instructions with an immediate 0.0f, 296 // and the register comparison > and >= can also compare absolute values. Interesting. 297 298 // d += n*m 299 void fmla4s(V d, V n, V m); 300 301 // d -= n*m 302 void fmls4s(V d, V n, V m); 303 304 // d = op(n,imm) 305 using DOpNImm = void(V d, V n, int imm); 306 DOpNImm sli4s, 307 shl4s, sshr4s, ushr4s, 308 ushr8h; 309 310 // d = op(n) 311 using DOpN = void(V d, V n); 312 DOpN not16b, // d = ~n 313 fneg4s, // d = -n 314 fsqrt4s, // d = sqrtf(n) 315 scvtf4s, // int -> float 316 fcvtzs4s, // truncate float -> int 317 fcvtns4s, // round float -> int (nearest even) 318 frintp4s, // round float -> int as float, toward plus infinity (ceil) 319 frintm4s, // round float -> int as float, toward minus infinity (floor) 320 fcvtn, // f32 -> f16 in low half 321 fcvtl, // f16 in low half -> f32 322 xtns2h, // u32 -> u16 323 xtnh2b, // u16 -> u8 324 uxtlb2h, // u8 -> u16 (TODO: this is a special case of ushll.8h) 325 uxtlh2s, // u16 -> u32 (TODO: this is a special case of ushll.4s) 326 uminv4s; // dst[0] = min(n[0],n[1],n[2],n[3]), n as unsigned 327 328 void brk (int imm16); 329 void ret (X); 330 void add (X d, X n, int imm12); 331 void sub (X d, X n, int imm12); 332 void subs(X d, X n, int imm12); // subtract setting condition flags 333 334 enum Shift { LSL,LSR,ASR,ROR }; 335 void add (X d, X n, X m, Shift=LSL, int imm6=0); // d=n+Shift(m,imm6), for Shift != ROR. 336 337 // There's another encoding for unconditional branches that can jump further, 338 // but this one encoded as b.al is simple to implement and should be fine. b(Label * l)339 void b (Label* l) { this->b(Condition::al, l); } bne(Label * l)340 void bne(Label* l) { this->b(Condition::ne, l); } blt(Label * l)341 void blt(Label* l) { this->b(Condition::lt, l); } 342 343 // "cmp ..." is just an assembler mnemonic for "subs xzr, ..."! cmp(X n,int imm12)344 void cmp(X n, int imm12) { this->subs(xzr, n, imm12); } 345 346 // Compare and branch if zero/non-zero, as if 347 // cmp(t,0) 348 // beq/bne(l) 349 // but without setting condition flags. 350 void cbz (X t, Label* l); 351 void cbnz(X t, Label* l); 352 353 // TODO: there are ldur variants with unscaled imm, useful? 354 void ldrd(X dst, X src, int imm12=0); // 64-bit dst = *(src+imm12*8) 355 void ldrs(X dst, X src, int imm12=0); // 32-bit dst = *(src+imm12*4) 356 void ldrh(X dst, X src, int imm12=0); // 16-bit dst = *(src+imm12*2) 357 void ldrb(X dst, X src, int imm12=0); // 8-bit dst = *(src+imm12) 358 359 void ldrq(V dst, Label*); // 128-bit PC-relative load 360 361 void ldrq(V dst, X src, int imm12=0); // 128-bit dst = *(src+imm12*16) 362 void ldrd(V dst, X src, int imm12=0); // 64-bit dst = *(src+imm12*8) 363 void ldrs(V dst, X src, int imm12=0); // 32-bit dst = *(src+imm12*4) 364 void ldrh(V dst, X src, int imm12=0); // 16-bit dst = *(src+imm12*2) 365 void ldrb(V dst, X src, int imm12=0); // 8-bit dst = *(src+imm12) 366 367 void strs(X src, X dst, int imm12=0); // 32-bit *(dst+imm12*4) = src 368 369 void strq(V src, X dst, int imm12=0); // 128-bit *(dst+imm12*16) = src 370 void strd(V src, X dst, int imm12=0); // 64-bit *(dst+imm12*8) = src 371 void strs(V src, X dst, int imm12=0); // 32-bit *(dst+imm12*4) = src 372 void strh(V src, X dst, int imm12=0); // 16-bit *(dst+imm12*2) = src 373 void strb(V src, X dst, int imm12=0); // 8-bit *(dst+imm12) = src 374 375 void movs(X dst, V src, int lane); // dst = 32-bit src[lane] 376 void inss(V dst, X src, int lane); // dst[lane] = 32-bit src 377 378 void dup4s (V dst, X src); // Each 32-bit lane = src 379 380 void ld1r4s (V dst, X src); // Each 32-bit lane = *src 381 void ld1r8h (V dst, X src); // Each 16-bit lane = *src 382 void ld1r16b(V dst, X src); // Each 8-bit lane = *src 383 384 void ld24s(V dst, X src); // deinterleave(dst,dst+1) = 256-bit *src 385 void ld44s(V dst, X src); // deinterleave(dst,dst+1,dst+2,dst+3) = 512-bit *src 386 void st24s(V src, X dst); // 256-bit *dst = interleave_32bit_lanes(src,src+1) 387 void st44s(V src, X dst); // 512-bit *dst = interleave_32bit_lanes(src,src+1,src+2,src+3) 388 389 void ld24s(V dst, X src, int lane); // Load 2 32-bit values into given lane of dst..dst+1 390 void ld44s(V dst, X src, int lane); // Load 4 32-bit values into given lane of dst..dst+3 391 392 private: 393 uint8_t* fCode; 394 size_t fSize; 395 396 // x86-64 397 enum W { W0, W1 }; // Are the lanes 64-bit (W1) or default (W0)? Intel Vol 2A 2.3.5.5 398 enum L { L128, L256 }; // Is this a 128- or 256-bit operation? Intel Vol 2A 2.3.6.2 399 400 // Helpers for vector instructions. 401 void op(int prefix, int map, int opcode, int dst, int x, Operand y, W,L); 402 void op(int p, int m, int o, Ymm d, Ymm x, Operand y, W w=W0) { op(p,m,o, d,x,y,w,L256); } 403 void op(int p, int m, int o, Ymm d, Operand y, W w=W0) { op(p,m,o, d,0,y,w,L256); } 404 void op(int p, int m, int o, Xmm d, Xmm x, Operand y, W w=W0) { op(p,m,o, d,x,y,w,L128); } 405 void op(int p, int m, int o, Xmm d, Operand y, W w=W0) { op(p,m,o, d,0,y,w,L128); } 406 407 // Helpers for GP64 instructions. 408 void op(int opcode, Operand dst, GP64 x); 409 void op(int opcode, int opcode_ext, Operand dst, int imm); 410 411 void jump(uint8_t condition, Label*); 412 int disp32(Label*); 413 void imm_byte_after_operand(const Operand&, int byte); 414 415 // aarch64 416 417 // Opcode for 3-arguments ops is split between hi and lo: 418 // [11 bits hi] [5 bits m] [6 bits lo] [5 bits n] [5 bits d] 419 void op(uint32_t hi, V m, uint32_t lo, V n, V d); 420 421 // 0,1,2-argument ops, with or without an immediate: 422 // [ 22 bits op ] [5 bits n] [5 bits d] 423 // Any immediate falls in the middle somewhere overlapping with either op, n, or both. 424 void op(uint32_t op22, V n, V d, int imm=0); 425 void op(uint32_t op22, X n, V d, int imm=0) { this->op(op22,(V)n, d,imm); } 426 void op(uint32_t op22, V n, X d, int imm=0) { this->op(op22, n,(V)d,imm); } 427 void op(uint32_t op22, X n, X d, int imm=0) { this->op(op22,(V)n,(V)d,imm); } 428 void op(uint32_t op22, int imm=0) { this->op(op22,(V)0,(V)0,imm); } 429 // (1-argument ops don't seem to have a consistent convention of passing as n or d.) 430 431 432 // Order matters... value is 4-bit encoding for condition code. 433 enum class Condition { eq,ne,cs,cc,mi,pl,vs,vc,hi,ls,ge,lt,gt,le,al }; 434 void b(Condition, Label*); 435 int disp19(Label*); 436 }; 437 438 // Order matters a little: Ops <=store128 are treated as having side effects. 439 #define SKVM_OPS(M) \ 440 M(assert_true) \ 441 M(trace_line) M(trace_var) \ 442 M(trace_enter) M(trace_exit) M(trace_scope) \ 443 M(store8) M(store16) M(store32) M(store64) M(store128) \ 444 M(load8) M(load16) M(load32) M(load64) M(load128) \ 445 M(index) \ 446 M(gather8) M(gather16) M(gather32) \ 447 M(uniform32) \ 448 M(array32) \ 449 M(splat) \ 450 M(add_f32) M(add_i32) \ 451 M(sub_f32) M(sub_i32) \ 452 M(mul_f32) M(mul_i32) \ 453 M(div_f32) \ 454 M(min_f32) M(max_f32) \ 455 M(fma_f32) M(fms_f32) M(fnma_f32) \ 456 M(sqrt_f32) \ 457 M(shl_i32) M(shr_i32) M(sra_i32) \ 458 M(ceil) M(floor) M(trunc) M(round) M(to_fp16) M(from_fp16) \ 459 M(to_f32) \ 460 M(neq_f32) M(eq_f32) M(eq_i32) \ 461 M(gte_f32) M(gt_f32) M(gt_i32) \ 462 M(bit_and) M(bit_or) M(bit_xor) M(bit_clear) \ 463 M(select) \ 464 M(duplicate) 465 // End of SKVM_OPS 466 467 enum class Op : int { 468 #define M(op) op, 469 SKVM_OPS(M) 470 #undef M 471 }; 472 has_side_effect(Op op)473 static inline bool has_side_effect(Op op) { 474 return op <= Op::store128; 475 } touches_varying_memory(Op op)476 static inline bool touches_varying_memory(Op op) { 477 return Op::store8 <= op && op <= Op::load128; 478 } is_always_varying(Op op)479 static inline bool is_always_varying(Op op) { 480 return Op::store8 <= op && op <= Op::index; 481 } is_trace(Op op)482 static inline bool is_trace(Op op) { 483 return Op::trace_line <= op && op <= Op::trace_scope; 484 } 485 486 using Val = int; 487 // We reserve an impossibe Val ID as a sentinel 488 // NA meaning none, n/a, null, nil, etc. 489 static const Val NA = -1; 490 491 // Ptr and UPtr are an index into the registers args[]. The two styles of using args are 492 // varyings and uniforms. Varyings use Ptr, have a stride associated with them, and are 493 // evaluated everytime through the loop. Uniforms use UPtr, don't have a stride, and are 494 // usually hoisted above the loop. 495 struct Ptr { int ix; }; 496 struct UPtr : public Ptr {}; 497 498 bool operator!=(Ptr a, Ptr b); 499 500 struct I32 { 501 Builder* builder = nullptr; 502 Val id = NA; 503 explicit operator bool() const { return id != NA; } 504 Builder* operator->() const { return builder; } 505 }; 506 507 struct F32 { 508 Builder* builder = nullptr; 509 Val id = NA; 510 explicit operator bool() const { return id != NA; } 511 Builder* operator->() const { return builder; } 512 }; 513 514 struct Color { 515 F32 r,g,b,a; 516 explicit operator bool() const { return r && g && b && a; } 517 Builder* operator->() const { return a.operator->(); } 518 }; 519 520 struct HSLA { 521 F32 h,s,l,a; 522 explicit operator bool() const { return h && s && l && a; } 523 Builder* operator->() const { return a.operator->(); } 524 }; 525 526 struct Coord { 527 F32 x,y; 528 explicit operator bool() const { return x && y; } 529 Builder* operator->() const { return x.operator->(); } 530 }; 531 532 struct Uniform { 533 UPtr ptr; 534 int offset; 535 }; 536 struct Uniforms { 537 UPtr base; 538 std::vector<int> buf; 539 UniformsUniforms540 Uniforms(UPtr ptr, int init) : base(ptr), buf(init) {} 541 pushUniforms542 Uniform push(int val) { 543 buf.push_back(val); 544 return {base, (int)( sizeof(int)*(buf.size() - 1) )}; 545 } 546 pushFUniforms547 Uniform pushF(float val) { 548 int bits; 549 memcpy(&bits, &val, sizeof(int)); 550 return this->push(bits); 551 } 552 pushPtrUniforms553 Uniform pushPtr(const void* ptr) { 554 // Jam the pointer into 1 or 2 ints. 555 int ints[sizeof(ptr) / sizeof(int)]; 556 memcpy(ints, &ptr, sizeof(ptr)); 557 for (int bits : ints) { 558 buf.push_back(bits); 559 } 560 return {base, (int)( sizeof(int)*(buf.size() - SK_ARRAY_COUNT(ints)) )}; 561 } 562 pushArrayUniforms563 Uniform pushArray(int32_t a[]) { 564 return this->pushPtr(a); 565 } 566 pushArrayFUniforms567 Uniform pushArrayF(float a[]) { 568 return this->pushPtr(a); 569 } 570 }; 571 572 struct PixelFormat { 573 enum { UNORM, SRGB, FLOAT} encoding; 574 int r_bits, g_bits, b_bits, a_bits, 575 r_shift, g_shift, b_shift, a_shift; 576 }; 577 PixelFormat SkColorType_to_PixelFormat(SkColorType); 578 579 SK_BEGIN_REQUIRE_DENSE 580 struct Instruction { 581 Op op; // v* = op(x,y,z,w,immA,immB), where * == index of this Instruction. 582 Val x,y,z,w; // Enough arguments for Op::store128. 583 int immA,immB,immC; // Immediate bit pattern, shift count, pointer index, byte offset, etc. 584 }; 585 SK_END_REQUIRE_DENSE 586 587 bool operator==(const Instruction&, const Instruction&); 588 struct InstructionHash { 589 uint32_t operator()(const Instruction&, uint32_t seed=0) const; 590 }; 591 592 struct OptimizedInstruction { 593 Op op; 594 Val x,y,z,w; 595 int immA,immB,immC; 596 597 Val death; 598 bool can_hoist; 599 }; 600 601 struct Features { 602 bool fma = false; 603 bool fp16 = false; 604 }; 605 606 class TraceHook { 607 public: 608 virtual ~TraceHook() = default; 609 virtual void line(int lineNum) = 0; 610 virtual void var(int slot, int32_t val) = 0; 611 virtual void enter(int fnIdx) = 0; 612 virtual void exit(int fnIdx) = 0; 613 virtual void scope(int delta) = 0; 614 }; 615 616 class Builder { 617 public: 618 Builder(bool createDuplicates = false); 619 Builder(Features, bool createDuplicates = false); 620 621 Program done(const char* debug_name, 622 bool allow_jit, 623 std::unique_ptr<viz::Visualizer> visualizer) const; 624 Program done(const char* debug_name = nullptr, 625 bool allow_jit=true) const; 626 627 // Mostly for debugging, tests, etc. program()628 std::vector<Instruction> program() const { return fProgram; } 629 std::vector<OptimizedInstruction> optimize(viz::Visualizer* visualizer = nullptr) const; 630 631 // Returns a trace-hook ID which must be passed to the trace opcodes. 632 int attachTraceHook(TraceHook*); 633 634 // Convenience arg() wrappers for most common strides, sizeof(T) and 0. 635 template <typename T> varying()636 Ptr varying() { return this->arg(sizeof(T)); } varying(int stride)637 Ptr varying(int stride) { SkASSERT(stride > 0); return this->arg(stride); } uniform()638 UPtr uniform() { Ptr p = this->arg(0); return UPtr{{p.ix}}; } 639 640 // TODO: allow uniform (i.e. Ptr) offsets to store* and load*? 641 // TODO: sign extension (signed types) for <32-bit loads? 642 // TODO: unsigned integer operations where relevant (just comparisons?)? 643 644 // Assert cond is true, printing debug when not. 645 void assert_true(I32 cond, I32 debug); assert_true(I32 cond,F32 debug)646 void assert_true(I32 cond, F32 debug) { assert_true(cond, pun_to_I32(debug)); } assert_true(I32 cond)647 void assert_true(I32 cond) { assert_true(cond, cond); } 648 649 // Insert debug traces into the instruction stream 650 bool mergeMasks(I32& mask, I32& traceMask); 651 void trace_line (int traceHookID, I32 mask, I32 traceMask, int line); 652 void trace_var (int traceHookID, I32 mask, I32 traceMask, int slot, I32 val); 653 void trace_enter(int traceHookID, I32 mask, I32 traceMask, int fnIdx); 654 void trace_exit (int traceHookID, I32 mask, I32 traceMask, int fnIdx); 655 void trace_scope(int traceHookID, I32 mask, I32 traceMask, int delta); 656 657 // Store {8,16,32,64,128}-bit varying. 658 void store8 (Ptr ptr, I32 val); 659 void store16 (Ptr ptr, I32 val); 660 void store32 (Ptr ptr, I32 val); storeF(Ptr ptr,F32 val)661 void storeF (Ptr ptr, F32 val) { store32(ptr, pun_to_I32(val)); } 662 void store64 (Ptr ptr, I32 lo, I32 hi); // *ptr = lo|(hi<<32) 663 void store128(Ptr ptr, I32 x, I32 y, I32 z, I32 w); // *ptr = x|(y<<32)|(z<<64)|(w<<96) 664 665 // Returns varying {n, n-1, n-2, ..., 1}, where n is the argument to Program::eval(). 666 I32 index(); 667 668 // Load {8,16,32,64,128}-bit varying. 669 I32 load8 (Ptr ptr); 670 I32 load16 (Ptr ptr); 671 I32 load32 (Ptr ptr); loadF(Ptr ptr)672 F32 loadF (Ptr ptr) { return pun_to_F32(load32(ptr)); } 673 I32 load64 (Ptr ptr, int lane); // Load 32-bit lane 0-1 of 64-bit value. 674 I32 load128(Ptr ptr, int lane); // Load 32-bit lane 0-3 of 128-bit value. 675 676 // Load i32/f32 uniform with byte-count offset. 677 I32 uniform32(UPtr ptr, int offset); uniformF(UPtr ptr,int offset)678 F32 uniformF (UPtr ptr, int offset) { return pun_to_F32(uniform32(ptr,offset)); } 679 680 // Load i32/f32 uniform with byte-count offset and an c-style array index. The address of 681 // the element is (*(ptr + byte-count offset))[index]. 682 I32 array32 (UPtr ptr, int offset, int index); arrayF(UPtr ptr,int offset,int index)683 F32 arrayF (UPtr ptr, int offset, int index) { 684 return pun_to_F32(array32(ptr, offset, index)); 685 } 686 687 // Push and load this color as a uniform. 688 Color uniformColor(SkColor4f, Uniforms*); 689 690 // Gather u8,u16,i32 with varying element-count index from *(ptr + byte-count offset). 691 I32 gather8 (UPtr ptr, int offset, I32 index); 692 I32 gather16(UPtr ptr, int offset, I32 index); 693 I32 gather32(UPtr ptr, int offset, I32 index); gatherF(UPtr ptr,int offset,I32 index)694 F32 gatherF (UPtr ptr, int offset, I32 index) { 695 return pun_to_F32(gather32(ptr, offset, index)); 696 } 697 698 // Convenience methods for working with skvm::Uniform(s). uniform32(Uniform u)699 I32 uniform32(Uniform u) { return this->uniform32(u.ptr, u.offset); } uniformF(Uniform u)700 F32 uniformF (Uniform u) { return this->uniformF (u.ptr, u.offset); } gather8(Uniform u,I32 index)701 I32 gather8 (Uniform u, I32 index) { return this->gather8 (u.ptr, u.offset, index); } gather16(Uniform u,I32 index)702 I32 gather16 (Uniform u, I32 index) { return this->gather16 (u.ptr, u.offset, index); } gather32(Uniform u,I32 index)703 I32 gather32 (Uniform u, I32 index) { return this->gather32 (u.ptr, u.offset, index); } gatherF(Uniform u,I32 index)704 F32 gatherF (Uniform u, I32 index) { return this->gatherF (u.ptr, u.offset, index); } 705 706 // Convenience methods for working with array pointers in skvm::Uniforms. Index is an 707 // array index and not a byte offset. The array pointer is stored at u. array32(Uniform a,int index)708 I32 array32 (Uniform a, int index) { return this->array32 (a.ptr, a.offset, index); } arrayF(Uniform a,int index)709 F32 arrayF (Uniform a, int index) { return this->arrayF (a.ptr, a.offset, index); } 710 711 // Load an immediate constant. 712 I32 splat(int n); splat(unsigned u)713 I32 splat(unsigned u) { return splat((int)u); } splat(float f)714 F32 splat(float f) { 715 int bits; 716 memcpy(&bits, &f, 4); 717 return pun_to_F32(splat(bits)); 718 } 719 720 // Some operations make sense with immediate arguments, 721 // so we provide overloads inline to make that seamless. 722 // 723 // We omit overloads that may indicate a bug or performance issue. 724 // In general it does not make sense to pass immediates to unary operations, 725 // and even sometimes not for binary operations, e.g. 726 // 727 // div(x, y) -- normal every day divide 728 // div(3.0f, y) -- yep, makes sense 729 // div(x, 3.0f) -- omitted as a reminder you probably want mul(x, 1/3.0f). 730 // 731 // You can of course always splat() to override these opinions. 732 733 // float math, comparisons, etc. 734 F32 add(F32, F32); add(F32 x,float y)735 F32 add(F32 x, float y) { return add(x, splat(y)); } add(float x,F32 y)736 F32 add(float x, F32 y) { return add(splat(x), y); } 737 738 F32 sub(F32, F32); sub(F32 x,float y)739 F32 sub(F32 x, float y) { return sub(x, splat(y)); } sub(float x,F32 y)740 F32 sub(float x, F32 y) { return sub(splat(x), y); } 741 742 F32 mul(F32, F32); mul(F32 x,float y)743 F32 mul(F32 x, float y) { return mul(x, splat(y)); } mul(float x,F32 y)744 F32 mul(float x, F32 y) { return mul(splat(x), y); } 745 746 // mul(), but allowing optimizations not strictly legal under IEEE-754 rules. 747 F32 fast_mul(F32, F32); fast_mul(F32 x,float y)748 F32 fast_mul(F32 x, float y) { return fast_mul(x, splat(y)); } fast_mul(float x,F32 y)749 F32 fast_mul(float x, F32 y) { return fast_mul(splat(x), y); } 750 751 F32 div(F32, F32); div(float x,F32 y)752 F32 div(float x, F32 y) { return div(splat(x), y); } 753 754 F32 min(F32, F32); min(F32 x,float y)755 F32 min(F32 x, float y) { return min(x, splat(y)); } min(float x,F32 y)756 F32 min(float x, F32 y) { return min(splat(x), y); } 757 758 F32 max(F32, F32); max(F32 x,float y)759 F32 max(F32 x, float y) { return max(x, splat(y)); } max(float x,F32 y)760 F32 max(float x, F32 y) { return max(splat(x), y); } 761 762 // TODO: remove mad()? It's just sugar. mad(F32 x,F32 y,F32 z)763 F32 mad(F32 x, F32 y, F32 z) { return add(mul(x,y), z); } mad(F32 x,F32 y,float z)764 F32 mad(F32 x, F32 y, float z) { return mad( x , y , splat(z)); } mad(F32 x,float y,F32 z)765 F32 mad(F32 x, float y, F32 z) { return mad( x , splat(y), z ); } mad(F32 x,float y,float z)766 F32 mad(F32 x, float y, float z) { return mad( x , splat(y), splat(z)); } mad(float x,F32 y,F32 z)767 F32 mad(float x, F32 y, F32 z) { return mad(splat(x), y , z ); } mad(float x,F32 y,float z)768 F32 mad(float x, F32 y, float z) { return mad(splat(x), y , splat(z)); } mad(float x,float y,F32 z)769 F32 mad(float x, float y, F32 z) { return mad(splat(x), splat(y), z ); } 770 771 F32 sqrt(F32); 772 F32 approx_log2(F32); 773 F32 approx_pow2(F32); approx_log(F32 x)774 F32 approx_log (F32 x) { return mul(0.69314718f, approx_log2(x)); } approx_exp(F32 x)775 F32 approx_exp (F32 x) { return approx_pow2(mul(x, 1.4426950408889634074f)); } 776 777 F32 approx_powf(F32 base, F32 exp); approx_powf(F32 base,float exp)778 F32 approx_powf(F32 base, float exp) { return approx_powf(base, splat(exp)); } approx_powf(float base,F32 exp)779 F32 approx_powf(float base, F32 exp) { return approx_powf(splat(base), exp); } 780 781 782 F32 approx_sin(F32 radians); approx_cos(F32 radians)783 F32 approx_cos(F32 radians) { return approx_sin(add(radians, SK_ScalarPI/2)); } 784 F32 approx_tan(F32 radians); 785 786 F32 approx_asin(F32 x); approx_acos(F32 x)787 F32 approx_acos(F32 x) { return sub(SK_ScalarPI/2, approx_asin(x)); } 788 F32 approx_atan(F32 x); 789 F32 approx_atan2(F32 y, F32 x); 790 791 F32 lerp(F32 lo, F32 hi, F32 t); lerp(F32 lo,F32 hi,float t)792 F32 lerp(F32 lo, F32 hi, float t) { return lerp( lo , hi , splat(t)); } lerp(F32 lo,float hi,float t)793 F32 lerp(F32 lo, float hi, float t) { return lerp( lo , splat(hi), splat(t)); } lerp(F32 lo,float hi,F32 t)794 F32 lerp(F32 lo, float hi, F32 t) { return lerp( lo , splat(hi), t ); } lerp(float lo,F32 hi,F32 t)795 F32 lerp(float lo, F32 hi, F32 t) { return lerp(splat(lo), hi , t ); } lerp(float lo,F32 hi,float t)796 F32 lerp(float lo, F32 hi, float t) { return lerp(splat(lo), hi , splat(t)); } lerp(float lo,float hi,F32 t)797 F32 lerp(float lo, float hi, F32 t) { return lerp(splat(lo), splat(hi), t ); } 798 clamp(F32 x,F32 lo,F32 hi)799 F32 clamp(F32 x, F32 lo, F32 hi) { return max(lo, min(x, hi)); } clamp(F32 x,F32 lo,float hi)800 F32 clamp(F32 x, F32 lo, float hi) { return clamp( x , lo , splat(hi)); } clamp(F32 x,float lo,float hi)801 F32 clamp(F32 x, float lo, float hi) { return clamp( x , splat(lo), splat(hi)); } clamp(F32 x,float lo,F32 hi)802 F32 clamp(F32 x, float lo, F32 hi) { return clamp( x , splat(lo), hi ); } clamp(float x,F32 lo,F32 hi)803 F32 clamp(float x, F32 lo, F32 hi) { return clamp(splat(x), lo , hi ); } clamp(float x,F32 lo,float hi)804 F32 clamp(float x, F32 lo, float hi) { return clamp(splat(x), lo , splat(hi)); } clamp(float x,float lo,F32 hi)805 F32 clamp(float x, float lo, F32 hi) { return clamp(splat(x), splat(lo), hi ); } 806 clamp01(F32 x)807 F32 clamp01(F32 x) { return clamp(x, 0.0f, 1.0f); } 808 abs(F32 x)809 F32 abs(F32 x) { return pun_to_F32(bit_and(pun_to_I32(x), 0x7fff'ffff)); } 810 F32 fract(F32 x) { return sub(x, floor(x)); } 811 F32 ceil(F32); 812 F32 floor(F32); 813 I32 is_NaN (F32 x) { return neq(x,x); } 814 I32 is_finite(F32 x) { return lt(bit_and(pun_to_I32(x), 0x7f80'0000), 0x7f80'0000); } 815 816 I32 trunc(F32 x); 817 I32 round(F32 x); // Round to int using current rounding mode (as if lrintf()). 818 I32 pun_to_I32(F32 x) { return {x.builder, x.id}; } 819 820 I32 to_fp16(F32 x); 821 F32 from_fp16(I32 x); 822 823 I32 eq(F32, F32); 824 I32 eq(F32 x, float y) { return eq(x, splat(y)); } 825 I32 eq(float x, F32 y) { return eq(splat(x), y); } 826 827 I32 neq(F32, F32); 828 I32 neq(F32 x, float y) { return neq(x, splat(y)); } 829 I32 neq(float x, F32 y) { return neq(splat(x), y); } 830 831 I32 lt(F32, F32); 832 I32 lt(F32 x, float y) { return lt(x, splat(y)); } 833 I32 lt(float x, F32 y) { return lt(splat(x), y); } 834 835 I32 lte(F32, F32); 836 I32 lte(F32 x, float y) { return lte(x, splat(y)); } 837 I32 lte(float x, F32 y) { return lte(splat(x), y); } 838 839 I32 gt(F32, F32); 840 I32 gt(F32 x, float y) { return gt(x, splat(y)); } 841 I32 gt(float x, F32 y) { return gt(splat(x), y); } 842 843 I32 gte(F32, F32); 844 I32 gte(F32 x, float y) { return gte(x, splat(y)); } 845 I32 gte(float x, F32 y) { return gte(splat(x), y); } 846 847 // int math, comparisons, etc. 848 I32 add(I32, I32); 849 I32 add(I32 x, int y) { return add(x, splat(y)); } 850 I32 add(int x, I32 y) { return add(splat(x), y); } 851 852 I32 sub(I32, I32); 853 I32 sub(I32 x, int y) { return sub(x, splat(y)); } 854 I32 sub(int x, I32 y) { return sub(splat(x), y); } 855 856 I32 mul(I32, I32); 857 I32 mul(I32 x, int y) { return mul(x, splat(y)); } 858 I32 mul(int x, I32 y) { return mul(splat(x), y); } 859 860 I32 shl(I32 x, int bits); 861 I32 shr(I32 x, int bits); 862 I32 sra(I32 x, int bits); 863 864 I32 eq(I32, I32); 865 I32 eq(I32 x, int y) { return eq(x, splat(y)); } 866 I32 eq(int x, I32 y) { return eq(splat(x), y); } 867 868 I32 neq(I32, I32); 869 I32 neq(I32 x, int y) { return neq(x, splat(y)); } 870 I32 neq(int x, I32 y) { return neq(splat(x), y); } 871 872 I32 lt(I32, I32); 873 I32 lt(I32 x, int y) { return lt(x, splat(y)); } 874 I32 lt(int x, I32 y) { return lt(splat(x), y); } 875 876 I32 lte(I32, I32); 877 I32 lte(I32 x, int y) { return lte(x, splat(y)); } 878 I32 lte(int x, I32 y) { return lte(splat(x), y); } 879 880 I32 gt(I32, I32); 881 I32 gt(I32 x, int y) { return gt(x, splat(y)); } 882 I32 gt(int x, I32 y) { return gt(splat(x), y); } 883 884 I32 gte(I32, I32); 885 I32 gte(I32 x, int y) { return gte(x, splat(y)); } 886 I32 gte(int x, I32 y) { return gte(splat(x), y); } 887 888 F32 to_F32(I32 x); 889 F32 pun_to_F32(I32 x) { return {x.builder, x.id}; } 890 891 // Bitwise operations. 892 I32 bit_and(I32, I32); 893 I32 bit_and(I32 x, int y) { return bit_and(x, splat(y)); } 894 I32 bit_and(int x, I32 y) { return bit_and(splat(x), y); } 895 896 I32 bit_or(I32, I32); 897 I32 bit_or(I32 x, int y) { return bit_or(x, splat(y)); } 898 I32 bit_or(int x, I32 y) { return bit_or(splat(x), y); } 899 900 I32 bit_xor(I32, I32); 901 I32 bit_xor(I32 x, int y) { return bit_xor(x, splat(y)); } 902 I32 bit_xor(int x, I32 y) { return bit_xor(splat(x), y); } 903 904 I32 bit_clear(I32, I32); 905 I32 bit_clear(I32 x, int y) { return bit_clear(x, splat(y)); } 906 I32 bit_clear(int x, I32 y) { return bit_clear(splat(x), y); } 907 908 I32 min(I32 x, I32 y) { return select(lte(x,y), x, y); } 909 I32 min(I32 x, int y) { return min(x, splat(y)); } 910 I32 min(int x, I32 y) { return min(splat(x), y); } 911 912 I32 max(I32 x, I32 y) { return select(gte(x,y), x, y); } 913 I32 max(I32 x, int y) { return max(x, splat(y)); } 914 I32 max(int x, I32 y) { return max(splat(x), y); } 915 916 I32 select(I32 cond, I32 t, I32 f); // cond ? t : f 917 I32 select(I32 cond, int t, I32 f) { return select(cond, splat(t), f ); } 918 I32 select(I32 cond, I32 t, int f) { return select(cond, t , splat(f)); } 919 I32 select(I32 cond, int t, int f) { return select(cond, splat(t), splat(f)); } 920 921 F32 select(I32 cond, F32 t, F32 f) { 922 return pun_to_F32(select(cond, pun_to_I32(t) 923 , pun_to_I32(f))); 924 } 925 F32 select(I32 cond, float t, F32 f) { return select(cond, splat(t), f ); } 926 F32 select(I32 cond, F32 t, float f) { return select(cond, t , splat(f)); } 927 F32 select(I32 cond, float t, float f) { return select(cond, splat(t), splat(f)); } 928 929 I32 extract(I32 x, int bits, I32 z); // (x>>bits) & z 930 I32 extract(I32 x, int bits, int z) { return extract(x, bits, splat(z)); } 931 I32 extract(int x, int bits, I32 z) { return extract(splat(x), bits, z); } 932 933 I32 pack(I32 x, I32 y, int bits); // x | (y<<bits) 934 I32 pack(I32 x, int y, int bits) { return pack(x, splat(y), bits); } 935 I32 pack(int x, I32 y, int bits) { return pack(splat(x), y, bits); } 936 937 938 // Common idioms used in several places, worth centralizing for consistency. 939 F32 from_unorm(int bits, I32); // E.g. from_unorm(8, x) -> x * (1/255.0f) 940 I32 to_unorm(int bits, F32); // E.g. to_unorm(8, x) -> round(x * 255) 941 942 Color load(PixelFormat, Ptr ptr); 943 void store(PixelFormat, Ptr ptr, Color); 944 Color gather(PixelFormat, UPtr ptr, int offset, I32 index); 945 Color gather(PixelFormat f, Uniform u, I32 index) { 946 return gather(f, u.ptr, u.offset, index); 947 } 948 949 void premul(F32* r, F32* g, F32* b, F32 a); 950 void unpremul(F32* r, F32* g, F32* b, F32 a); 951 952 Color premul(Color c) { this->premul(&c.r, &c.g, &c.b, c.a); return c; } 953 Color unpremul(Color c) { this->unpremul(&c.r, &c.g, &c.b, c.a); return c; } 954 955 Color lerp(Color lo, Color hi, F32 t); 956 Color blend(SkBlendMode, Color src, Color dst); 957 958 Color clamp01(Color c) { 959 return { clamp01(c.r), clamp01(c.g), clamp01(c.b), clamp01(c.a) }; 960 } 961 962 HSLA to_hsla(Color); 963 Color to_rgba(HSLA); 964 965 void dump(SkWStream* = nullptr) const; 966 967 uint64_t hash() const; 968 969 Val push(Instruction); 970 971 bool allImm() const { return true; } 972 973 template <typename T, typename... Rest> 974 bool allImm(Val id, T* imm, Rest... rest) const { 975 if (fProgram[id].op == Op::splat) { 976 static_assert(sizeof(T) == 4); 977 memcpy(imm, &fProgram[id].immA, 4); 978 return this->allImm(rest...); 979 } 980 return false; 981 } 982 983 bool allUniform() const { return true; } 984 985 template <typename... Rest> 986 bool allUniform(Val id, Uniform* uni, Rest... rest) const { 987 if (fProgram[id].op == Op::uniform32) { 988 uni->ptr.ix = fProgram[id].immA; 989 uni->offset = fProgram[id].immB; 990 return this->allUniform(rest...); 991 } 992 return false; 993 } 994 995 private: 996 // Declare an argument with given stride (use stride=0 for uniforms). 997 Ptr arg(int stride); 998 999 Val push( 1000 Op op, Val x=NA, Val y=NA, Val z=NA, Val w=NA, int immA=0, int immB=0, int immC=0) { 1001 return this->push(Instruction{op, x,y,z,w, immA,immB,immC}); 1002 } 1003 1004 template <typename T> 1005 bool isImm(Val id, T want) const { 1006 T imm = 0; 1007 return this->allImm(id, &imm) && imm == want; 1008 } 1009 1010 SkTHashMap<Instruction, Val, InstructionHash> fIndex; 1011 std::vector<Instruction> fProgram; 1012 std::vector<TraceHook*> fTraceHooks; 1013 std::vector<int> fStrides; 1014 const Features fFeatures; 1015 bool fCreateDuplicates; 1016 }; 1017 1018 // Optimization passes and data structures normally used by Builder::optimize(), 1019 // extracted here so they can be unit tested. 1020 std::vector<Instruction> eliminate_dead_code(std::vector<Instruction>, 1021 viz::Visualizer* visualizer = nullptr); 1022 std::vector<OptimizedInstruction> finalize(std::vector<Instruction>, 1023 viz::Visualizer* visualizer = nullptr); 1024 1025 using Reg = int; 1026 1027 // d = op(x,y,z,w, immA,immB) 1028 struct InterpreterInstruction { 1029 Op op; 1030 Reg d,x,y,z,w; 1031 int immA,immB,immC; 1032 }; 1033 1034 class Program { 1035 public: 1036 Program(const std::vector<OptimizedInstruction>& instructions, 1037 std::unique_ptr<viz::Visualizer> visualizer, 1038 const std::vector<int>& strides, 1039 const std::vector<TraceHook*>& traceHooks, 1040 const char* debug_name, bool allow_jit); 1041 1042 Program(); 1043 ~Program(); 1044 1045 Program(Program&&); 1046 Program& operator=(Program&&); 1047 1048 Program(const Program&) = delete; 1049 Program& operator=(const Program&) = delete; 1050 1051 void eval(int n, void* args[]) const; 1052 1053 template <typename... T> 1054 void eval(int n, T*... arg) const { 1055 SkASSERT(sizeof...(arg) == this->nargs()); 1056 // This nullptr isn't important except that it makes args[] non-empty if you pass none. 1057 void* args[] = { (void*)arg..., nullptr }; 1058 this->eval(n, args); 1059 } 1060 1061 std::vector<InterpreterInstruction> instructions() const; 1062 int nargs() const; 1063 int nregs() const; 1064 int loop () const; 1065 bool empty() const; 1066 1067 bool hasJIT() const; // Has this Program been JITted? 1068 bool hasTraceHooks() const; // Is this program instrumented for debugging? 1069 1070 void visualize(SkWStream* output, const char* code) const; 1071 void dump(SkWStream* = nullptr) const; 1072 void disassemble(SkWStream* = nullptr) const; 1073 viz::Visualizer* visualizer(); 1074 1075 private: 1076 void setupInterpreter(const std::vector<OptimizedInstruction>&); 1077 void setupJIT (const std::vector<OptimizedInstruction>&, const char* debug_name); 1078 void setupLLVM (const std::vector<OptimizedInstruction>&, const char* debug_name); 1079 1080 bool jit(const std::vector<OptimizedInstruction>&, 1081 int* stack_hint, uint32_t* registers_used, 1082 Assembler*) const; 1083 1084 void waitForLLVM() const; 1085 void dropJIT(); 1086 1087 struct Impl; 1088 std::unique_ptr<Impl> fImpl; 1089 }; 1090 1091 // TODO: control flow 1092 // TODO: 64-bit values? 1093 1094 #define SI static inline 1095 1096 SI I32 operator+(I32 x, I32 y) { return x->add(x,y); } 1097 SI I32 operator+(I32 x, int y) { return x->add(x,y); } 1098 SI I32 operator+(int x, I32 y) { return y->add(x,y); } 1099 1100 SI I32 operator-(I32 x, I32 y) { return x->sub(x,y); } 1101 SI I32 operator-(I32 x, int y) { return x->sub(x,y); } 1102 SI I32 operator-(int x, I32 y) { return y->sub(x,y); } 1103 1104 SI I32 operator*(I32 x, I32 y) { return x->mul(x,y); } 1105 SI I32 operator*(I32 x, int y) { return x->mul(x,y); } 1106 SI I32 operator*(int x, I32 y) { return y->mul(x,y); } 1107 min(I32 x,I32 y)1108 SI I32 min(I32 x, I32 y) { return x->min(x,y); } min(I32 x,int y)1109 SI I32 min(I32 x, int y) { return x->min(x,y); } min(int x,I32 y)1110 SI I32 min(int x, I32 y) { return y->min(x,y); } 1111 max(I32 x,I32 y)1112 SI I32 max(I32 x, I32 y) { return x->max(x,y); } max(I32 x,int y)1113 SI I32 max(I32 x, int y) { return x->max(x,y); } max(int x,I32 y)1114 SI I32 max(int x, I32 y) { return y->max(x,y); } 1115 1116 SI I32 operator==(I32 x, I32 y) { return x->eq(x,y); } 1117 SI I32 operator==(I32 x, int y) { return x->eq(x,y); } 1118 SI I32 operator==(int x, I32 y) { return y->eq(x,y); } 1119 1120 SI I32 operator!=(I32 x, I32 y) { return x->neq(x,y); } 1121 SI I32 operator!=(I32 x, int y) { return x->neq(x,y); } 1122 SI I32 operator!=(int x, I32 y) { return y->neq(x,y); } 1123 1124 SI I32 operator< (I32 x, I32 y) { return x->lt(x,y); } 1125 SI I32 operator< (I32 x, int y) { return x->lt(x,y); } 1126 SI I32 operator< (int x, I32 y) { return y->lt(x,y); } 1127 1128 SI I32 operator<=(I32 x, I32 y) { return x->lte(x,y); } 1129 SI I32 operator<=(I32 x, int y) { return x->lte(x,y); } 1130 SI I32 operator<=(int x, I32 y) { return y->lte(x,y); } 1131 1132 SI I32 operator> (I32 x, I32 y) { return x->gt(x,y); } 1133 SI I32 operator> (I32 x, int y) { return x->gt(x,y); } 1134 SI I32 operator> (int x, I32 y) { return y->gt(x,y); } 1135 1136 SI I32 operator>=(I32 x, I32 y) { return x->gte(x,y); } 1137 SI I32 operator>=(I32 x, int y) { return x->gte(x,y); } 1138 SI I32 operator>=(int x, I32 y) { return y->gte(x,y); } 1139 1140 1141 SI F32 operator+(F32 x, F32 y) { return x->add(x,y); } 1142 SI F32 operator+(F32 x, float y) { return x->add(x,y); } 1143 SI F32 operator+(float x, F32 y) { return y->add(x,y); } 1144 1145 SI F32 operator-(F32 x, F32 y) { return x->sub(x,y); } 1146 SI F32 operator-(F32 x, float y) { return x->sub(x,y); } 1147 SI F32 operator-(float x, F32 y) { return y->sub(x,y); } 1148 1149 SI F32 operator*(F32 x, F32 y) { return x->mul(x,y); } 1150 SI F32 operator*(F32 x, float y) { return x->mul(x,y); } 1151 SI F32 operator*(float x, F32 y) { return y->mul(x,y); } 1152 fast_mul(F32 x,F32 y)1153 SI F32 fast_mul(F32 x, F32 y) { return x->fast_mul(x,y); } fast_mul(F32 x,float y)1154 SI F32 fast_mul(F32 x, float y) { return x->fast_mul(x,y); } fast_mul(float x,F32 y)1155 SI F32 fast_mul(float x, F32 y) { return y->fast_mul(x,y); } 1156 1157 SI F32 operator/(F32 x, F32 y) { return x->div(x,y); } 1158 SI F32 operator/(float x, F32 y) { return y->div(x,y); } 1159 min(F32 x,F32 y)1160 SI F32 min(F32 x, F32 y) { return x->min(x,y); } min(F32 x,float y)1161 SI F32 min(F32 x, float y) { return x->min(x,y); } min(float x,F32 y)1162 SI F32 min(float x, F32 y) { return y->min(x,y); } 1163 max(F32 x,F32 y)1164 SI F32 max(F32 x, F32 y) { return x->max(x,y); } max(F32 x,float y)1165 SI F32 max(F32 x, float y) { return x->max(x,y); } max(float x,F32 y)1166 SI F32 max(float x, F32 y) { return y->max(x,y); } 1167 1168 SI I32 operator==(F32 x, F32 y) { return x->eq(x,y); } 1169 SI I32 operator==(F32 x, float y) { return x->eq(x,y); } 1170 SI I32 operator==(float x, F32 y) { return y->eq(x,y); } 1171 1172 SI I32 operator!=(F32 x, F32 y) { return x->neq(x,y); } 1173 SI I32 operator!=(F32 x, float y) { return x->neq(x,y); } 1174 SI I32 operator!=(float x, F32 y) { return y->neq(x,y); } 1175 1176 SI I32 operator< (F32 x, F32 y) { return x->lt(x,y); } 1177 SI I32 operator< (F32 x, float y) { return x->lt(x,y); } 1178 SI I32 operator< (float x, F32 y) { return y->lt(x,y); } 1179 1180 SI I32 operator<=(F32 x, F32 y) { return x->lte(x,y); } 1181 SI I32 operator<=(F32 x, float y) { return x->lte(x,y); } 1182 SI I32 operator<=(float x, F32 y) { return y->lte(x,y); } 1183 1184 SI I32 operator> (F32 x, F32 y) { return x->gt(x,y); } 1185 SI I32 operator> (F32 x, float y) { return x->gt(x,y); } 1186 SI I32 operator> (float x, F32 y) { return y->gt(x,y); } 1187 1188 SI I32 operator>=(F32 x, F32 y) { return x->gte(x,y); } 1189 SI I32 operator>=(F32 x, float y) { return x->gte(x,y); } 1190 SI I32 operator>=(float x, F32 y) { return y->gte(x,y); } 1191 1192 SI I32& operator+=(I32& x, I32 y) { return (x = x + y); } 1193 SI I32& operator+=(I32& x, int y) { return (x = x + y); } 1194 1195 SI I32& operator-=(I32& x, I32 y) { return (x = x - y); } 1196 SI I32& operator-=(I32& x, int y) { return (x = x - y); } 1197 1198 SI I32& operator*=(I32& x, I32 y) { return (x = x * y); } 1199 SI I32& operator*=(I32& x, int y) { return (x = x * y); } 1200 1201 SI F32& operator+=(F32& x, F32 y) { return (x = x + y); } 1202 SI F32& operator+=(F32& x, float y) { return (x = x + y); } 1203 1204 SI F32& operator-=(F32& x, F32 y) { return (x = x - y); } 1205 SI F32& operator-=(F32& x, float y) { return (x = x - y); } 1206 1207 SI F32& operator*=(F32& x, F32 y) { return (x = x * y); } 1208 SI F32& operator*=(F32& x, float y) { return (x = x * y); } 1209 1210 SI F32& operator/=(F32& x, F32 y) { return (x = x / y); } 1211 assert_true(I32 cond,I32 debug)1212 SI void assert_true(I32 cond, I32 debug) { cond->assert_true(cond,debug); } assert_true(I32 cond,F32 debug)1213 SI void assert_true(I32 cond, F32 debug) { cond->assert_true(cond,debug); } assert_true(I32 cond)1214 SI void assert_true(I32 cond) { cond->assert_true(cond); } 1215 store8(Ptr ptr,I32 val)1216 SI void store8 (Ptr ptr, I32 val) { val->store8 (ptr, val); } store16(Ptr ptr,I32 val)1217 SI void store16 (Ptr ptr, I32 val) { val->store16 (ptr, val); } store32(Ptr ptr,I32 val)1218 SI void store32 (Ptr ptr, I32 val) { val->store32 (ptr, val); } storeF(Ptr ptr,F32 val)1219 SI void storeF (Ptr ptr, F32 val) { val->storeF (ptr, val); } store64(Ptr ptr,I32 lo,I32 hi)1220 SI void store64 (Ptr ptr, I32 lo, I32 hi) { lo ->store64 (ptr, lo,hi); } store128(Ptr ptr,I32 x,I32 y,I32 z,I32 w)1221 SI void store128(Ptr ptr, I32 x, I32 y, I32 z, I32 w) { x ->store128(ptr, x,y,z,w); } 1222 gather8(UPtr ptr,int off,I32 ix)1223 SI I32 gather8 (UPtr ptr, int off, I32 ix) { return ix->gather8 (ptr, off, ix); } gather16(UPtr ptr,int off,I32 ix)1224 SI I32 gather16(UPtr ptr, int off, I32 ix) { return ix->gather16(ptr, off, ix); } gather32(UPtr ptr,int off,I32 ix)1225 SI I32 gather32(UPtr ptr, int off, I32 ix) { return ix->gather32(ptr, off, ix); } gatherF(UPtr ptr,int off,I32 ix)1226 SI F32 gatherF (UPtr ptr, int off, I32 ix) { return ix->gatherF (ptr, off, ix); } 1227 gather8(Uniform u,I32 ix)1228 SI I32 gather8 (Uniform u, I32 ix) { return ix->gather8 (u, ix); } gather16(Uniform u,I32 ix)1229 SI I32 gather16(Uniform u, I32 ix) { return ix->gather16(u, ix); } gather32(Uniform u,I32 ix)1230 SI I32 gather32(Uniform u, I32 ix) { return ix->gather32(u, ix); } gatherF(Uniform u,I32 ix)1231 SI F32 gatherF (Uniform u, I32 ix) { return ix->gatherF (u, ix); } 1232 sqrt(F32 x)1233 SI F32 sqrt(F32 x) { return x-> sqrt(x); } approx_log2(F32 x)1234 SI F32 approx_log2(F32 x) { return x->approx_log2(x); } approx_pow2(F32 x)1235 SI F32 approx_pow2(F32 x) { return x->approx_pow2(x); } approx_log(F32 x)1236 SI F32 approx_log (F32 x) { return x->approx_log (x); } approx_exp(F32 x)1237 SI F32 approx_exp (F32 x) { return x->approx_exp (x); } 1238 approx_powf(F32 base,F32 exp)1239 SI F32 approx_powf(F32 base, F32 exp) { return base->approx_powf(base, exp); } approx_powf(F32 base,float exp)1240 SI F32 approx_powf(F32 base, float exp) { return base->approx_powf(base, exp); } approx_powf(float base,F32 exp)1241 SI F32 approx_powf(float base, F32 exp) { return exp->approx_powf(base, exp); } 1242 approx_sin(F32 radians)1243 SI F32 approx_sin(F32 radians) { return radians->approx_sin(radians); } approx_cos(F32 radians)1244 SI F32 approx_cos(F32 radians) { return radians->approx_cos(radians); } approx_tan(F32 radians)1245 SI F32 approx_tan(F32 radians) { return radians->approx_tan(radians); } 1246 approx_asin(F32 x)1247 SI F32 approx_asin(F32 x) { return x->approx_asin(x); } approx_acos(F32 x)1248 SI F32 approx_acos(F32 x) { return x->approx_acos(x); } approx_atan(F32 x)1249 SI F32 approx_atan(F32 x) { return x->approx_atan(x); } approx_atan2(F32 y,F32 x)1250 SI F32 approx_atan2(F32 y, F32 x) { return x->approx_atan2(y, x); } 1251 clamp01(F32 x)1252 SI F32 clamp01(F32 x) { return x-> clamp01(x); } abs(F32 x)1253 SI F32 abs(F32 x) { return x-> abs(x); } ceil(F32 x)1254 SI F32 ceil(F32 x) { return x-> ceil(x); } fract(F32 x)1255 SI F32 fract(F32 x) { return x-> fract(x); } floor(F32 x)1256 SI F32 floor(F32 x) { return x-> floor(x); } is_NaN(F32 x)1257 SI I32 is_NaN(F32 x) { return x-> is_NaN(x); } is_finite(F32 x)1258 SI I32 is_finite(F32 x) { return x->is_finite(x); } 1259 trunc(F32 x)1260 SI I32 trunc(F32 x) { return x-> trunc(x); } round(F32 x)1261 SI I32 round(F32 x) { return x-> round(x); } pun_to_I32(F32 x)1262 SI I32 pun_to_I32(F32 x) { return x-> pun_to_I32(x); } pun_to_F32(I32 x)1263 SI F32 pun_to_F32(I32 x) { return x-> pun_to_F32(x); } to_F32(I32 x)1264 SI F32 to_F32(I32 x) { return x-> to_F32(x); } to_fp16(F32 x)1265 SI I32 to_fp16(F32 x) { return x-> to_fp16(x); } from_fp16(I32 x)1266 SI F32 from_fp16(I32 x) { return x-> from_fp16(x); } 1267 lerp(F32 lo,F32 hi,F32 t)1268 SI F32 lerp(F32 lo, F32 hi, F32 t) { return lo->lerp(lo,hi,t); } lerp(F32 lo,F32 hi,float t)1269 SI F32 lerp(F32 lo, F32 hi, float t) { return lo->lerp(lo,hi,t); } lerp(F32 lo,float hi,F32 t)1270 SI F32 lerp(F32 lo, float hi, F32 t) { return lo->lerp(lo,hi,t); } lerp(F32 lo,float hi,float t)1271 SI F32 lerp(F32 lo, float hi, float t) { return lo->lerp(lo,hi,t); } lerp(float lo,F32 hi,F32 t)1272 SI F32 lerp(float lo, F32 hi, F32 t) { return hi->lerp(lo,hi,t); } lerp(float lo,F32 hi,float t)1273 SI F32 lerp(float lo, F32 hi, float t) { return hi->lerp(lo,hi,t); } lerp(float lo,float hi,F32 t)1274 SI F32 lerp(float lo, float hi, F32 t) { return t->lerp(lo,hi,t); } 1275 clamp(F32 x,F32 lo,F32 hi)1276 SI F32 clamp(F32 x, F32 lo, F32 hi) { return x->clamp(x,lo,hi); } clamp(F32 x,F32 lo,float hi)1277 SI F32 clamp(F32 x, F32 lo, float hi) { return x->clamp(x,lo,hi); } clamp(F32 x,float lo,F32 hi)1278 SI F32 clamp(F32 x, float lo, F32 hi) { return x->clamp(x,lo,hi); } clamp(F32 x,float lo,float hi)1279 SI F32 clamp(F32 x, float lo, float hi) { return x->clamp(x,lo,hi); } clamp(float x,F32 lo,F32 hi)1280 SI F32 clamp(float x, F32 lo, F32 hi) { return lo->clamp(x,lo,hi); } clamp(float x,F32 lo,float hi)1281 SI F32 clamp(float x, F32 lo, float hi) { return lo->clamp(x,lo,hi); } clamp(float x,float lo,F32 hi)1282 SI F32 clamp(float x, float lo, F32 hi) { return hi->clamp(x,lo,hi); } 1283 1284 SI I32 operator<<(I32 x, int bits) { return x->shl(x, bits); } shl(I32 x,int bits)1285 SI I32 shl(I32 x, int bits) { return x->shl(x, bits); } shr(I32 x,int bits)1286 SI I32 shr(I32 x, int bits) { return x->shr(x, bits); } sra(I32 x,int bits)1287 SI I32 sra(I32 x, int bits) { return x->sra(x, bits); } 1288 1289 SI I32 operator&(I32 x, I32 y) { return x->bit_and(x,y); } 1290 SI I32 operator&(I32 x, int y) { return x->bit_and(x,y); } 1291 SI I32 operator&(int x, I32 y) { return y->bit_and(x,y); } 1292 1293 SI I32 operator|(I32 x, I32 y) { return x->bit_or (x,y); } 1294 SI I32 operator|(I32 x, int y) { return x->bit_or (x,y); } 1295 SI I32 operator|(int x, I32 y) { return y->bit_or (x,y); } 1296 1297 SI I32 operator^(I32 x, I32 y) { return x->bit_xor(x,y); } 1298 SI I32 operator^(I32 x, int y) { return x->bit_xor(x,y); } 1299 SI I32 operator^(int x, I32 y) { return y->bit_xor(x,y); } 1300 1301 SI I32& operator&=(I32& x, I32 y) { return (x = x & y); } 1302 SI I32& operator&=(I32& x, int y) { return (x = x & y); } 1303 SI I32& operator|=(I32& x, I32 y) { return (x = x | y); } 1304 SI I32& operator|=(I32& x, int y) { return (x = x | y); } 1305 SI I32& operator^=(I32& x, I32 y) { return (x = x ^ y); } 1306 SI I32& operator^=(I32& x, int y) { return (x = x ^ y); } 1307 bit_clear(I32 x,I32 y)1308 SI I32 bit_clear(I32 x, I32 y) { return x->bit_clear(x,y); } bit_clear(I32 x,int y)1309 SI I32 bit_clear(I32 x, int y) { return x->bit_clear(x,y); } bit_clear(int x,I32 y)1310 SI I32 bit_clear(int x, I32 y) { return y->bit_clear(x,y); } 1311 select(I32 c,I32 t,I32 f)1312 SI I32 select(I32 c, I32 t, I32 f) { return c->select(c, t , f ); } select(I32 c,I32 t,int f)1313 SI I32 select(I32 c, I32 t, int f) { return c->select(c, t , c->splat(f)); } select(I32 c,int t,I32 f)1314 SI I32 select(I32 c, int t, I32 f) { return c->select(c, c->splat(t), f ); } select(I32 c,int t,int f)1315 SI I32 select(I32 c, int t, int f) { return c->select(c, c->splat(t), c->splat(f)); } 1316 select(I32 c,F32 t,F32 f)1317 SI F32 select(I32 c, F32 t, F32 f) { return c->select(c, t , f ); } select(I32 c,F32 t,float f)1318 SI F32 select(I32 c, F32 t, float f) { return c->select(c, t , c->splat(f)); } select(I32 c,float t,F32 f)1319 SI F32 select(I32 c, float t, F32 f) { return c->select(c, c->splat(t), f ); } select(I32 c,float t,float f)1320 SI F32 select(I32 c, float t, float f) { return c->select(c, c->splat(t), c->splat(f)); } 1321 extract(I32 x,int bits,I32 z)1322 SI I32 extract(I32 x, int bits, I32 z) { return x->extract(x,bits,z); } extract(I32 x,int bits,int z)1323 SI I32 extract(I32 x, int bits, int z) { return x->extract(x,bits,z); } extract(int x,int bits,I32 z)1324 SI I32 extract(int x, int bits, I32 z) { return z->extract(x,bits,z); } 1325 pack(I32 x,I32 y,int bits)1326 SI I32 pack(I32 x, I32 y, int bits) { return x->pack (x,y,bits); } pack(I32 x,int y,int bits)1327 SI I32 pack(I32 x, int y, int bits) { return x->pack (x,y,bits); } pack(int x,I32 y,int bits)1328 SI I32 pack(int x, I32 y, int bits) { return y->pack (x,y,bits); } 1329 1330 SI I32 operator~(I32 x) { return ~0 ^ x; } 1331 SI I32 operator-(I32 x) { return 0 - x; } 1332 SI F32 operator-(F32 x) { return 0.0f - x; } 1333 from_unorm(int bits,I32 x)1334 SI F32 from_unorm(int bits, I32 x) { return x->from_unorm(bits,x); } to_unorm(int bits,F32 x)1335 SI I32 to_unorm(int bits, F32 x) { return x-> to_unorm(bits,x); } 1336 store(PixelFormat f,Ptr p,Color c)1337 SI void store(PixelFormat f, Ptr p, Color c) { return c->store(f,p,c); } 1338 gather(PixelFormat f,UPtr p,int off,I32 ix)1339 SI Color gather(PixelFormat f, UPtr p, int off, I32 ix) { return ix->gather(f,p,off,ix); } gather(PixelFormat f,Uniform u,I32 ix)1340 SI Color gather(PixelFormat f, Uniform u , I32 ix) { return ix->gather(f,u,ix); } 1341 premul(F32 * r,F32 * g,F32 * b,F32 a)1342 SI void premul(F32* r, F32* g, F32* b, F32 a) { a-> premul(r,g,b,a); } unpremul(F32 * r,F32 * g,F32 * b,F32 a)1343 SI void unpremul(F32* r, F32* g, F32* b, F32 a) { a->unpremul(r,g,b,a); } 1344 premul(Color c)1345 SI Color premul(Color c) { return c-> premul(c); } unpremul(Color c)1346 SI Color unpremul(Color c) { return c->unpremul(c); } 1347 lerp(Color lo,Color hi,F32 t)1348 SI Color lerp(Color lo, Color hi, F32 t) { return t->lerp(lo,hi,t); } 1349 blend(SkBlendMode m,Color s,Color d)1350 SI Color blend(SkBlendMode m, Color s, Color d) { return s->blend(m,s,d); } 1351 clamp01(Color c)1352 SI Color clamp01(Color c) { return c->clamp01(c); } 1353 to_hsla(Color c)1354 SI HSLA to_hsla(Color c) { return c->to_hsla(c); } to_rgba(HSLA c)1355 SI Color to_rgba(HSLA c) { return c->to_rgba(c); } 1356 1357 // Evaluate polynomials: ax^n + bx^(n-1) + ... for n >= 1 1358 template <typename F32_or_float, typename... Rest> poly(F32 x,F32_or_float a,float b,Rest...rest)1359 SI F32 poly(F32 x, F32_or_float a, float b, Rest... rest) { 1360 if constexpr (sizeof...(rest) == 0) { 1361 return x*a+b; 1362 } else { 1363 return poly(x, x*a+b, rest...); 1364 } 1365 } 1366 #undef SI 1367 } // namespace skvm 1368 1369 #endif//SkVM_DEFINED 1370