/*
 * Copyright 2019 Google LLC
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#ifndef SkVM_DEFINED
#define SkVM_DEFINED

// NOTE(review): this header also uses memcpy, std::size and std::unique_ptr further down;
// presumably <cstring>, <iterator> and <memory> arrive through the includes below -- confirm.
#include "include/core/SkBlendMode.h"
#include "include/core/SkColor.h"
#include "include/core/SkColorType.h"
#include "include/core/SkSpan.h"
#include "include/private/base/SkMacros.h"
#include "include/private/base/SkTArray.h"
#include "src/core/SkTHash.h"
#include "src/core/SkVM_fwd.h"
#include <vector>      // std::vector

class SkWStream;

// SKVM_JIT is defined only when SKVM_JIT_WHEN_POSSIBLE is set, the build is not iOS,
// and the target is x86-64 (Windows/Linux/Apple) or aarch64 (Android/Apple).
#if defined(SKVM_JIT_WHEN_POSSIBLE) && !defined(SK_BUILD_FOR_IOS)
    #if defined(__x86_64__) || defined(_M_X64)
        #if defined(_WIN32) || defined(__linux) || defined(__APPLE__)
            #define SKVM_JIT
        #endif
    #endif
    #if defined(__aarch64__)
        #if defined(__ANDROID__) || defined(__APPLE__)
            #define SKVM_JIT
        #endif
    #endif
#endif

// Flip this to 1 to force the JIT off regardless of the detection above.
#if 0
    #undef SKVM_JIT
#endif

namespace skvm {

    namespace viz {
    class Visualizer;
    }

    // Assembler declares one method per machine instruction, for x86-64 (ymm/xmm vector ops
    // plus a few GP64 ops) and for aarch64 (V vector ops plus a few X ops).
    // NOTE(review): the encoders are defined outside this header; presumably each call
    // appends the encoded instruction to the buffer given to the constructor -- confirm.
    class Assembler {
    public:
        explicit Assembler(void* buf);

        // NOTE(review): presumably the number of bytes emitted so far (see fSize) -- confirm.
        size_t size() const;

        // Order matters... GP64, Xmm, Ymm values match 4-bit register encoding for each.
        enum GP64 {
            rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi,
            r8 , r9 , r10, r11, r12, r13, r14, r15,
        };
        enum Xmm {
            xmm0, xmm1, xmm2 , xmm3 , xmm4 , xmm5 , xmm6 , xmm7 ,
            xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,
        };
        enum Ymm {
            ymm0, ymm1, ymm2 , ymm3 , ymm4 , ymm5 , ymm6 , ymm7 ,
            ymm8, ymm9, ymm10, ymm11, ymm12, ymm13, ymm14, ymm15,
        };

        // X and V values match 5-bit encoding for each (nothing tricky).
        enum X {
            x0 , x1 , x2 , x3 , x4 , x5 , x6 , x7 ,
            x8 , x9 , x10, x11, x12, x13, x14, x15,
            x16, x17, x18, x19, x20, x21, x22, x23,
            x24, x25, x26, x27, x28, x29, x30, xzr, sp=xzr,
        };
        enum V {
            v0 , v1 , v2 , v3 , v4 , v5 , v6 , v7 ,
            v8 , v9 , v10, v11, v12, v13, v14, v15,
            v16, v17, v18, v19, v20, v21, v22, v23,
            v24, v25, v26, v27, v28, v29, v30, v31,
        };

        // Raw emitters for a run of bytes, a single byte, and a 32-bit word.
        void bytes(const void*, int);
        void byte(uint8_t);
        void word(uint32_t);

        // A branch/load target.  `kind` starts as NotYetSet.
        // NOTE(review): presumably `offset` is the label's position and `references` the
        // sites to patch once it is known -- confirm against label(), disp19(), disp32().
        struct Label {
            int                                      offset = 0;
            enum { NotYetSet, ARMDisp19, X86Disp32 } kind = NotYetSet;
            SkSTArray<2, int>                        references;
        };

        // x86-64

        void align(int mod);

        void int3();
        void vzeroupper();
        void ret();

        // Mem represents a value at base + disp + scale*index,
        // or simply at base + disp if index=rsp.
        enum Scale { ONE, TWO, FOUR, EIGHT };
        struct Mem {
            GP64  base;
            int   disp  = 0;
            GP64  index = rsp;
            Scale scale = ONE;
        };

        // Tagged union of a register number, a Mem, or a Label*; `kind` records which member
        // the constructor initialized.  The constructors are intentionally implicit so that
        // registers, Mem and Label* can be passed directly wherever an Operand is expected.
        struct Operand {
            union {
                int    reg;
                Mem    mem;
                Label* label;
            };
            enum { REG, MEM, LABEL } kind;

            Operand(GP64   r) : reg  (r), kind(REG  ) {}
            Operand(Xmm    r) : reg  (r), kind(REG  ) {}
            Operand(Ymm    r) : reg  (r), kind(REG  ) {}
            Operand(Mem    m) : mem  (m), kind(MEM  ) {}
            Operand(Label* l) : label(l), kind(LABEL) {}
        };

        void vpand (Ymm dst, Ymm x, Operand y);
        void vpandn(Ymm dst, Ymm x, Operand y);
        void vpor  (Ymm dst, Ymm x, Operand y);
        void vpxor (Ymm dst, Ymm x, Operand y);

        void vpaddd (Ymm dst, Ymm x, Operand y);
        void vpsubd (Ymm dst, Ymm x, Operand y);
        void vpmulld(Ymm dst, Ymm x, Operand y);

        void vpaddw   (Ymm dst, Ymm x, Operand y);
        void vpsubw   (Ymm dst, Ymm x, Operand y);
        void vpmullw  (Ymm dst, Ymm x, Operand y);

        void vpabsw   (Ymm dst, Operand x);
        void vpavgw   (Ymm dst, Ymm x, Operand y);  // dst = (x+y+1)>>1, unsigned.
        void vpmulhrsw(Ymm dst, Ymm x, Operand y);  // dst = (x*y + (1<<14)) >> 15, signed.
        void vpminsw  (Ymm dst, Ymm x, Operand y);
        void vpminuw  (Ymm dst, Ymm x, Operand y);
        void vpmaxsw  (Ymm dst, Ymm x, Operand y);
        void vpmaxuw  (Ymm dst, Ymm x, Operand y);

        void vaddps(Ymm dst, Ymm x, Operand y);
        void vsubps(Ymm dst, Ymm x, Operand y);
        void vmulps(Ymm dst, Ymm x, Operand y);
        void vdivps(Ymm dst, Ymm x, Operand y);
        void vminps(Ymm dst, Ymm x, Operand y);
        void vmaxps(Ymm dst, Ymm x, Operand y);

        void vsqrtps(Ymm dst, Operand x);

        void vfmadd132ps(Ymm dst, Ymm x, Operand y);
        void vfmadd213ps(Ymm dst, Ymm x, Operand y);
        void vfmadd231ps(Ymm dst, Ymm x, Operand y);

        void vfmsub132ps(Ymm dst, Ymm x, Operand y);
        void vfmsub213ps(Ymm dst, Ymm x, Operand y);
        void vfmsub231ps(Ymm dst, Ymm x, Operand y);

        void vfnmadd132ps(Ymm dst, Ymm x, Operand y);
        void vfnmadd213ps(Ymm dst, Ymm x, Operand y);
        void vfnmadd231ps(Ymm dst, Ymm x, Operand y);

        void vpackusdw(Ymm dst, Ymm x, Operand y);
        void vpackuswb(Ymm dst, Ymm x, Operand y);

        void vpunpckldq(Ymm dst, Ymm x, Operand y);
        void vpunpckhdq(Ymm dst, Ymm x, Operand y);

        void vpcmpeqd(Ymm dst, Ymm x, Operand y);
        void vpcmpgtd(Ymm dst, Ymm x, Operand y);
        void vpcmpeqw(Ymm dst, Ymm x, Operand y);
        void vpcmpgtw(Ymm dst, Ymm x, Operand y);

        // The named comparisons below are vcmpps with a fixed immediate (0, 1, 2 or 4).
        void vcmpps   (Ymm dst, Ymm x, Operand y, int imm);
        void vcmpeqps (Ymm dst, Ymm x, Operand y) { this->vcmpps(dst,x,y,0); }
        void vcmpltps (Ymm dst, Ymm x, Operand y) { this->vcmpps(dst,x,y,1); }
        void vcmpleps (Ymm dst, Ymm x, Operand y) { this->vcmpps(dst,x,y,2); }
        void vcmpneqps(Ymm dst, Ymm x, Operand y) { this->vcmpps(dst,x,y,4); }

        // Sadly, the x parameter cannot be a general Operand for these shifts.
        void vpslld(Ymm dst, Ymm x, int imm);
        void vpsrld(Ymm dst, Ymm x, int imm);
        void vpsrad(Ymm dst, Ymm x, int imm);

        void vpsllw(Ymm dst, Ymm x, int imm);
        void vpsrlw(Ymm dst, Ymm x, int imm);
        void vpsraw(Ymm dst, Ymm x, int imm);

        void vpermq    (Ymm dst, Operand x, int imm);
        void vperm2f128(Ymm dst, Ymm x, Operand y, int imm);
        void vpermps   (Ymm dst, Ymm ix, Operand src);        // dst[i] = src[ix[i]]

        enum Rounding { NEAREST, FLOOR, CEIL, TRUNC, CURRENT };
        void vroundps(Ymm dst, Operand x, Rounding);

        void vmovdqa(Ymm dst, Operand x);
        void vmovups(Ymm dst, Operand x);
        void vmovups(Xmm dst, Operand x);
        void vmovups(Operand dst, Ymm x);
        void vmovups(Operand dst, Xmm x);

        void vcvtdq2ps (Ymm dst, Operand x);
        void vcvttps2dq(Ymm dst, Operand x);
        void vcvtps2dq (Ymm dst, Operand x);

        void vcvtps2ph(Operand dst, Ymm x, Rounding);
        void vcvtph2ps(Ymm dst, Operand x);

        void vpblendvb(Ymm dst, Ymm x, Operand y, Ymm z);

        void vpshufb(Ymm dst, Ymm x, Operand y);

        void vptest(Ymm x, Operand y);

        void vbroadcastss(Ymm dst, Operand y);

        void vpmovzxwd(Ymm dst, Operand src);   // dst = src, 128-bit, uint16_t -> int
        void vpmovzxbd(Ymm dst, Operand src);   // dst = src,  64-bit, uint8_t  -> int

        void vmovq(Operand dst, Xmm src);  // dst = src, 64-bit
        void vmovd(Operand dst, Xmm src);  // dst = src, 32-bit
        void vmovd(Xmm dst, Operand src);  // dst = src, 32-bit

        void vpinsrd(Xmm dst, Xmm src, Operand y, int imm);  // dst = src; dst[imm] = y, 32-bit
        void vpinsrw(Xmm dst, Xmm src, Operand y, int imm);  // dst = src; dst[imm] = y, 16-bit
        void vpinsrb(Xmm dst, Xmm src, Operand y, int imm);  // dst = src; dst[imm] = y, 8-bit

        void vextracti128(Operand dst, Ymm src, int imm);    // dst = src[imm], 128-bit
        void vpextrd     (Operand dst, Xmm src, int imm);    // dst = src[imm], 32-bit
        void vpextrw     (Operand dst, Xmm src, int imm);    // dst = src[imm], 16-bit
        void vpextrb     (Operand dst, Xmm src, int imm);    // dst = src[imm], 8-bit

        // if (mask & 0x8000'0000) {
        //     dst = base[scale*ix];
        // }
        // mask = 0;
        void vgatherdps(Ymm dst, Scale scale, Ymm ix, GP64 base, Ymm mask);


        void label(Label*);

        void jmp(Label*);
        void je (Label*);
        void jne(Label*);
        void jl (Label*);
        void jc (Label*);

        void add (Operand dst, int imm);
        void sub (Operand dst, int imm);
        void cmp (Operand dst, int imm);
        void mov (Operand dst, int imm);
        void movb(Operand dst, int imm);

        void add (Operand dst, GP64 x);
        void sub (Operand dst, GP64 x);
        void cmp (Operand dst, GP64 x);
        void mov (Operand dst, GP64 x);
        void movb(Operand dst, GP64 x);

        void add (GP64 dst, Operand x);
        void sub (GP64 dst, Operand x);
        void cmp (GP64 dst, Operand x);
        void mov (GP64 dst, Operand x);
        void movb(GP64 dst, Operand x);

        // Disambiguators... choice is arbitrary (but generates different code!).
        // A GP64 converts implicitly to Operand, so a (GP64, GP64) call would otherwise match
        // both the (Operand, GP64) and the (GP64, Operand) overloads above.
        void add (GP64 dst, GP64 x) { this->add (Operand(dst), x); }
        void sub (GP64 dst, GP64 x) { this->sub (Operand(dst), x); }
        void cmp (GP64 dst, GP64 x) { this->cmp (Operand(dst), x); }
        void mov (GP64 dst, GP64 x) { this->mov (Operand(dst), x); }
        void movb(GP64 dst, GP64 x) { this->movb(Operand(dst), x); }

        void movzbq(GP64 dst, Operand x);   // dst = x, uint8_t  -> int
        void movzwq(GP64 dst, Operand x);   // dst = x, uint16_t -> int

        // aarch64

        // d = op(n,m)
        // (The function-type alias lets one declaration list declare a member per name.)
        using DOpNM = void(V d, V n, V m);
        DOpNM  and16b, orr16b, eor16b, bic16b, bsl16b,
               add4s,  sub4s,  mul4s,
              cmeq4s, cmgt4s,
                       sub8h,  mul8h,
              fadd4s, fsub4s, fmul4s, fdiv4s, fmin4s, fmax4s,
              fcmeq4s, fcmgt4s, fcmge4s,
              tbl,
              uzp14s, uzp24s,
              zip14s, zip24s;

        // TODO: there are also float ==,<,<=,>,>= instructions with an immediate 0.0f,
        // and the register comparison > and >= can also compare absolute values.  Interesting.

        // d += n*m
        void fmla4s(V d, V n, V m);

        // d -= n*m
        void fmls4s(V d, V n, V m);

        // d = op(n,imm)
        using DOpNImm = void(V d, V n, int imm);
        DOpNImm sli4s,
                shl4s, sshr4s, ushr4s,
                               ushr8h;

        // d = op(n)
        using DOpN = void(V d, V n);
        DOpN not16b,    // d = ~n
             fneg4s,    // d = -n
             fsqrt4s,   // d = sqrtf(n)
             scvtf4s,   // int -> float
             fcvtzs4s,  // truncate float -> int
             fcvtns4s,  // round float -> int  (nearest even)
             frintp4s,  // round float -> int as float, toward plus infinity  (ceil)
             frintm4s,  // round float -> int as float, toward minus infinity (floor)
             fcvtn,     // f32 -> f16 in low half
             fcvtl,     // f16 in low half -> f32
             xtns2h,    // u32 -> u16
             xtnh2b,    // u16 -> u8
             uxtlb2h,   // u8 -> u16    (TODO: this is a special case of ushll.8h)
             uxtlh2s,   // u16 -> u32   (TODO: this is a special case of ushll.4s)
             uminv4s;   // dst[0] = min(n[0],n[1],n[2],n[3]), n as unsigned

        void brk (int imm16);
        void ret (X);
        void add (X d, X n, int imm12);
        void sub (X d, X n, int imm12);
        void subs(X d, X n, int imm12);  // subtract setting condition flags

        enum Shift { LSL,LSR,ASR,ROR };
        void add (X d, X n, X m, Shift=LSL, int imm6=0);  // d=n+Shift(m,imm6), for Shift != ROR.

        // There's another encoding for unconditional branches that can jump further,
        // but this one encoded as b.al is simple to implement and should be fine.
        void b  (Label* l) { this->b(Condition::al, l); }
        void bne(Label* l) { this->b(Condition::ne, l); }
        void blt(Label* l) { this->b(Condition::lt, l); }

        // "cmp ..." is just an assembler mnemonic for "subs xzr, ..."!
        void cmp(X n, int imm12) { this->subs(xzr, n, imm12); }

        // Compare and branch if zero/non-zero, as if
        //      cmp(t,0)
        //      beq/bne(l)
        // but without setting condition flags.
        void cbz (X t, Label* l);
        void cbnz(X t, Label* l);

        // TODO: there are ldur variants with unscaled imm, useful?
        void ldrd(X dst, X src, int imm12=0);  // 64-bit dst = *(src+imm12*8)
        void ldrs(X dst, X src, int imm12=0);  // 32-bit dst = *(src+imm12*4)
        void ldrh(X dst, X src, int imm12=0);  // 16-bit dst = *(src+imm12*2)
        void ldrb(X dst, X src, int imm12=0);  //  8-bit dst = *(src+imm12)

        void ldrq(V dst, Label*);  // 128-bit PC-relative load

        void ldrq(V dst, X src, int imm12=0);  // 128-bit dst = *(src+imm12*16)
        void ldrd(V dst, X src, int imm12=0);  //  64-bit dst = *(src+imm12*8)
        void ldrs(V dst, X src, int imm12=0);  //  32-bit dst = *(src+imm12*4)
        void ldrh(V dst, X src, int imm12=0);  //  16-bit dst = *(src+imm12*2)
        void ldrb(V dst, X src, int imm12=0);  //   8-bit dst = *(src+imm12)

        void strs(X src, X dst, int imm12=0);  // 32-bit *(dst+imm12*4) = src

        void strq(V src, X dst, int imm12=0);  // 128-bit *(dst+imm12*16) = src
        void strd(V src, X dst, int imm12=0);  //  64-bit *(dst+imm12*8)  = src
        void strs(V src, X dst, int imm12=0);  //  32-bit *(dst+imm12*4)  = src
        void strh(V src, X dst, int imm12=0);  //  16-bit *(dst+imm12*2)  = src
        void strb(V src, X dst, int imm12=0);  //   8-bit *(dst+imm12)    = src

        void movs(X dst, V src, int lane);  // dst = 32-bit src[lane]
        void inss(V dst, X src, int lane);  // dst[lane] = 32-bit src

        void dup4s  (V dst, X src);  // Each 32-bit lane = src

        void ld1r4s (V dst, X src);  // Each 32-bit lane = *src
        void ld1r8h (V dst, X src);  // Each 16-bit lane = *src
        void ld1r16b(V dst, X src);  // Each  8-bit lane = *src

        void ld24s(V dst, X src);  // deinterleave(dst,dst+1)             = 256-bit *src
        void ld44s(V dst, X src);  // deinterleave(dst,dst+1,dst+2,dst+3) = 512-bit *src
        void st24s(V src, X dst);  // 256-bit *dst = interleave_32bit_lanes(src,src+1)
        void st44s(V src, X dst);  // 512-bit *dst = interleave_32bit_lanes(src,src+1,src+2,src+3)

        void ld24s(V dst, X src, int lane);  // Load 2 32-bit values into given lane of dst..dst+1
        void ld44s(V dst, X src, int lane);  // Load 4 32-bit values into given lane of dst..dst+3

    private:
        // NOTE(review): presumably the output buffer passed to the constructor and the
        // number of bytes written to it so far -- confirm in the implementation.
        uint8_t* fCode;
        size_t   fSize;

        // x86-64
        enum W { W0, W1 };      // Are the lanes 64-bit (W1) or default (W0)?  Intel Vol 2A 2.3.5.5
        enum L { L128, L256 };  // Is this a 128- or 256-bit operation?        Intel Vol 2A 2.3.6.2

        // Helpers for vector instructions.
        // The Ymm overloads forward with L256 and the Xmm overloads with L128; the
        // two-operand forms pass 0 for x.
        void op(int prefix, int map, int opcode, int dst, int x, Operand y, W,L);
        void op(int p, int m, int o, Ymm d, Ymm x, Operand y, W w=W0) { op(p,m,o, d,x,y,w,L256); }
        void op(int p, int m, int o, Ymm d,        Operand y, W w=W0) { op(p,m,o, d,0,y,w,L256); }
        void op(int p, int m, int o, Xmm d, Xmm x, Operand y, W w=W0) { op(p,m,o, d,x,y,w,L128); }
        void op(int p, int m, int o, Xmm d,        Operand y, W w=W0) { op(p,m,o, d,0,y,w,L128); }

        // Helpers for GP64 instructions.
        void op(int opcode, Operand dst, GP64 x);
        void op(int opcode, int opcode_ext, Operand dst, int imm);

        void jump(uint8_t condition, Label*);
        int disp32(Label*);
        void imm_byte_after_operand(const Operand&, int byte);

        // aarch64

        // Opcode for 3-arguments ops is split between hi and lo:
        //    [11 bits hi] [5 bits m] [6 bits lo] [5 bits n] [5 bits d]
        void op(uint32_t hi, V m, uint32_t lo, V n, V d);

        // 0,1,2-argument ops, with or without an immediate:
        //    [ 22 bits op ] [5 bits n] [5 bits d]
        // Any immediate falls in the middle somewhere overlapping with either op, n, or both.
421 void op(uint32_t op22, V n, V d, int imm=0); 422 void op(uint32_t op22, X n, V d, int imm=0) { this->op(op22,(V)n, d,imm); } 423 void op(uint32_t op22, V n, X d, int imm=0) { this->op(op22, n,(V)d,imm); } 424 void op(uint32_t op22, X n, X d, int imm=0) { this->op(op22,(V)n,(V)d,imm); } 425 void op(uint32_t op22, int imm=0) { this->op(op22,(V)0,(V)0,imm); } 426 // (1-argument ops don't seem to have a consistent convention of passing as n or d.) 427 428 429 // Order matters... value is 4-bit encoding for condition code. 430 enum class Condition { eq,ne,cs,cc,mi,pl,vs,vc,hi,ls,ge,lt,gt,le,al }; 431 void b(Condition, Label*); 432 int disp19(Label*); 433 }; 434 435 // Order matters a little: Ops <=store128 are treated as having side effects. 436 #define SKVM_OPS(M) \ 437 M(assert_true) \ 438 M(trace_line) M(trace_var) \ 439 M(trace_enter) M(trace_exit) M(trace_scope) \ 440 M(store8) M(store16) M(store32) M(store64) M(store128) \ 441 M(load8) M(load16) M(load32) M(load64) M(load128) \ 442 M(index) \ 443 M(gather8) M(gather16) M(gather32) \ 444 M(uniform32) \ 445 M(array32) \ 446 M(splat) \ 447 M(add_f32) M(add_i32) \ 448 M(sub_f32) M(sub_i32) \ 449 M(mul_f32) M(mul_i32) \ 450 M(div_f32) \ 451 M(min_f32) M(max_f32) \ 452 M(fma_f32) M(fms_f32) M(fnma_f32) \ 453 M(sqrt_f32) \ 454 M(shl_i32) M(shr_i32) M(sra_i32) \ 455 M(ceil) M(floor) M(trunc) M(round) M(to_fp16) M(from_fp16) \ 456 M(to_f32) \ 457 M(neq_f32) M(eq_f32) M(eq_i32) \ 458 M(gte_f32) M(gt_f32) M(gt_i32) \ 459 M(bit_and) M(bit_or) M(bit_xor) M(bit_clear) \ 460 M(select) \ 461 M(duplicate) 462 // End of SKVM_OPS 463 464 enum class Op : int { 465 #define M(op) op, 466 SKVM_OPS(M) 467 #undef M 468 }; 469 has_side_effect(Op op)470 static inline bool has_side_effect(Op op) { 471 return op <= Op::store128; 472 } touches_varying_memory(Op op)473 static inline bool touches_varying_memory(Op op) { 474 return Op::store8 <= op && op <= Op::load128; 475 } is_always_varying(Op op)476 static inline bool is_always_varying(Op 
op) { 477 return Op::store8 <= op && op <= Op::index; 478 } is_trace(Op op)479 static inline bool is_trace(Op op) { 480 return Op::trace_line <= op && op <= Op::trace_scope; 481 } 482 483 using Val = int; 484 // We reserve an impossible Val ID as a sentinel 485 // NA meaning none, n/a, null, nil, etc. 486 static const Val NA = -1; 487 488 // Ptr and UPtr are an index into the registers args[]. The two styles of using args are 489 // varyings and uniforms. Varyings use Ptr, have a stride associated with them, and are 490 // evaluated everytime through the loop. Uniforms use UPtr, don't have a stride, and are 491 // usually hoisted above the loop. 492 struct Ptr { int ix; }; 493 struct UPtr : public Ptr {}; 494 495 bool operator!=(Ptr a, Ptr b); 496 497 struct I32 { 498 Builder* builder = nullptr; 499 Val id = NA; 500 explicit operator bool() const { return id != NA; } 501 Builder* operator->() const { return builder; } 502 }; 503 504 struct F32 { 505 Builder* builder = nullptr; 506 Val id = NA; 507 explicit operator bool() const { return id != NA; } 508 Builder* operator->() const { return builder; } 509 }; 510 511 struct Color { 512 F32 r,g,b,a; 513 explicit operator bool() const { return r && g && b && a; } 514 Builder* operator->() const { return a.operator->(); } 515 }; 516 517 struct HSLA { 518 F32 h,s,l,a; 519 explicit operator bool() const { return h && s && l && a; } 520 Builder* operator->() const { return a.operator->(); } 521 }; 522 523 struct Coord { 524 F32 x,y; 525 explicit operator bool() const { return x && y; } 526 Builder* operator->() const { return x.operator->(); } 527 }; 528 529 struct Uniform { 530 UPtr ptr; 531 int offset; 532 }; 533 struct Uniforms { 534 UPtr base; 535 std::vector<int> buf; 536 UniformsUniforms537 Uniforms(UPtr ptr, int init) : base(ptr), buf(init) {} 538 pushUniforms539 Uniform push(int val) { 540 buf.push_back(val); 541 return {base, (int)( sizeof(int)*(buf.size() - 1) )}; 542 } 543 pushFUniforms544 Uniform pushF(float 
val) { 545 int bits; 546 memcpy(&bits, &val, sizeof(int)); 547 return this->push(bits); 548 } 549 pushPtrUniforms550 Uniform pushPtr(const void* ptr) { 551 // Jam the pointer into 1 or 2 ints. 552 int ints[sizeof(ptr) / sizeof(int)]; 553 memcpy(ints, &ptr, sizeof(ptr)); 554 for (int bits : ints) { 555 buf.push_back(bits); 556 } 557 return {base, (int)( sizeof(int)*(buf.size() - std::size(ints)) )}; 558 } 559 pushArrayUniforms560 Uniform pushArray(int32_t a[]) { 561 return this->pushPtr(a); 562 } 563 pushArrayFUniforms564 Uniform pushArrayF(float a[]) { 565 return this->pushPtr(a); 566 } 567 }; 568 569 struct PixelFormat { 570 enum { UNORM, SRGB, FLOAT, XRNG } encoding; 571 int r_bits, g_bits, b_bits, a_bits, 572 r_shift, g_shift, b_shift, a_shift; 573 }; 574 PixelFormat SkColorType_to_PixelFormat(SkColorType); 575 576 SK_BEGIN_REQUIRE_DENSE 577 struct Instruction { 578 Op op; // v* = op(x,y,z,w,immA,immB), where * == index of this Instruction. 579 Val x,y,z,w; // Enough arguments for Op::store128. 580 int immA,immB,immC; // Immediate bit pattern, shift count, pointer index, byte offset, etc. 
581 }; 582 SK_END_REQUIRE_DENSE 583 584 bool operator==(const Instruction&, const Instruction&); 585 struct InstructionHash { 586 uint32_t operator()(const Instruction&, uint32_t seed=0) const; 587 }; 588 589 struct OptimizedInstruction { 590 Op op; 591 Val x,y,z,w; 592 int immA,immB,immC; 593 594 Val death; 595 bool can_hoist; 596 }; 597 598 struct Features { 599 bool fma = false; 600 bool fp16 = false; 601 }; 602 603 class TraceHook { 604 public: 605 virtual ~TraceHook() = default; 606 virtual void line(int lineNum) = 0; 607 virtual void var(int slot, int32_t val) = 0; 608 virtual void enter(int fnIdx) = 0; 609 virtual void exit(int fnIdx) = 0; 610 virtual void scope(int delta) = 0; 611 }; 612 613 class Builder { 614 public: 615 Builder(bool createDuplicates = false); 616 Builder(Features, bool createDuplicates = false); 617 618 Program done(const char* debug_name, 619 bool allow_jit, 620 std::unique_ptr<viz::Visualizer> visualizer) const; 621 Program done(const char* debug_name = nullptr, 622 bool allow_jit=true) const; 623 624 // Mostly for debugging, tests, etc. program()625 std::vector<Instruction> program() const { return fProgram; } 626 std::vector<OptimizedInstruction> optimize(viz::Visualizer* visualizer = nullptr) const; 627 628 // Returns a trace-hook ID which must be passed to the trace opcodes. 629 int attachTraceHook(TraceHook*); 630 631 // Convenience arg() wrappers for most common strides, sizeof(T) and 0. 632 template <typename T> varying()633 Ptr varying() { return this->arg(sizeof(T)); } varying(int stride)634 Ptr varying(int stride) { SkASSERT(stride > 0); return this->arg(stride); } uniform()635 UPtr uniform() { Ptr p = this->arg(0); return UPtr{{p.ix}}; } 636 637 // TODO: allow uniform (i.e. Ptr) offsets to store* and load*? 638 // TODO: sign extension (signed types) for <32-bit loads? 639 // TODO: unsigned integer operations where relevant (just comparisons?)? 640 641 // Assert cond is true, printing debug when not. 
// (class Builder, continued)  The F32 and single-argument overloads forward to assert_true(I32, I32).
        void assert_true(I32 cond, I32 debug);
        void assert_true(I32 cond, F32 debug) { assert_true(cond, pun_to_I32(debug)); }
        void assert_true(I32 cond)            { assert_true(cond, cond); }

        // Insert debug traces into the instruction stream
        bool mergeMasks(I32& mask, I32& traceMask);
        void trace_line (int traceHookID, I32 mask, I32 traceMask, int line);
        void trace_var  (int traceHookID, I32 mask, I32 traceMask, int slot, I32 val);
        void trace_enter(int traceHookID, I32 mask, I32 traceMask, int fnIdx);
        void trace_exit (int traceHookID, I32 mask, I32 traceMask, int fnIdx);
        void trace_scope(int traceHookID, I32 mask, I32 traceMask, int delta);

        // Store {8,16,32,64,128}-bit varying.
        // storeF stores the float's bits unchanged via store32 (pun, not a conversion).
        void store8  (Ptr ptr, I32 val);
        void store16 (Ptr ptr, I32 val);
        void store32 (Ptr ptr, I32 val);
        void storeF  (Ptr ptr, F32 val) { store32(ptr, pun_to_I32(val)); }
        void store64 (Ptr ptr, I32 lo, I32 hi);              // *ptr = lo|(hi<<32)
        void store128(Ptr ptr, I32 x, I32 y, I32 z, I32 w);  // *ptr = x|(y<<32)|(z<<64)|(w<<96)

        // Returns varying {n, n-1, n-2, ..., 1}, where n is the argument to Program::eval().
        I32 index();

        // Load {8,16,32,64,128}-bit varying.
        // loadF is load32 with the result retyped as F32 (pun, not a conversion).
        I32 load8  (Ptr ptr);
        I32 load16 (Ptr ptr);
        I32 load32 (Ptr ptr);
        F32 loadF  (Ptr ptr) { return pun_to_F32(load32(ptr)); }
        I32 load64 (Ptr ptr, int lane);  // Load 32-bit lane 0-1 of  64-bit value.
        I32 load128(Ptr ptr, int lane);  // Load 32-bit lane 0-3 of 128-bit value.

        // Load i32/f32 uniform with byte-count offset.
        I32 uniform32(UPtr ptr, int offset);
        F32 uniformF (UPtr ptr, int offset) { return pun_to_F32(uniform32(ptr,offset)); }

        // Load i32/f32 uniform with byte-count offset and a C-style array index. The address of
        // the element is (*(ptr + byte-count offset))[index].
        I32 array32  (UPtr ptr, int offset, int index);
        F32 arrayF   (UPtr ptr, int offset, int index) {
            return pun_to_F32(array32(ptr, offset, index));
        }

        // Push and load this color as a uniform.
        Color uniformColor(SkColor4f, Uniforms*);

        // Gather u8,u16,i32 with varying element-count index from *(ptr + byte-count offset).
        I32 gather8 (UPtr ptr, int offset, I32 index);
        I32 gather16(UPtr ptr, int offset, I32 index);
        I32 gather32(UPtr ptr, int offset, I32 index);
        F32 gatherF (UPtr ptr, int offset, I32 index) {
            return pun_to_F32(gather32(ptr, offset, index));
        }

        // Convenience methods for working with skvm::Uniform(s).
        // Each unpacks the Uniform into its (ptr, offset) pair and forwards.
        I32 uniform32(Uniform u)            { return this->uniform32(u.ptr, u.offset); }
        F32 uniformF (Uniform u)            { return this->uniformF (u.ptr, u.offset); }
        I32 gather8  (Uniform u, I32 index) { return this->gather8  (u.ptr, u.offset, index); }
        I32 gather16 (Uniform u, I32 index) { return this->gather16 (u.ptr, u.offset, index); }
        I32 gather32 (Uniform u, I32 index) { return this->gather32 (u.ptr, u.offset, index); }
        F32 gatherF  (Uniform u, I32 index) { return this->gatherF  (u.ptr, u.offset, index); }

        // Convenience methods for working with array pointers in skvm::Uniforms. Index is an
        // array index and not a byte offset. The array pointer is stored at u.
        I32 array32  (Uniform a, int index) { return this->array32  (a.ptr, a.offset, index); }
        F32 arrayF   (Uniform a, int index) { return this->arrayF   (a.ptr, a.offset, index); }

        // Load an immediate constant.
        // The unsigned overload casts to int; the float overload copies the float's 4 bytes
        // into an int and retypes the resulting splat as F32.
        I32 splat(int      n);
        I32 splat(unsigned u) { return splat((int)u); }
        F32 splat(float    f) {
            int bits;
            memcpy(&bits, &f, 4);
            return pun_to_F32(splat(bits));
        }

        // Some operations make sense with immediate arguments,
        // so we provide overloads inline to make that seamless.
        //
        // We omit overloads that may indicate a bug or performance issue.
        // In general it does not make sense to pass immediates to unary operations,
        // and even sometimes not for binary operations, e.g.
        //
        //   div(x, y)    -- normal every day divide
        //   div(3.0f, y) -- yep, makes sense
        //   div(x, 3.0f) -- omitted as a reminder you probably want mul(x, 1/3.0f).
        //
        // You can of course always splat() to override these opinions.

        // float math, comparisons, etc.
        F32 add(F32, F32);
        F32 add(F32 x, float y) { return add(x, splat(y)); }
        F32 add(float x, F32 y) { return add(splat(x), y); }

        F32 sub(F32, F32);
        F32 sub(F32 x, float y) { return sub(x, splat(y)); }
        F32 sub(float x, F32 y) { return sub(splat(x), y); }

        F32 mul(F32, F32);
        F32 mul(F32 x, float y) { return mul(x, splat(y)); }
        F32 mul(float x, F32 y) { return mul(splat(x), y); }

        // mul(), but allowing optimizations not strictly legal under IEEE-754 rules.
// (class Builder, continued)  Overloads taking a float splat() it into an F32 and forward.
        F32 fast_mul(F32, F32);
        F32 fast_mul(F32 x, float y) { return fast_mul(x, splat(y)); }
        F32 fast_mul(float x, F32 y) { return fast_mul(splat(x), y); }

        // Only the immediate-numerator overload exists; div(x, 3.0f) is intentionally omitted.
        F32 div(F32, F32);
        F32 div(float x, F32 y) { return div(splat(x), y); }

        F32 min(F32, F32);
        F32 min(F32 x, float y) { return min(x, splat(y)); }
        F32 min(float x, F32 y) { return min(splat(x), y); }

        F32 max(F32, F32);
        F32 max(F32 x, float y) { return max(x, splat(y)); }
        F32 max(float x, F32 y) { return max(splat(x), y); }

        // TODO: remove mad()?  It's just sugar.
        // mad(x,y,z) expands to add(mul(x,y), z); the other overloads splat their immediates.
        F32 mad(F32   x, F32   y, F32   z) { return add(mul(x,y), z); }
        F32 mad(F32   x, F32   y, float z) { return mad(      x ,       y , splat(z)); }
        F32 mad(F32   x, float y, F32   z) { return mad(      x , splat(y),       z ); }
        F32 mad(F32   x, float y, float z) { return mad(      x , splat(y), splat(z)); }
        F32 mad(float x, F32   y, F32   z) { return mad(splat(x),       y ,       z ); }
        F32 mad(float x, F32   y, float z) { return mad(splat(x),       y , splat(z)); }
        F32 mad(float x, float y, F32   z) { return mad(splat(x), splat(y),       z ); }

        F32        sqrt(F32);
        F32 approx_log2(F32);
        F32 approx_pow2(F32);
        // log(x) = ln(2) * log2(x) and exp(x) = 2^(x * log2(e)); the literals are ln(2) and log2(e).
        F32 approx_log (F32 x) { return mul(0.69314718f, approx_log2(x)); }
        F32 approx_exp (F32 x) { return approx_pow2(mul(x, 1.4426950408889634074f)); }

        F32 approx_powf(F32   base, F32   exp);
        F32 approx_powf(F32   base, float exp) { return approx_powf(base, splat(exp)); }
        F32 approx_powf(float base, F32   exp) { return approx_powf(splat(base), exp); }


        F32 approx_sin(F32 radians);
        // cos(x) is computed as sin(x + pi/2).
        F32 approx_cos(F32 radians) { return approx_sin(add(radians, SK_ScalarPI/2)); }
        F32 approx_tan(F32 radians);

        F32 approx_asin(F32 x);
        // acos(x) is computed as pi/2 - asin(x).
        F32 approx_acos(F32 x) { return sub(SK_ScalarPI/2, approx_asin(x)); }
        F32 approx_atan(F32 x);
        F32 approx_atan2(F32 y, F32 x);

        F32 lerp(F32   lo, F32   hi, F32   t);
        F32 lerp(F32   lo, F32   hi, float t) { return lerp(      lo ,       hi , splat(t)); }
        F32 lerp(F32   lo, float hi, float t) { return lerp(      lo , splat(hi), splat(t)); }
        F32 lerp(F32   lo, float hi, F32   t) { return lerp(      lo , splat(hi),       t ); }
        F32 lerp(float lo, F32   hi, F32   t) { return lerp(splat(lo),       hi ,       t ); }
        F32 lerp(float lo, F32   hi, float t) { return lerp(splat(lo),       hi , splat(t)); }
        F32 lerp(float lo, float hi, F32   t) { return lerp(splat(lo), splat(hi),       t ); }

        // clamp(x, lo, hi) is max(lo, min(x, hi)): the upper bound is applied first.
        F32 clamp(F32   x, F32   lo, F32   hi) { return max(lo, min(x, hi)); }
        F32 clamp(F32   x, F32   lo, float hi) { return clamp(      x ,       lo , splat(hi)); }
        F32 clamp(F32   x, float lo, float hi) { return clamp(      x , splat(lo), splat(hi)); }
        F32 clamp(F32   x, float lo, F32   hi) { return clamp(      x , splat(lo),       hi ); }
        F32 clamp(float x, F32   lo, F32   hi) { return clamp(splat(x),       lo ,       hi ); }
        F32 clamp(float x, F32   lo, float hi) { return clamp(splat(x),       lo , splat(hi)); }
        F32 clamp(float x, float lo, F32   hi) { return clamp(splat(x), splat(lo),       hi ); }

        F32 clamp01(F32 x) { return clamp(x, 0.0f, 1.0f); }

        // abs() masks off the top (sign) bit of the float's bit pattern.
        F32    abs(F32 x) { return pun_to_F32(bit_and(pun_to_I32(x), 0x7fff'ffff)); }
        F32  fract(F32 x) { return sub(x, floor(x)); }
        F32   ceil(F32);
        F32  floor(F32);
        // is_NaN relies on NaN being the only value that compares unequal to itself.
        // is_finite masks with 0x7f80'0000 (the binary32 exponent field) and requires the
        // result to be below that mask, i.e. the exponent bits are not all set.
        I32 is_NaN   (F32 x) { return neq(x,x); }
        I32 is_finite(F32 x) { return lt(bit_and(pun_to_I32(x), 0x7f80'0000), 0x7f80'0000); }

        I32 trunc(F32 x);
        I32 round(F32 x);  // Round to int using current rounding mode (as if lrintf()).
        // Retypes the same (builder, id) pair as I32; no instruction is added here.
        I32 pun_to_I32(F32 x) { return {x.builder, x.id}; }

        I32   to_fp16(F32 x);
        F32 from_fp16(I32 x);

        I32 eq(F32, F32);
        I32 eq(F32 x, float y) { return eq(x, splat(y)); }
        I32 eq(float x, F32 y) { return eq(splat(x), y); }

        I32 neq(F32, F32);
        I32 neq(F32 x, float y) { return neq(x, splat(y)); }
        I32 neq(float x, F32 y) { return neq(splat(x), y); }

        I32 lt(F32, F32);
        I32 lt(F32 x, float y) { return lt(x, splat(y)); }
        I32 lt(float x, F32 y) { return lt(splat(x), y); }

        I32 lte(F32, F32);
        I32 lte(F32 x, float y) { return lte(x, splat(y)); }
        I32 lte(float x, F32 y) { return lte(splat(x), y); }

        I32 gt(F32, F32);
        I32 gt(F32 x, float y) { return gt(x, splat(y)); }
        I32 gt(float x, F32 y) { return gt(splat(x), y); }

        I32 gte(F32, F32);
        I32 gte(F32 x, float y) { return gte(x, splat(y)); }
        I32 gte(float x, F32 y) { return gte(splat(x), y); }

        // int math, comparisons, etc.
845 I32 add(I32, I32); 846 I32 add(I32 x, int y) { return add(x, splat(y)); } 847 I32 add(int x, I32 y) { return add(splat(x), y); } 848 849 I32 sub(I32, I32); 850 I32 sub(I32 x, int y) { return sub(x, splat(y)); } 851 I32 sub(int x, I32 y) { return sub(splat(x), y); } 852 853 I32 mul(I32, I32); 854 I32 mul(I32 x, int y) { return mul(x, splat(y)); } 855 I32 mul(int x, I32 y) { return mul(splat(x), y); } 856 857 I32 shl(I32 x, int bits); 858 I32 shr(I32 x, int bits); 859 I32 sra(I32 x, int bits); 860 861 I32 eq(I32, I32); 862 I32 eq(I32 x, int y) { return eq(x, splat(y)); } 863 I32 eq(int x, I32 y) { return eq(splat(x), y); } 864 865 I32 neq(I32, I32); 866 I32 neq(I32 x, int y) { return neq(x, splat(y)); } 867 I32 neq(int x, I32 y) { return neq(splat(x), y); } 868 869 I32 lt(I32, I32); 870 I32 lt(I32 x, int y) { return lt(x, splat(y)); } 871 I32 lt(int x, I32 y) { return lt(splat(x), y); } 872 873 I32 lte(I32, I32); 874 I32 lte(I32 x, int y) { return lte(x, splat(y)); } 875 I32 lte(int x, I32 y) { return lte(splat(x), y); } 876 877 I32 gt(I32, I32); 878 I32 gt(I32 x, int y) { return gt(x, splat(y)); } 879 I32 gt(int x, I32 y) { return gt(splat(x), y); } 880 881 I32 gte(I32, I32); 882 I32 gte(I32 x, int y) { return gte(x, splat(y)); } 883 I32 gte(int x, I32 y) { return gte(splat(x), y); } 884 885 F32 to_F32(I32 x); 886 F32 pun_to_F32(I32 x) { return {x.builder, x.id}; } 887 888 // Bitwise operations. 
889 I32 bit_and(I32, I32); 890 I32 bit_and(I32 x, int y) { return bit_and(x, splat(y)); } 891 I32 bit_and(int x, I32 y) { return bit_and(splat(x), y); } 892 893 I32 bit_or(I32, I32); 894 I32 bit_or(I32 x, int y) { return bit_or(x, splat(y)); } 895 I32 bit_or(int x, I32 y) { return bit_or(splat(x), y); } 896 897 I32 bit_xor(I32, I32); 898 I32 bit_xor(I32 x, int y) { return bit_xor(x, splat(y)); } 899 I32 bit_xor(int x, I32 y) { return bit_xor(splat(x), y); } 900 901 I32 bit_clear(I32, I32); 902 I32 bit_clear(I32 x, int y) { return bit_clear(x, splat(y)); } 903 I32 bit_clear(int x, I32 y) { return bit_clear(splat(x), y); } 904 905 I32 min(I32 x, I32 y) { return select(lte(x,y), x, y); } 906 I32 min(I32 x, int y) { return min(x, splat(y)); } 907 I32 min(int x, I32 y) { return min(splat(x), y); } 908 909 I32 max(I32 x, I32 y) { return select(gte(x,y), x, y); } 910 I32 max(I32 x, int y) { return max(x, splat(y)); } 911 I32 max(int x, I32 y) { return max(splat(x), y); } 912 913 I32 select(I32 cond, I32 t, I32 f); // cond ? 
t : f 914 I32 select(I32 cond, int t, I32 f) { return select(cond, splat(t), f ); } 915 I32 select(I32 cond, I32 t, int f) { return select(cond, t , splat(f)); } 916 I32 select(I32 cond, int t, int f) { return select(cond, splat(t), splat(f)); } 917 918 F32 select(I32 cond, F32 t, F32 f) { 919 return pun_to_F32(select(cond, pun_to_I32(t) 920 , pun_to_I32(f))); 921 } 922 F32 select(I32 cond, float t, F32 f) { return select(cond, splat(t), f ); } 923 F32 select(I32 cond, F32 t, float f) { return select(cond, t , splat(f)); } 924 F32 select(I32 cond, float t, float f) { return select(cond, splat(t), splat(f)); } 925 926 I32 extract(I32 x, int bits, I32 z); // (x>>bits) & z 927 I32 extract(I32 x, int bits, int z) { return extract(x, bits, splat(z)); } 928 I32 extract(int x, int bits, I32 z) { return extract(splat(x), bits, z); } 929 930 I32 pack(I32 x, I32 y, int bits); // x | (y<<bits) 931 I32 pack(I32 x, int y, int bits) { return pack(x, splat(y), bits); } 932 I32 pack(int x, I32 y, int bits) { return pack(splat(x), y, bits); } 933 934 935 // Common idioms used in several places, worth centralizing for consistency. 936 F32 from_unorm(int bits, I32); // E.g. from_unorm(8, x) -> x * (1/255.0f) 937 I32 to_unorm(int bits, F32); // E.g. 
to_unorm(8, x) -> round(x * 255) 938 939 Color load(PixelFormat, Ptr ptr); 940 void store(PixelFormat, Ptr ptr, Color); 941 Color gather(PixelFormat, UPtr ptr, int offset, I32 index); 942 Color gather(PixelFormat f, Uniform u, I32 index) { 943 return gather(f, u.ptr, u.offset, index); 944 } 945 946 void premul(F32* r, F32* g, F32* b, F32 a); 947 void unpremul(F32* r, F32* g, F32* b, F32 a); 948 949 Color premul(Color c) { this->premul(&c.r, &c.g, &c.b, c.a); return c; } 950 Color unpremul(Color c) { this->unpremul(&c.r, &c.g, &c.b, c.a); return c; } 951 952 Color lerp(Color lo, Color hi, F32 t); 953 Color blend(SkBlendMode, Color src, Color dst); 954 955 Color clamp01(Color c) { 956 return { clamp01(c.r), clamp01(c.g), clamp01(c.b), clamp01(c.a) }; 957 } 958 959 HSLA to_hsla(Color); 960 Color to_rgba(HSLA); 961 962 void dump(SkWStream* = nullptr) const; 963 964 uint64_t hash() const; 965 966 Val push(Instruction); 967 968 bool allImm() const { return true; } 969 970 template <typename T, typename... Rest> 971 bool allImm(Val id, T* imm, Rest... rest) const { 972 if (fProgram[id].op == Op::splat) { 973 static_assert(sizeof(T) == 4); 974 memcpy(imm, &fProgram[id].immA, 4); 975 return this->allImm(rest...); 976 } 977 return false; 978 } 979 980 bool allUniform() const { return true; } 981 982 template <typename... Rest> 983 bool allUniform(Val id, Uniform* uni, Rest... rest) const { 984 if (fProgram[id].op == Op::uniform32) { 985 uni->ptr.ix = fProgram[id].immA; 986 uni->offset = fProgram[id].immB; 987 return this->allUniform(rest...); 988 } 989 return false; 990 } 991 992 private: 993 // Declare an argument with given stride (use stride=0 for uniforms). 
994 Ptr arg(int stride); 995 996 Val push( 997 Op op, Val x=NA, Val y=NA, Val z=NA, Val w=NA, int immA=0, int immB=0, int immC=0) { 998 return this->push(Instruction{op, x,y,z,w, immA,immB,immC}); 999 } 1000 1001 template <typename T> 1002 bool isImm(Val id, T want) const { 1003 T imm = 0; 1004 return this->allImm(id, &imm) && imm == want; 1005 } 1006 1007 // `canonicalizeIdOrder` and has two rules: 1008 // - Immediate values go last; that is, `x + 1` is preferred over `1 + x`. 1009 // - If both/neither of x and y are immediate, lower IDs go before higher IDs. 1010 // Canonicalizing the IDs helps with opcode deduplication. Putting immediates in a 1011 // consistent position makes it easier to detect no-op arithmetic like `x + 0`. 1012 template <typename F32_or_I32> 1013 void canonicalizeIdOrder(F32_or_I32& x, F32_or_I32& y); 1014 1015 // If the passed in ID is a bit-not, return the value being bit-notted. Otherwise, NA. 1016 Val holdsBitNot(Val id); 1017 1018 SkTHashMap<Instruction, Val, InstructionHash> fIndex; 1019 std::vector<Instruction> fProgram; 1020 std::vector<TraceHook*> fTraceHooks; 1021 std::vector<int> fStrides; 1022 const Features fFeatures; 1023 bool fCreateDuplicates; 1024 }; 1025 1026 // Optimization passes and data structures normally used by Builder::optimize(), 1027 // extracted here so they can be unit tested. 
1028 std::vector<Instruction> eliminate_dead_code(std::vector<Instruction>, 1029 viz::Visualizer* visualizer = nullptr); 1030 std::vector<OptimizedInstruction> finalize(std::vector<Instruction>, 1031 viz::Visualizer* visualizer = nullptr); 1032 1033 using Reg = int; 1034 1035 // d = op(x,y,z,w, immA,immB) 1036 struct InterpreterInstruction { 1037 Op op; 1038 Reg d,x,y,z,w; 1039 int immA,immB,immC; 1040 }; 1041 1042 class Program { 1043 public: 1044 Program(const std::vector<OptimizedInstruction>& instructions, 1045 std::unique_ptr<viz::Visualizer> visualizer, 1046 const std::vector<int>& strides, 1047 const std::vector<TraceHook*>& traceHooks, 1048 const char* debug_name, bool allow_jit); 1049 1050 Program(); 1051 ~Program(); 1052 1053 Program(Program&&); 1054 Program& operator=(Program&&); 1055 1056 Program(const Program&) = delete; 1057 Program& operator=(const Program&) = delete; 1058 1059 void eval(int n, void* args[]) const; 1060 1061 template <typename... T> 1062 void eval(int n, T*... arg) const { 1063 SkASSERT(sizeof...(arg) == this->nargs()); 1064 // This nullptr isn't important except that it makes args[] non-empty if you pass none. 1065 void* args[] = { (void*)arg..., nullptr }; 1066 this->eval(n, args); 1067 } 1068 1069 std::vector<InterpreterInstruction> instructions() const; 1070 int nargs() const; 1071 int nregs() const; 1072 int loop () const; 1073 bool empty() const; 1074 1075 bool hasJIT() const; // Has this Program been JITted? 1076 bool hasTraceHooks() const; // Is this program instrumented for debugging? 
1077 1078 void visualize(SkWStream* output) const; 1079 void dump(SkWStream* = nullptr) const; 1080 void disassemble(SkWStream* = nullptr) const; 1081 viz::Visualizer* visualizer(); 1082 1083 private: 1084 void setupInterpreter(const std::vector<OptimizedInstruction>&); 1085 void setupJIT (const std::vector<OptimizedInstruction>&, const char* debug_name); 1086 1087 bool jit(const std::vector<OptimizedInstruction>&, 1088 int* stack_hint, uint32_t* registers_used, 1089 Assembler*) const; 1090 1091 void dropJIT(); 1092 1093 struct Impl; 1094 std::unique_ptr<Impl> fImpl; 1095 }; 1096 1097 // TODO: control flow 1098 // TODO: 64-bit values? 1099 1100 #define SI static inline 1101 1102 SI I32 operator+(I32 x, I32 y) { return x->add(x,y); } 1103 SI I32 operator+(I32 x, int y) { return x->add(x,y); } 1104 SI I32 operator+(int x, I32 y) { return y->add(x,y); } 1105 1106 SI I32 operator-(I32 x, I32 y) { return x->sub(x,y); } 1107 SI I32 operator-(I32 x, int y) { return x->sub(x,y); } 1108 SI I32 operator-(int x, I32 y) { return y->sub(x,y); } 1109 1110 SI I32 operator*(I32 x, I32 y) { return x->mul(x,y); } 1111 SI I32 operator*(I32 x, int y) { return x->mul(x,y); } 1112 SI I32 operator*(int x, I32 y) { return y->mul(x,y); } 1113 min(I32 x,I32 y)1114 SI I32 min(I32 x, I32 y) { return x->min(x,y); } min(I32 x,int y)1115 SI I32 min(I32 x, int y) { return x->min(x,y); } min(int x,I32 y)1116 SI I32 min(int x, I32 y) { return y->min(x,y); } 1117 max(I32 x,I32 y)1118 SI I32 max(I32 x, I32 y) { return x->max(x,y); } max(I32 x,int y)1119 SI I32 max(I32 x, int y) { return x->max(x,y); } max(int x,I32 y)1120 SI I32 max(int x, I32 y) { return y->max(x,y); } 1121 1122 SI I32 operator==(I32 x, I32 y) { return x->eq(x,y); } 1123 SI I32 operator==(I32 x, int y) { return x->eq(x,y); } 1124 SI I32 operator==(int x, I32 y) { return y->eq(x,y); } 1125 1126 SI I32 operator!=(I32 x, I32 y) { return x->neq(x,y); } 1127 SI I32 operator!=(I32 x, int y) { return x->neq(x,y); } 1128 SI I32 
operator!=(int x, I32 y) { return y->neq(x,y); } 1129 1130 SI I32 operator< (I32 x, I32 y) { return x->lt(x,y); } 1131 SI I32 operator< (I32 x, int y) { return x->lt(x,y); } 1132 SI I32 operator< (int x, I32 y) { return y->lt(x,y); } 1133 1134 SI I32 operator<=(I32 x, I32 y) { return x->lte(x,y); } 1135 SI I32 operator<=(I32 x, int y) { return x->lte(x,y); } 1136 SI I32 operator<=(int x, I32 y) { return y->lte(x,y); } 1137 1138 SI I32 operator> (I32 x, I32 y) { return x->gt(x,y); } 1139 SI I32 operator> (I32 x, int y) { return x->gt(x,y); } 1140 SI I32 operator> (int x, I32 y) { return y->gt(x,y); } 1141 1142 SI I32 operator>=(I32 x, I32 y) { return x->gte(x,y); } 1143 SI I32 operator>=(I32 x, int y) { return x->gte(x,y); } 1144 SI I32 operator>=(int x, I32 y) { return y->gte(x,y); } 1145 1146 1147 SI F32 operator+(F32 x, F32 y) { return x->add(x,y); } 1148 SI F32 operator+(F32 x, float y) { return x->add(x,y); } 1149 SI F32 operator+(float x, F32 y) { return y->add(x,y); } 1150 1151 SI F32 operator-(F32 x, F32 y) { return x->sub(x,y); } 1152 SI F32 operator-(F32 x, float y) { return x->sub(x,y); } 1153 SI F32 operator-(float x, F32 y) { return y->sub(x,y); } 1154 1155 SI F32 operator*(F32 x, F32 y) { return x->mul(x,y); } 1156 SI F32 operator*(F32 x, float y) { return x->mul(x,y); } 1157 SI F32 operator*(float x, F32 y) { return y->mul(x,y); } 1158 fast_mul(F32 x,F32 y)1159 SI F32 fast_mul(F32 x, F32 y) { return x->fast_mul(x,y); } fast_mul(F32 x,float y)1160 SI F32 fast_mul(F32 x, float y) { return x->fast_mul(x,y); } fast_mul(float x,F32 y)1161 SI F32 fast_mul(float x, F32 y) { return y->fast_mul(x,y); } 1162 1163 SI F32 operator/(F32 x, F32 y) { return x->div(x,y); } 1164 SI F32 operator/(float x, F32 y) { return y->div(x,y); } 1165 min(F32 x,F32 y)1166 SI F32 min(F32 x, F32 y) { return x->min(x,y); } min(F32 x,float y)1167 SI F32 min(F32 x, float y) { return x->min(x,y); } min(float x,F32 y)1168 SI F32 min(float x, F32 y) { return y->min(x,y); } 1169 max(F32 
x,F32 y)1170 SI F32 max(F32 x, F32 y) { return x->max(x,y); } max(F32 x,float y)1171 SI F32 max(F32 x, float y) { return x->max(x,y); } max(float x,F32 y)1172 SI F32 max(float x, F32 y) { return y->max(x,y); } 1173 1174 SI I32 operator==(F32 x, F32 y) { return x->eq(x,y); } 1175 SI I32 operator==(F32 x, float y) { return x->eq(x,y); } 1176 SI I32 operator==(float x, F32 y) { return y->eq(x,y); } 1177 1178 SI I32 operator!=(F32 x, F32 y) { return x->neq(x,y); } 1179 SI I32 operator!=(F32 x, float y) { return x->neq(x,y); } 1180 SI I32 operator!=(float x, F32 y) { return y->neq(x,y); } 1181 1182 SI I32 operator< (F32 x, F32 y) { return x->lt(x,y); } 1183 SI I32 operator< (F32 x, float y) { return x->lt(x,y); } 1184 SI I32 operator< (float x, F32 y) { return y->lt(x,y); } 1185 1186 SI I32 operator<=(F32 x, F32 y) { return x->lte(x,y); } 1187 SI I32 operator<=(F32 x, float y) { return x->lte(x,y); } 1188 SI I32 operator<=(float x, F32 y) { return y->lte(x,y); } 1189 1190 SI I32 operator> (F32 x, F32 y) { return x->gt(x,y); } 1191 SI I32 operator> (F32 x, float y) { return x->gt(x,y); } 1192 SI I32 operator> (float x, F32 y) { return y->gt(x,y); } 1193 1194 SI I32 operator>=(F32 x, F32 y) { return x->gte(x,y); } 1195 SI I32 operator>=(F32 x, float y) { return x->gte(x,y); } 1196 SI I32 operator>=(float x, F32 y) { return y->gte(x,y); } 1197 1198 SI I32& operator+=(I32& x, I32 y) { return (x = x + y); } 1199 SI I32& operator+=(I32& x, int y) { return (x = x + y); } 1200 1201 SI I32& operator-=(I32& x, I32 y) { return (x = x - y); } 1202 SI I32& operator-=(I32& x, int y) { return (x = x - y); } 1203 1204 SI I32& operator*=(I32& x, I32 y) { return (x = x * y); } 1205 SI I32& operator*=(I32& x, int y) { return (x = x * y); } 1206 1207 SI F32& operator+=(F32& x, F32 y) { return (x = x + y); } 1208 SI F32& operator+=(F32& x, float y) { return (x = x + y); } 1209 1210 SI F32& operator-=(F32& x, F32 y) { return (x = x - y); } 1211 SI F32& operator-=(F32& x, float y) { return (x 
= x - y); } 1212 1213 SI F32& operator*=(F32& x, F32 y) { return (x = x * y); } 1214 SI F32& operator*=(F32& x, float y) { return (x = x * y); } 1215 1216 SI F32& operator/=(F32& x, F32 y) { return (x = x / y); } 1217 assert_true(I32 cond,I32 debug)1218 SI void assert_true(I32 cond, I32 debug) { cond->assert_true(cond,debug); } assert_true(I32 cond,F32 debug)1219 SI void assert_true(I32 cond, F32 debug) { cond->assert_true(cond,debug); } assert_true(I32 cond)1220 SI void assert_true(I32 cond) { cond->assert_true(cond); } 1221 store8(Ptr ptr,I32 val)1222 SI void store8 (Ptr ptr, I32 val) { val->store8 (ptr, val); } store16(Ptr ptr,I32 val)1223 SI void store16 (Ptr ptr, I32 val) { val->store16 (ptr, val); } store32(Ptr ptr,I32 val)1224 SI void store32 (Ptr ptr, I32 val) { val->store32 (ptr, val); } storeF(Ptr ptr,F32 val)1225 SI void storeF (Ptr ptr, F32 val) { val->storeF (ptr, val); } store64(Ptr ptr,I32 lo,I32 hi)1226 SI void store64 (Ptr ptr, I32 lo, I32 hi) { lo ->store64 (ptr, lo,hi); } store128(Ptr ptr,I32 x,I32 y,I32 z,I32 w)1227 SI void store128(Ptr ptr, I32 x, I32 y, I32 z, I32 w) { x ->store128(ptr, x,y,z,w); } 1228 gather8(UPtr ptr,int off,I32 ix)1229 SI I32 gather8 (UPtr ptr, int off, I32 ix) { return ix->gather8 (ptr, off, ix); } gather16(UPtr ptr,int off,I32 ix)1230 SI I32 gather16(UPtr ptr, int off, I32 ix) { return ix->gather16(ptr, off, ix); } gather32(UPtr ptr,int off,I32 ix)1231 SI I32 gather32(UPtr ptr, int off, I32 ix) { return ix->gather32(ptr, off, ix); } gatherF(UPtr ptr,int off,I32 ix)1232 SI F32 gatherF (UPtr ptr, int off, I32 ix) { return ix->gatherF (ptr, off, ix); } 1233 gather8(Uniform u,I32 ix)1234 SI I32 gather8 (Uniform u, I32 ix) { return ix->gather8 (u, ix); } gather16(Uniform u,I32 ix)1235 SI I32 gather16(Uniform u, I32 ix) { return ix->gather16(u, ix); } gather32(Uniform u,I32 ix)1236 SI I32 gather32(Uniform u, I32 ix) { return ix->gather32(u, ix); } gatherF(Uniform u,I32 ix)1237 SI F32 gatherF (Uniform u, I32 ix) { return 
ix->gatherF (u, ix); } 1238 sqrt(F32 x)1239 SI F32 sqrt(F32 x) { return x-> sqrt(x); } approx_log2(F32 x)1240 SI F32 approx_log2(F32 x) { return x->approx_log2(x); } approx_pow2(F32 x)1241 SI F32 approx_pow2(F32 x) { return x->approx_pow2(x); } approx_log(F32 x)1242 SI F32 approx_log (F32 x) { return x->approx_log (x); } approx_exp(F32 x)1243 SI F32 approx_exp (F32 x) { return x->approx_exp (x); } 1244 approx_powf(F32 base,F32 exp)1245 SI F32 approx_powf(F32 base, F32 exp) { return base->approx_powf(base, exp); } approx_powf(F32 base,float exp)1246 SI F32 approx_powf(F32 base, float exp) { return base->approx_powf(base, exp); } approx_powf(float base,F32 exp)1247 SI F32 approx_powf(float base, F32 exp) { return exp->approx_powf(base, exp); } 1248 approx_sin(F32 radians)1249 SI F32 approx_sin(F32 radians) { return radians->approx_sin(radians); } approx_cos(F32 radians)1250 SI F32 approx_cos(F32 radians) { return radians->approx_cos(radians); } approx_tan(F32 radians)1251 SI F32 approx_tan(F32 radians) { return radians->approx_tan(radians); } 1252 approx_asin(F32 x)1253 SI F32 approx_asin(F32 x) { return x->approx_asin(x); } approx_acos(F32 x)1254 SI F32 approx_acos(F32 x) { return x->approx_acos(x); } approx_atan(F32 x)1255 SI F32 approx_atan(F32 x) { return x->approx_atan(x); } approx_atan2(F32 y,F32 x)1256 SI F32 approx_atan2(F32 y, F32 x) { return x->approx_atan2(y, x); } 1257 clamp01(F32 x)1258 SI F32 clamp01(F32 x) { return x-> clamp01(x); } abs(F32 x)1259 SI F32 abs(F32 x) { return x-> abs(x); } ceil(F32 x)1260 SI F32 ceil(F32 x) { return x-> ceil(x); } fract(F32 x)1261 SI F32 fract(F32 x) { return x-> fract(x); } floor(F32 x)1262 SI F32 floor(F32 x) { return x-> floor(x); } is_NaN(F32 x)1263 SI I32 is_NaN(F32 x) { return x-> is_NaN(x); } is_finite(F32 x)1264 SI I32 is_finite(F32 x) { return x->is_finite(x); } 1265 trunc(F32 x)1266 SI I32 trunc(F32 x) { return x-> trunc(x); } round(F32 x)1267 SI I32 round(F32 x) { return x-> round(x); } pun_to_I32(F32 x)1268 
SI I32 pun_to_I32(F32 x) { return x->pun_to_I32(x); }
SI F32 pun_to_F32(I32 x) { return x->pun_to_F32(x); }
SI F32 to_F32    (I32 x) { return x->to_F32(x); }
SI I32 to_fp16   (F32 x) { return x->to_fp16(x); }
SI F32 from_fp16 (I32 x) { return x->from_fp16(x); }

// lerp: one overload per F32/float mix that has at least one F32 argument.  The
// call goes through the first F32 argument in the list.
SI F32 lerp(F32   lo, F32   hi, F32   t) { return lo->lerp(lo, hi, t); }
SI F32 lerp(F32   lo, F32   hi, float t) { return lo->lerp(lo, hi, t); }
SI F32 lerp(F32   lo, float hi, F32   t) { return lo->lerp(lo, hi, t); }
SI F32 lerp(F32   lo, float hi, float t) { return lo->lerp(lo, hi, t); }
SI F32 lerp(float lo, F32   hi, F32   t) { return hi->lerp(lo, hi, t); }
SI F32 lerp(float lo, F32   hi, float t) { return hi->lerp(lo, hi, t); }
SI F32 lerp(float lo, float hi, F32   t) { return t ->lerp(lo, hi, t); }

// clamp: the same overload scheme as lerp.
SI F32 clamp(F32   x, F32   lo, F32   hi) { return x ->clamp(x, lo, hi); }
SI F32 clamp(F32   x, F32   lo, float hi) { return x ->clamp(x, lo, hi); }
SI F32 clamp(F32   x, float lo, F32   hi) { return x ->clamp(x, lo, hi); }
SI F32 clamp(F32   x, float lo, float hi) { return x ->clamp(x, lo, hi); }
SI F32 clamp(float x, F32   lo, F32   hi) { return lo->clamp(x, lo, hi); }
SI F32 clamp(float x, F32   lo, float hi) { return lo->clamp(x, lo, hi); }
SI F32 clamp(float x, float lo, F32   hi) { return hi->clamp(x, lo, hi); }

// operator<< is shl().
SI I32 operator<<(I32 x, int bits) { return x->shl(x, bits); }
SI I32 shl       (I32 x, int bits) { return x->shl(x, bits); }
SI I32 shr       (I32 x, int bits) { return x->shr(x, bits); }
sra(I32 x,int bits)1293 SI I32 sra(I32 x, int bits) { return x->sra(x, bits); } 1294 1295 SI I32 operator&(I32 x, I32 y) { return x->bit_and(x,y); } 1296 SI I32 operator&(I32 x, int y) { return x->bit_and(x,y); } 1297 SI I32 operator&(int x, I32 y) { return y->bit_and(x,y); } 1298 1299 SI I32 operator|(I32 x, I32 y) { return x->bit_or (x,y); } 1300 SI I32 operator|(I32 x, int y) { return x->bit_or (x,y); } 1301 SI I32 operator|(int x, I32 y) { return y->bit_or (x,y); } 1302 1303 SI I32 operator^(I32 x, I32 y) { return x->bit_xor(x,y); } 1304 SI I32 operator^(I32 x, int y) { return x->bit_xor(x,y); } 1305 SI I32 operator^(int x, I32 y) { return y->bit_xor(x,y); } 1306 1307 SI I32& operator&=(I32& x, I32 y) { return (x = x & y); } 1308 SI I32& operator&=(I32& x, int y) { return (x = x & y); } 1309 SI I32& operator|=(I32& x, I32 y) { return (x = x | y); } 1310 SI I32& operator|=(I32& x, int y) { return (x = x | y); } 1311 SI I32& operator^=(I32& x, I32 y) { return (x = x ^ y); } 1312 SI I32& operator^=(I32& x, int y) { return (x = x ^ y); } 1313 bit_clear(I32 x,I32 y)1314 SI I32 bit_clear(I32 x, I32 y) { return x->bit_clear(x,y); } bit_clear(I32 x,int y)1315 SI I32 bit_clear(I32 x, int y) { return x->bit_clear(x,y); } bit_clear(int x,I32 y)1316 SI I32 bit_clear(int x, I32 y) { return y->bit_clear(x,y); } 1317 select(I32 c,I32 t,I32 f)1318 SI I32 select(I32 c, I32 t, I32 f) { return c->select(c, t , f ); } select(I32 c,I32 t,int f)1319 SI I32 select(I32 c, I32 t, int f) { return c->select(c, t , c->splat(f)); } select(I32 c,int t,I32 f)1320 SI I32 select(I32 c, int t, I32 f) { return c->select(c, c->splat(t), f ); } select(I32 c,int t,int f)1321 SI I32 select(I32 c, int t, int f) { return c->select(c, c->splat(t), c->splat(f)); } 1322 select(I32 c,F32 t,F32 f)1323 SI F32 select(I32 c, F32 t, F32 f) { return c->select(c, t , f ); } select(I32 c,F32 t,float f)1324 SI F32 select(I32 c, F32 t, float f) { return c->select(c, t , c->splat(f)); } select(I32 c,float t,F32 
f)1325 SI F32 select(I32 c, float t, F32 f) { return c->select(c, c->splat(t), f ); } select(I32 c,float t,float f)1326 SI F32 select(I32 c, float t, float f) { return c->select(c, c->splat(t), c->splat(f)); } 1327 extract(I32 x,int bits,I32 z)1328 SI I32 extract(I32 x, int bits, I32 z) { return x->extract(x,bits,z); } extract(I32 x,int bits,int z)1329 SI I32 extract(I32 x, int bits, int z) { return x->extract(x,bits,z); } extract(int x,int bits,I32 z)1330 SI I32 extract(int x, int bits, I32 z) { return z->extract(x,bits,z); } 1331 pack(I32 x,I32 y,int bits)1332 SI I32 pack(I32 x, I32 y, int bits) { return x->pack (x,y,bits); } pack(I32 x,int y,int bits)1333 SI I32 pack(I32 x, int y, int bits) { return x->pack (x,y,bits); } pack(int x,I32 y,int bits)1334 SI I32 pack(int x, I32 y, int bits) { return y->pack (x,y,bits); } 1335 1336 SI I32 operator~(I32 x) { return ~0 ^ x; } 1337 SI I32 operator-(I32 x) { return 0 - x; } 1338 SI F32 operator-(F32 x) { return 0.0f - x; } 1339 from_unorm(int bits,I32 x)1340 SI F32 from_unorm(int bits, I32 x) { return x->from_unorm(bits,x); } to_unorm(int bits,F32 x)1341 SI I32 to_unorm(int bits, F32 x) { return x-> to_unorm(bits,x); } 1342 store(PixelFormat f,Ptr p,Color c)1343 SI void store(PixelFormat f, Ptr p, Color c) { return c->store(f,p,c); } 1344 gather(PixelFormat f,UPtr p,int off,I32 ix)1345 SI Color gather(PixelFormat f, UPtr p, int off, I32 ix) { return ix->gather(f,p,off,ix); } gather(PixelFormat f,Uniform u,I32 ix)1346 SI Color gather(PixelFormat f, Uniform u , I32 ix) { return ix->gather(f,u,ix); } 1347 premul(F32 * r,F32 * g,F32 * b,F32 a)1348 SI void premul(F32* r, F32* g, F32* b, F32 a) { a-> premul(r,g,b,a); } unpremul(F32 * r,F32 * g,F32 * b,F32 a)1349 SI void unpremul(F32* r, F32* g, F32* b, F32 a) { a->unpremul(r,g,b,a); } 1350 premul(Color c)1351 SI Color premul(Color c) { return c-> premul(c); } unpremul(Color c)1352 SI Color unpremul(Color c) { return c->unpremul(c); } 1353 lerp(Color lo,Color hi,F32 t)1354 SI 
Color lerp(Color lo, Color hi, F32 t) { return t->lerp(lo,hi,t); } 1355 blend(SkBlendMode m,Color s,Color d)1356 SI Color blend(SkBlendMode m, Color s, Color d) { return s->blend(m,s,d); } 1357 clamp01(Color c)1358 SI Color clamp01(Color c) { return c->clamp01(c); } 1359 to_hsla(Color c)1360 SI HSLA to_hsla(Color c) { return c->to_hsla(c); } to_rgba(HSLA c)1361 SI Color to_rgba(HSLA c) { return c->to_rgba(c); } 1362 1363 // Evaluate polynomials: ax^n + bx^(n-1) + ... for n >= 1 1364 template <typename F32_or_float, typename... Rest> poly(F32 x,F32_or_float a,float b,Rest...rest)1365 SI F32 poly(F32 x, F32_or_float a, float b, Rest... rest) { 1366 if constexpr (sizeof...(rest) == 0) { 1367 return x*a+b; 1368 } else { 1369 return poly(x, x*a+b, rest...); 1370 } 1371 } 1372 #undef SI 1373 } // namespace skvm 1374 1375 #endif//SkVM_DEFINED 1376