1 /* 2 * Copyright 2019 Google LLC 3 * 4 * Use of this source code is governed by a BSD-style license that can be 5 * found in the LICENSE file. 6 */ 7 8 #ifndef SkVM_DEFINED 9 #define SkVM_DEFINED 10 11 #include "include/core/SkBlendMode.h" 12 #include "include/core/SkColor.h" 13 #include "include/core/SkSpan.h" 14 #include "include/private/SkMacros.h" 15 #include "include/private/SkTArray.h" 16 #include "include/private/SkTHash.h" 17 #include "src/core/SkVM_fwd.h" 18 #include <vector> // std::vector 19 20 class SkWStream; 21 22 #if defined(SKVM_JIT_WHEN_POSSIBLE) && !defined(SK_BUILD_FOR_IOS) 23 #if defined(__x86_64__) || defined(_M_X64) 24 #if defined(_WIN32) || defined(__linux) || defined(__APPLE__) 25 #define SKVM_JIT 26 #endif 27 #endif 28 #if defined(__aarch64__) 29 #if defined(__ANDROID__) || defined(__APPLE__) 30 #define SKVM_JIT 31 #endif 32 #endif 33 #endif 34 35 #if 0 36 #define SKVM_LLVM 37 #endif 38 39 #if 0 40 #undef SKVM_JIT 41 #endif 42 43 namespace skvm { 44 45 class Assembler { 46 public: 47 explicit Assembler(void* buf); 48 49 size_t size() const; 50 51 // Order matters... GP64, Xmm, Ymm values match 4-bit register encoding for each. 52 enum GP64 { 53 rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi, 54 r8 , r9 , r10, r11, r12, r13, r14, r15, 55 }; 56 enum Xmm { 57 xmm0, xmm1, xmm2 , xmm3 , xmm4 , xmm5 , xmm6 , xmm7 , 58 xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, 59 }; 60 enum Ymm { 61 ymm0, ymm1, ymm2 , ymm3 , ymm4 , ymm5 , ymm6 , ymm7 , 62 ymm8, ymm9, ymm10, ymm11, ymm12, ymm13, ymm14, ymm15, 63 }; 64 65 // X and V values match 5-bit encoding for each (nothing tricky). 66 enum X { 67 x0 , x1 , x2 , x3 , x4 , x5 , x6 , x7 , 68 x8 , x9 , x10, x11, x12, x13, x14, x15, 69 x16, x17, x18, x19, x20, x21, x22, x23, 70 x24, x25, x26, x27, x28, x29, x30, xzr, sp=xzr, 71 }; 72 enum V { 73 v0 , v1 , v2 , v3 , v4 , v5 , v6 , v7 , 74 v8 , v9 , v10, v11, v12, v13, v14, v15, 75 v16, v17, v18, v19, v20, v21, v22, v23, 76 v24, v25, v26, v27, v28, v29, v30, v31, 77 }; 78 79 void bytes(const void*, int); 80 void byte(uint8_t); 81 void word(uint32_t); 82 83 struct Label { 84 int offset = 0; 85 enum { NotYetSet, ARMDisp19, X86Disp32 } kind = NotYetSet; 86 SkSTArray<2, int> references; 87 }; 88 89 // x86-64 90 91 void align(int mod); 92 93 void int3(); 94 void vzeroupper(); 95 void ret(); 96 97 // Mem represents a value at base + disp + scale*index, 98 // or simply at base + disp if index=rsp. 99 enum Scale { ONE, TWO, FOUR, EIGHT }; 100 struct Mem { 101 GP64 base; 102 int disp = 0; 103 GP64 index = rsp; 104 Scale scale = ONE; 105 }; 106 107 struct Operand { 108 union { 109 int reg; 110 Mem mem; 111 Label* label; 112 }; 113 enum { REG, MEM, LABEL } kind; 114 OperandOperand115 Operand(GP64 r) : reg (r), kind(REG ) {} OperandOperand116 Operand(Xmm r) : reg (r), kind(REG ) {} OperandOperand117 Operand(Ymm r) : reg (r), kind(REG ) {} OperandOperand118 Operand(Mem m) : mem (m), kind(MEM ) {} OperandOperand119 Operand(Label* l) : label(l), kind(LABEL) {} 120 }; 121 122 void vpand (Ymm dst, Ymm x, Operand y); 123 void vpandn(Ymm dst, Ymm x, Operand y); 124 void vpor (Ymm dst, Ymm x, Operand y); 125 void vpxor (Ymm dst, Ymm x, Operand y); 126 127 void vpaddd (Ymm dst, Ymm x, Operand y); 128 void vpsubd (Ymm dst, Ymm x, Operand y); 129 void vpmulld(Ymm dst, Ymm x, Operand y); 130 131 void vpaddw (Ymm dst, Ymm x, Operand y); 132 void vpsubw (Ymm dst, Ymm x, Operand y); 133 void vpmullw (Ymm dst, Ymm x, Operand y); 134 135 void vpabsw (Ymm dst, Operand x); 136 void vpavgw (Ymm dst, Ymm x, Operand y); // dst = (x+y+1)>>1, unsigned. 137 void vpmulhrsw(Ymm dst, Ymm x, Operand y); // dst = (x*y + (1<<14)) >> 15, signed. 138 void vpminsw (Ymm dst, Ymm x, Operand y); 139 void vpminuw (Ymm dst, Ymm x, Operand y); 140 void vpmaxsw (Ymm dst, Ymm x, Operand y); 141 void vpmaxuw (Ymm dst, Ymm x, Operand y); 142 143 void vaddps(Ymm dst, Ymm x, Operand y); 144 void vsubps(Ymm dst, Ymm x, Operand y); 145 void vmulps(Ymm dst, Ymm x, Operand y); 146 void vdivps(Ymm dst, Ymm x, Operand y); 147 void vminps(Ymm dst, Ymm x, Operand y); 148 void vmaxps(Ymm dst, Ymm x, Operand y); 149 150 void vsqrtps(Ymm dst, Operand x); 151 152 void vfmadd132ps(Ymm dst, Ymm x, Operand y); 153 void vfmadd213ps(Ymm dst, Ymm x, Operand y); 154 void vfmadd231ps(Ymm dst, Ymm x, Operand y); 155 156 void vfmsub132ps(Ymm dst, Ymm x, Operand y); 157 void vfmsub213ps(Ymm dst, Ymm x, Operand y); 158 void vfmsub231ps(Ymm dst, Ymm x, Operand y); 159 160 void vfnmadd132ps(Ymm dst, Ymm x, Operand y); 161 void vfnmadd213ps(Ymm dst, Ymm x, Operand y); 162 void vfnmadd231ps(Ymm dst, Ymm x, Operand y); 163 164 void vpackusdw(Ymm dst, Ymm x, Operand y); 165 void vpackuswb(Ymm dst, Ymm x, Operand y); 166 167 void vpunpckldq(Ymm dst, Ymm x, Operand y); 168 void vpunpckhdq(Ymm dst, Ymm x, Operand y); 169 170 void vpcmpeqd(Ymm dst, Ymm x, Operand y); 171 void vpcmpgtd(Ymm dst, Ymm x, Operand y); 172 void vpcmpeqw(Ymm dst, Ymm x, Operand y); 173 void vpcmpgtw(Ymm dst, Ymm x, Operand y); 174 175 void vcmpps (Ymm dst, Ymm x, Operand y, int imm); vcmpeqps(Ymm dst,Ymm x,Operand y)176 void vcmpeqps (Ymm dst, Ymm x, Operand y) { this->vcmpps(dst,x,y,0); } vcmpltps(Ymm dst,Ymm x,Operand y)177 void vcmpltps (Ymm dst, Ymm x, Operand y) { this->vcmpps(dst,x,y,1); } vcmpleps(Ymm dst,Ymm x,Operand y)178 void vcmpleps (Ymm dst, Ymm x, Operand y) { this->vcmpps(dst,x,y,2); } vcmpneqps(Ymm dst,Ymm x,Operand y)179 void vcmpneqps(Ymm dst, Ymm x, Operand y) { this->vcmpps(dst,x,y,4); } 180 181 // Sadly, the x parameter cannot be a general Operand for these shifts. 182 void vpslld(Ymm dst, Ymm x, int imm); 183 void vpsrld(Ymm dst, Ymm x, int imm); 184 void vpsrad(Ymm dst, Ymm x, int imm); 185 186 void vpsllw(Ymm dst, Ymm x, int imm); 187 void vpsrlw(Ymm dst, Ymm x, int imm); 188 void vpsraw(Ymm dst, Ymm x, int imm); 189 190 void vpermq (Ymm dst, Operand x, int imm); 191 void vperm2f128(Ymm dst, Ymm x, Operand y, int imm); 192 void vpermps (Ymm dst, Ymm ix, Operand src); // dst[i] = src[ix[i]] 193 194 enum Rounding { NEAREST, FLOOR, CEIL, TRUNC, CURRENT }; 195 void vroundps(Ymm dst, Operand x, Rounding); 196 197 void vmovdqa(Ymm dst, Operand x); 198 void vmovups(Ymm dst, Operand x); 199 void vmovups(Xmm dst, Operand x); 200 void vmovups(Operand dst, Ymm x); 201 void vmovups(Operand dst, Xmm x); 202 203 void vcvtdq2ps (Ymm dst, Operand x); 204 void vcvttps2dq(Ymm dst, Operand x); 205 void vcvtps2dq (Ymm dst, Operand x); 206 207 void vcvtps2ph(Operand dst, Ymm x, Rounding); 208 void vcvtph2ps(Ymm dst, Operand x); 209 210 void vpblendvb(Ymm dst, Ymm x, Operand y, Ymm z); 211 212 void vpshufb(Ymm dst, Ymm x, Operand y); 213 214 void vptest(Ymm x, Operand y); 215 216 void vbroadcastss(Ymm dst, Operand y); 217 218 void vpmovzxwd(Ymm dst, Operand src); // dst = src, 128-bit, uint16_t -> int 219 void vpmovzxbd(Ymm dst, Operand src); // dst = src, 64-bit, uint8_t -> int 220 221 void vmovq(Operand dst, Xmm src); // dst = src, 64-bit 222 void vmovd(Operand dst, Xmm src); // dst = src, 32-bit 223 void vmovd(Xmm dst, Operand src); // dst = src, 32-bit 224 225 void vpinsrd(Xmm dst, Xmm src, Operand y, int imm); // dst = src; dst[imm] = y, 32-bit 226 void vpinsrw(Xmm dst, Xmm src, Operand y, int imm); // dst = src; dst[imm] = y, 16-bit 227 void vpinsrb(Xmm dst, Xmm src, Operand y, int imm); // dst = src; dst[imm] = y, 8-bit 228 229 void vextracti128(Operand dst, Ymm src, int imm); // dst = src[imm], 128-bit 230 void vpextrd (Operand dst, Xmm src, int imm); // dst = src[imm], 32-bit 231 void vpextrw (Operand dst, Xmm src, int imm); // dst = src[imm], 16-bit 232 void vpextrb (Operand dst, Xmm src, int imm); // dst = src[imm], 8-bit 233 234 // if (mask & 0x8000'0000) { 235 // dst = base[scale*ix]; 236 // } 237 // mask = 0; 238 void vgatherdps(Ymm dst, Scale scale, Ymm ix, GP64 base, Ymm mask); 239 240 241 void label(Label*); 242 243 void jmp(Label*); 244 void je (Label*); 245 void jne(Label*); 246 void jl (Label*); 247 void jc (Label*); 248 249 void add (Operand dst, int imm); 250 void sub (Operand dst, int imm); 251 void cmp (Operand dst, int imm); 252 void mov (Operand dst, int imm); 253 void movb(Operand dst, int imm); 254 255 void add (Operand dst, GP64 x); 256 void sub (Operand dst, GP64 x); 257 void cmp (Operand dst, GP64 x); 258 void mov (Operand dst, GP64 x); 259 void movb(Operand dst, GP64 x); 260 261 void add (GP64 dst, Operand x); 262 void sub (GP64 dst, Operand x); 263 void cmp (GP64 dst, Operand x); 264 void mov (GP64 dst, Operand x); 265 void movb(GP64 dst, Operand x); 266 267 // Disambiguators... choice is arbitrary (but generates different code!). add(GP64 dst,GP64 x)268 void add (GP64 dst, GP64 x) { this->add (Operand(dst), x); } sub(GP64 dst,GP64 x)269 void sub (GP64 dst, GP64 x) { this->sub (Operand(dst), x); } cmp(GP64 dst,GP64 x)270 void cmp (GP64 dst, GP64 x) { this->cmp (Operand(dst), x); } mov(GP64 dst,GP64 x)271 void mov (GP64 dst, GP64 x) { this->mov (Operand(dst), x); } movb(GP64 dst,GP64 x)272 void movb(GP64 dst, GP64 x) { this->movb(Operand(dst), x); } 273 274 void movzbq(GP64 dst, Operand x); // dst = x, uint8_t -> int 275 void movzwq(GP64 dst, Operand x); // dst = x, uint16_t -> int 276 277 // aarch64 278 279 // d = op(n,m) 280 using DOpNM = void(V d, V n, V m); 281 DOpNM and16b, orr16b, eor16b, bic16b, bsl16b, 282 add4s, sub4s, mul4s, 283 cmeq4s, cmgt4s, 284 sub8h, mul8h, 285 fadd4s, fsub4s, fmul4s, fdiv4s, fmin4s, fmax4s, 286 fcmeq4s, fcmgt4s, fcmge4s, 287 tbl, 288 uzp14s, uzp24s, 289 zip14s, zip24s; 290 291 // TODO: there are also float ==,<,<=,>,>= instructions with an immediate 0.0f, 292 // and the register comparison > and >= can also compare absolute values. Interesting. 293 294 // d += n*m 295 void fmla4s(V d, V n, V m); 296 297 // d -= n*m 298 void fmls4s(V d, V n, V m); 299 300 // d = op(n,imm) 301 using DOpNImm = void(V d, V n, int imm); 302 DOpNImm sli4s, 303 shl4s, sshr4s, ushr4s, 304 ushr8h; 305 306 // d = op(n) 307 using DOpN = void(V d, V n); 308 DOpN not16b, // d = ~n 309 fneg4s, // d = -n 310 fsqrt4s, // d = sqrtf(n) 311 scvtf4s, // int -> float 312 fcvtzs4s, // truncate float -> int 313 fcvtns4s, // round float -> int (nearest even) 314 frintp4s, // round float -> int as float, toward plus infinity (ceil) 315 frintm4s, // round float -> int as float, toward minus infinity (floor) 316 fcvtn, // f32 -> f16 in low half 317 fcvtl, // f16 in low half -> f32 318 xtns2h, // u32 -> u16 319 xtnh2b, // u16 -> u8 320 uxtlb2h, // u8 -> u16 (TODO: this is a special case of ushll.8h) 321 uxtlh2s, // u16 -> u32 (TODO: this is a special case of ushll.4s) 322 uminv4s; // dst[0] = min(n[0],n[1],n[2],n[3]), n as unsigned 323 324 void brk (int imm16); 325 void ret (X); 326 void add (X d, X n, int imm12); 327 void sub (X d, X n, int imm12); 328 void subs(X d, X n, int imm12); // subtract setting condition flags 329 330 enum Shift { LSL,LSR,ASR,ROR }; 331 void add (X d, X n, X m, Shift=LSL, int imm6=0); // d=n+Shift(m,imm6), for Shift != ROR. 332 333 // There's another encoding for unconditional branches that can jump further, 334 // but this one encoded as b.al is simple to implement and should be fine. b(Label * l)335 void b (Label* l) { this->b(Condition::al, l); } bne(Label * l)336 void bne(Label* l) { this->b(Condition::ne, l); } blt(Label * l)337 void blt(Label* l) { this->b(Condition::lt, l); } 338 339 // "cmp ..." is just an assembler mnemonic for "subs xzr, ..."! cmp(X n,int imm12)340 void cmp(X n, int imm12) { this->subs(xzr, n, imm12); } 341 342 // Compare and branch if zero/non-zero, as if 343 // cmp(t,0) 344 // beq/bne(l) 345 // but without setting condition flags. 346 void cbz (X t, Label* l); 347 void cbnz(X t, Label* l); 348 349 // TODO: there are ldur variants with unscaled imm, useful? 350 void ldrd(X dst, X src, int imm12=0); // 64-bit dst = *(src+imm12*8) 351 void ldrs(X dst, X src, int imm12=0); // 32-bit dst = *(src+imm12*4) 352 void ldrh(X dst, X src, int imm12=0); // 16-bit dst = *(src+imm12*2) 353 void ldrb(X dst, X src, int imm12=0); // 8-bit dst = *(src+imm12) 354 355 void ldrq(V dst, Label*); // 128-bit PC-relative load 356 357 void ldrq(V dst, X src, int imm12=0); // 128-bit dst = *(src+imm12*16) 358 void ldrd(V dst, X src, int imm12=0); // 64-bit dst = *(src+imm12*8) 359 void ldrs(V dst, X src, int imm12=0); // 32-bit dst = *(src+imm12*4) 360 void ldrh(V dst, X src, int imm12=0); // 16-bit dst = *(src+imm12*2) 361 void ldrb(V dst, X src, int imm12=0); // 8-bit dst = *(src+imm12) 362 363 void strs(X src, X dst, int imm12=0); // 32-bit *(dst+imm12*4) = src 364 365 void strq(V src, X dst, int imm12=0); // 128-bit *(dst+imm12*16) = src 366 void strd(V src, X dst, int imm12=0); // 64-bit *(dst+imm12*8) = src 367 void strs(V src, X dst, int imm12=0); // 32-bit *(dst+imm12*4) = src 368 void strh(V src, X dst, int imm12=0); // 16-bit *(dst+imm12*2) = src 369 void strb(V src, X dst, int imm12=0); // 8-bit *(dst+imm12) = src 370 371 void movs(X dst, V src, int lane); // dst = 32-bit src[lane] 372 void inss(V dst, X src, int lane); // dst[lane] = 32-bit src 373 374 void dup4s (V dst, X src); // Each 32-bit lane = src 375 376 void ld1r4s (V dst, X src); // Each 32-bit lane = *src 377 void ld1r8h (V dst, X src); // Each 16-bit lane = *src 378 void ld1r16b(V dst, X src); // Each 8-bit lane = *src 379 380 void ld24s(V dst, X src); // deinterleave(dst,dst+1) = 256-bit *src 381 void ld44s(V dst, X src); // deinterleave(dst,dst+1,dst+2,dst+3) = 512-bit *src 382 void st24s(V src, X dst); // 256-bit *dst = interleave_32bit_lanes(src,src+1) 383 void st44s(V src, X dst); // 512-bit *dst = interleave_32bit_lanes(src,src+1,src+2,src+3) 384 385 void ld24s(V dst, X src, int lane); // Load 2 32-bit values into given lane of dst..dst+1 386 void ld44s(V dst, X src, int lane); // Load 4 32-bit values into given lane of dst..dst+3 387 388 private: 389 uint8_t* fCode; 390 size_t fSize; 391 392 // x86-64 393 enum W { W0, W1 }; // Are the lanes 64-bit (W1) or default (W0)? Intel Vol 2A 2.3.5.5 394 enum L { L128, L256 }; // Is this a 128- or 256-bit operation? Intel Vol 2A 2.3.6.2 395 396 // Helpers for vector instructions. 397 void op(int prefix, int map, int opcode, int dst, int x, Operand y, W,L); 398 void op(int p, int m, int o, Ymm d, Ymm x, Operand y, W w=W0) { op(p,m,o, d,x,y,w,L256); } 399 void op(int p, int m, int o, Ymm d, Operand y, W w=W0) { op(p,m,o, d,0,y,w,L256); } 400 void op(int p, int m, int o, Xmm d, Xmm x, Operand y, W w=W0) { op(p,m,o, d,x,y,w,L128); } 401 void op(int p, int m, int o, Xmm d, Operand y, W w=W0) { op(p,m,o, d,0,y,w,L128); } 402 403 // Helpers for GP64 instructions. 404 void op(int opcode, Operand dst, GP64 x); 405 void op(int opcode, int opcode_ext, Operand dst, int imm); 406 407 void jump(uint8_t condition, Label*); 408 int disp32(Label*); 409 void imm_byte_after_operand(const Operand&, int byte); 410 411 // aarch64 412 413 // Opcode for 3-arguments ops is split between hi and lo: 414 // [11 bits hi] [5 bits m] [6 bits lo] [5 bits n] [5 bits d] 415 void op(uint32_t hi, V m, uint32_t lo, V n, V d); 416 417 // 0,1,2-argument ops, with or without an immediate: 418 // [ 22 bits op ] [5 bits n] [5 bits d] 419 // Any immediate falls in the middle somewhere overlapping with either op, n, or both. 420 void op(uint32_t op22, V n, V d, int imm=0); 421 void op(uint32_t op22, X n, V d, int imm=0) { this->op(op22,(V)n, d,imm); } 422 void op(uint32_t op22, V n, X d, int imm=0) { this->op(op22, n,(V)d,imm); } 423 void op(uint32_t op22, X n, X d, int imm=0) { this->op(op22,(V)n,(V)d,imm); } 424 void op(uint32_t op22, int imm=0) { this->op(op22,(V)0,(V)0,imm); } 425 // (1-argument ops don't seem to have a consistent convention of passing as n or d.) 426 427 428 // Order matters... value is 4-bit encoding for condition code. 429 enum class Condition { eq,ne,cs,cc,mi,pl,vs,vc,hi,ls,ge,lt,gt,le,al }; 430 void b(Condition, Label*); 431 int disp19(Label*); 432 }; 433 434 // Order matters a little: Ops <=store128 are treated as having side effects. 435 #define SKVM_OPS(M) \ 436 M(assert_true) \ 437 M(store8) M(store16) M(store32) M(store64) M(store128) \ 438 M(load8) M(load16) M(load32) M(load64) M(load128) \ 439 M(index) \ 440 M(gather8) M(gather16) M(gather32) \ 441 M(uniform32) \ 442 M(splat) \ 443 M(add_f32) M(add_i32) \ 444 M(sub_f32) M(sub_i32) \ 445 M(mul_f32) M(mul_i32) \ 446 M(div_f32) \ 447 M(min_f32) M(max_f32) \ 448 M(fma_f32) M(fms_f32) M(fnma_f32) \ 449 M(sqrt_f32) \ 450 M(shl_i32) M(shr_i32) M(sra_i32) \ 451 M(ceil) M(floor) M(trunc) M(round) M(to_fp16) M(from_fp16) \ 452 M(to_f32) \ 453 M(neq_f32) M(eq_f32) M(eq_i32) \ 454 M(gte_f32) M(gt_f32) M(gt_i32) \ 455 M(bit_and) M(bit_or) M(bit_xor) M(bit_clear) \ 456 M(select) 457 // End of SKVM_OPS 458 459 enum class Op : int { 460 #define M(op) op, 461 SKVM_OPS(M) 462 #undef M 463 }; 464 has_side_effect(Op op)465 static inline bool has_side_effect(Op op) { 466 return op <= Op::store128; 467 } touches_varying_memory(Op op)468 static inline bool touches_varying_memory(Op op) { 469 return Op::store8 <= op && op <= Op::load128; 470 } is_always_varying(Op op)471 static inline bool is_always_varying(Op op) { 472 return Op::store8 <= op && op <= Op::index; 473 } 474 475 using Val = int; 476 // We reserve an impossibe Val ID as a sentinel 477 // NA meaning none, n/a, null, nil, etc. 478 static const Val NA = -1; 479 480 struct Ptr { int ix; }; 481 482 struct I32 { 483 Builder* builder = nullptr; 484 Val id = NA; 485 explicit operator bool() const { return id != NA; } 486 Builder* operator->() const { return builder; } 487 }; 488 489 struct F32 { 490 Builder* builder = nullptr; 491 Val id = NA; 492 explicit operator bool() const { return id != NA; } 493 Builder* operator->() const { return builder; } 494 }; 495 496 struct Color { 497 F32 r,g,b,a; 498 explicit operator bool() const { return r && g && b && a; } 499 Builder* operator->() const { return a.operator->(); } 500 }; 501 502 struct HSLA { 503 F32 h,s,l,a; 504 explicit operator bool() const { return h && s && l && a; } 505 Builder* operator->() const { return a.operator->(); } 506 }; 507 508 struct Coord { 509 F32 x,y; 510 explicit operator bool() const { return x && y; } 511 Builder* operator->() const { return x.operator->(); } 512 }; 513 514 struct Uniform { 515 Ptr ptr; 516 int offset; 517 }; 518 struct Uniforms { 519 Ptr base; 520 std::vector<int> buf; 521 UniformsUniforms522 Uniforms(Ptr ptr, int init) : base(ptr), buf(init) {} 523 pushUniforms524 Uniform push(int val) { 525 buf.push_back(val); 526 return {base, (int)( sizeof(int)*(buf.size() - 1) )}; 527 } 528 pushFUniforms529 Uniform pushF(float val) { 530 int bits; 531 memcpy(&bits, &val, sizeof(int)); 532 return this->push(bits); 533 } 534 pushPtrUniforms535 Uniform pushPtr(const void* ptr) { 536 // Jam the pointer into 1 or 2 ints. 537 int ints[sizeof(ptr) / sizeof(int)]; 538 memcpy(ints, &ptr, sizeof(ptr)); 539 for (int bits : ints) { 540 buf.push_back(bits); 541 } 542 return {base, (int)( sizeof(int)*(buf.size() - SK_ARRAY_COUNT(ints)) )}; 543 } 544 }; 545 546 struct PixelFormat { 547 enum { UNORM, FLOAT} encoding; 548 int r_bits, g_bits, b_bits, a_bits, 549 r_shift, g_shift, b_shift, a_shift; 550 }; 551 PixelFormat SkColorType_to_PixelFormat(SkColorType); 552 553 SK_BEGIN_REQUIRE_DENSE 554 struct Instruction { 555 Op op; // v* = op(x,y,z,w,immA,immB), where * == index of this Instruction. 556 Val x,y,z,w; // Enough arguments for Op::store128. 557 int immA,immB; // Immediate bit pattern, shift count, pointer index, byte offset, etc. 558 }; 559 SK_END_REQUIRE_DENSE 560 561 bool operator==(const Instruction&, const Instruction&); 562 struct InstructionHash { 563 uint32_t operator()(const Instruction&, uint32_t seed=0) const; 564 }; 565 566 struct OptimizedInstruction { 567 Op op; 568 Val x,y,z,w; 569 int immA,immB; 570 571 Val death; 572 bool can_hoist; 573 }; 574 575 struct Features { 576 bool fma = false; 577 bool fp16 = false; 578 }; 579 580 class Builder { 581 public: 582 583 Builder(); 584 explicit Builder(Features); 585 586 Program done(const char* debug_name = nullptr, bool allow_jit=true) const; 587 588 // Mostly for debugging, tests, etc. program()589 std::vector<Instruction> program() const { return fProgram; } 590 std::vector<OptimizedInstruction> optimize() const; 591 592 // Declare an argument with given stride (use stride=0 for uniforms). 593 // TODO: different types for varying and uniforms? 594 Ptr arg(int stride); 595 596 // Convenience arg() wrappers for most common strides, sizeof(T) and 0. 597 template <typename T> varying()598 Ptr varying() { return this->arg(sizeof(T)); } uniform()599 Ptr uniform() { return this->arg(0); } 600 601 // TODO: allow uniform (i.e. Ptr) offsets to store* and load*? 602 // TODO: sign extension (signed types) for <32-bit loads? 603 // TODO: unsigned integer operations where relevant (just comparisons?)? 604 605 // Assert cond is true, printing debug when not. 606 void assert_true(I32 cond, I32 debug); assert_true(I32 cond,F32 debug)607 void assert_true(I32 cond, F32 debug) { assert_true(cond, pun_to_I32(debug)); } assert_true(I32 cond)608 void assert_true(I32 cond) { assert_true(cond, cond); } 609 610 // Store {8,16,32,64,128}-bit varying. 611 void store8 (Ptr ptr, I32 val); 612 void store16 (Ptr ptr, I32 val); 613 void store32 (Ptr ptr, I32 val); storeF(Ptr ptr,F32 val)614 void storeF (Ptr ptr, F32 val) { store32(ptr, pun_to_I32(val)); } 615 void store64 (Ptr ptr, I32 lo, I32 hi); // *ptr = lo|(hi<<32) 616 void store128(Ptr ptr, I32 x, I32 y, I32 z, I32 w); // *ptr = x|(y<<32)|(z<<64)|(w<<96) 617 618 // Returns varying {n, n-1, n-2, ..., 1}, where n is the argument to Program::eval(). 619 I32 index(); 620 621 // Load {8,16,32,64,128}-bit varying. 622 I32 load8 (Ptr ptr); 623 I32 load16 (Ptr ptr); 624 I32 load32 (Ptr ptr); loadF(Ptr ptr)625 F32 loadF (Ptr ptr) { return pun_to_F32(load32(ptr)); } 626 I32 load64 (Ptr ptr, int lane); // Load 32-bit lane 0-1 of 64-bit value. 627 I32 load128(Ptr ptr, int lane); // Load 32-bit lane 0-3 of 128-bit value. 628 629 // Load i32/f32 uniform with byte-count offset. 630 I32 uniform32(Ptr ptr, int offset); uniformF(Ptr ptr,int offset)631 F32 uniformF (Ptr ptr, int offset) { return pun_to_F32(uniform32(ptr,offset)); } 632 633 // Push and load this color as a uniform. 634 Color uniformColor(SkColor4f, Uniforms*); 635 636 // Gather u8,u16,i32 with varying element-count index from *(ptr + byte-count offset). 637 I32 gather8 (Ptr ptr, int offset, I32 index); 638 I32 gather16(Ptr ptr, int offset, I32 index); 639 I32 gather32(Ptr ptr, int offset, I32 index); gatherF(Ptr ptr,int offset,I32 index)640 F32 gatherF (Ptr ptr, int offset, I32 index) { 641 return pun_to_F32(gather32(ptr, offset, index)); 642 } 643 644 // Convenience methods for working with skvm::Uniform(s). uniform32(Uniform u)645 I32 uniform32(Uniform u) { return this->uniform32(u.ptr, u.offset); } uniformF(Uniform u)646 F32 uniformF (Uniform u) { return this->uniformF (u.ptr, u.offset); } gather8(Uniform u,I32 index)647 I32 gather8 (Uniform u, I32 index) { return this->gather8 (u.ptr, u.offset, index); } gather16(Uniform u,I32 index)648 I32 gather16 (Uniform u, I32 index) { return this->gather16 (u.ptr, u.offset, index); } gather32(Uniform u,I32 index)649 I32 gather32 (Uniform u, I32 index) { return this->gather32 (u.ptr, u.offset, index); } gatherF(Uniform u,I32 index)650 F32 gatherF (Uniform u, I32 index) { return this->gatherF (u.ptr, u.offset, index); } 651 652 // Load an immediate constant. 653 I32 splat(int n); splat(unsigned u)654 I32 splat(unsigned u) { return splat((int)u); } splat(float f)655 F32 splat(float f) { 656 int bits; 657 memcpy(&bits, &f, 4); 658 return pun_to_F32(splat(bits)); 659 } 660 661 // Some operations make sense with immediate arguments, 662 // so we provide overloads inline to make that seamless. 663 // 664 // We omit overloads that may indicate a bug or performance issue. 665 // In general it does not make sense to pass immediates to unary operations, 666 // and even sometimes not for binary operations, e.g. 667 // 668 // div(x, y) -- normal every day divide 669 // div(3.0f, y) -- yep, makes sense 670 // div(x, 3.0f) -- omitted as a reminder you probably want mul(x, 1/3.0f). 671 // 672 // You can of course always splat() to override these opinions. 673 674 // float math, comparisons, etc. 675 F32 add(F32, F32); add(F32 x,float y)676 F32 add(F32 x, float y) { return add(x, splat(y)); } add(float x,F32 y)677 F32 add(float x, F32 y) { return add(splat(x), y); } 678 679 F32 sub(F32, F32); sub(F32 x,float y)680 F32 sub(F32 x, float y) { return sub(x, splat(y)); } sub(float x,F32 y)681 F32 sub(float x, F32 y) { return sub(splat(x), y); } 682 683 F32 mul(F32, F32); mul(F32 x,float y)684 F32 mul(F32 x, float y) { return mul(x, splat(y)); } mul(float x,F32 y)685 F32 mul(float x, F32 y) { return mul(splat(x), y); } 686 687 // mul(), but allowing optimizations not strictly legal under IEEE-754 rules. 688 F32 fast_mul(F32, F32); fast_mul(F32 x,float y)689 F32 fast_mul(F32 x, float y) { return fast_mul(x, splat(y)); } fast_mul(float x,F32 y)690 F32 fast_mul(float x, F32 y) { return fast_mul(splat(x), y); } 691 692 F32 div(F32, F32); div(float x,F32 y)693 F32 div(float x, F32 y) { return div(splat(x), y); } 694 695 F32 min(F32, F32); min(F32 x,float y)696 F32 min(F32 x, float y) { return min(x, splat(y)); } min(float x,F32 y)697 F32 min(float x, F32 y) { return min(splat(x), y); } 698 699 F32 max(F32, F32); max(F32 x,float y)700 F32 max(F32 x, float y) { return max(x, splat(y)); } max(float x,F32 y)701 F32 max(float x, F32 y) { return max(splat(x), y); } 702 703 // TODO: remove mad()? It's just sugar. mad(F32 x,F32 y,F32 z)704 F32 mad(F32 x, F32 y, F32 z) { return add(mul(x,y), z); } mad(F32 x,F32 y,float z)705 F32 mad(F32 x, F32 y, float z) { return mad( x , y , splat(z)); } mad(F32 x,float y,F32 z)706 F32 mad(F32 x, float y, F32 z) { return mad( x , splat(y), z ); } mad(F32 x,float y,float z)707 F32 mad(F32 x, float y, float z) { return mad( x , splat(y), splat(z)); } mad(float x,F32 y,F32 z)708 F32 mad(float x, F32 y, F32 z) { return mad(splat(x), y , z ); } mad(float x,F32 y,float z)709 F32 mad(float x, F32 y, float z) { return mad(splat(x), y , splat(z)); } mad(float x,float y,F32 z)710 F32 mad(float x, float y, F32 z) { return mad(splat(x), splat(y), z ); } 711 712 F32 sqrt(F32); 713 F32 approx_log2(F32); 714 F32 approx_pow2(F32); approx_log(F32 x)715 F32 approx_log (F32 x) { return mul(0.69314718f, approx_log2(x)); } approx_exp(F32 x)716 F32 approx_exp (F32 x) { return approx_pow2(mul(x, 1.4426950408889634074f)); } 717 718 F32 approx_powf(F32 base, F32 exp); approx_powf(F32 base,float exp)719 F32 approx_powf(F32 base, float exp) { return approx_powf(base, splat(exp)); } approx_powf(float base,F32 exp)720 F32 approx_powf(float base, F32 exp) { return approx_powf(splat(base), exp); } 721 722 723 F32 approx_sin(F32 radians); approx_cos(F32 radians)724 F32 approx_cos(F32 radians) { return approx_sin(add(radians, SK_ScalarPI/2)); } 725 F32 approx_tan(F32 radians); 726 727 F32 approx_asin(F32 x); approx_acos(F32 x)728 F32 approx_acos(F32 x) { return sub(SK_ScalarPI/2, approx_asin(x)); } 729 F32 approx_atan(F32 x); 730 F32 approx_atan2(F32 y, F32 x); 731 732 F32 lerp(F32 lo, F32 hi, F32 t); lerp(F32 lo,F32 hi,float t)733 F32 lerp(F32 lo, F32 hi, float t) { return lerp( lo , hi , splat(t)); } lerp(F32 lo,float hi,float t)734 F32 lerp(F32 lo, float hi, float t) { return lerp( lo , splat(hi), splat(t)); } lerp(F32 lo,float hi,F32 t)735 F32 lerp(F32 lo, float hi, F32 t) { return lerp( lo , splat(hi), t ); } lerp(float lo,F32 hi,F32 t)736 F32 lerp(float lo, F32 hi, F32 t) { return lerp(splat(lo), hi , t ); } lerp(float lo,F32 hi,float t)737 F32 lerp(float lo, F32 hi, float t) { return lerp(splat(lo), hi , splat(t)); } lerp(float lo,float hi,F32 t)738 F32 lerp(float lo, float hi, F32 t) { return lerp(splat(lo), splat(hi), t ); } 739 clamp(F32 x,F32 lo,F32 hi)740 F32 clamp(F32 x, F32 lo, F32 hi) { return max(lo, min(x, hi)); } clamp(F32 x,F32 lo,float hi)741 F32 clamp(F32 x, F32 lo, float hi) { return clamp( x , lo , splat(hi)); } clamp(F32 x,float lo,float hi)742 F32 clamp(F32 x, float lo, float hi) { return clamp( x , splat(lo), splat(hi)); } clamp(F32 x,float lo,F32 hi)743 F32 clamp(F32 x, float lo, F32 hi) { return clamp( x , splat(lo), hi ); } clamp(float x,F32 lo,F32 hi)744 F32 clamp(float x, F32 lo, F32 hi) { return clamp(splat(x), lo , hi ); } clamp(float x,F32 lo,float hi)745 F32 clamp(float x, F32 lo, float hi) { return clamp(splat(x), lo , splat(hi)); } clamp(float x,float lo,F32 hi)746 F32 clamp(float x, float lo, F32 hi) { return clamp(splat(x), splat(lo), hi ); } 747 clamp01(F32 x)748 F32 clamp01(F32 x) { return clamp(x, 0.0f, 1.0f); } 749 abs(F32 x)750 F32 abs(F32 x) { return pun_to_F32(bit_and(pun_to_I32(x), 0x7fff'ffff)); } 751 F32 fract(F32 x) { return sub(x, floor(x)); } 752 F32 ceil(F32); 753 F32 floor(F32); 754 I32 is_NaN (F32 x) { return neq(x,x); } 755 I32 is_finite(F32 x) { return lt(bit_and(pun_to_I32(x), 0x7f80'0000), 0x7f80'0000); } 756 757 I32 trunc(F32 x); 758 I32 round(F32 x); // Round to int using current rounding mode (as if lrintf()). 759 I32 pun_to_I32(F32 x) { return {x.builder, x.id}; } 760 761 I32 to_fp16(F32 x); 762 F32 from_fp16(I32 x); 763 764 I32 eq(F32, F32); 765 I32 eq(F32 x, float y) { return eq(x, splat(y)); } 766 I32 eq(float x, F32 y) { return eq(splat(x), y); } 767 768 I32 neq(F32, F32); 769 I32 neq(F32 x, float y) { return neq(x, splat(y)); } 770 I32 neq(float x, F32 y) { return neq(splat(x), y); } 771 772 I32 lt(F32, F32); 773 I32 lt(F32 x, float y) { return lt(x, splat(y)); } 774 I32 lt(float x, F32 y) { return lt(splat(x), y); } 775 776 I32 lte(F32, F32); 777 I32 lte(F32 x, float y) { return lte(x, splat(y)); } 778 I32 lte(float x, F32 y) { return lte(splat(x), y); } 779 780 I32 gt(F32, F32); 781 I32 gt(F32 x, float y) { return gt(x, splat(y)); } 782 I32 gt(float x, F32 y) { return gt(splat(x), y); } 783 784 I32 gte(F32, F32); 785 I32 gte(F32 x, float y) { return gte(x, splat(y)); } 786 I32 gte(float x, F32 y) { return gte(splat(x), y); } 787 788 // int math, comparisons, etc. 789 I32 add(I32, I32); 790 I32 add(I32 x, int y) { return add(x, splat(y)); } 791 I32 add(int x, I32 y) { return add(splat(x), y); } 792 793 I32 sub(I32, I32); 794 I32 sub(I32 x, int y) { return sub(x, splat(y)); } 795 I32 sub(int x, I32 y) { return sub(splat(x), y); } 796 797 I32 mul(I32, I32); 798 I32 mul(I32 x, int y) { return mul(x, splat(y)); } 799 I32 mul(int x, I32 y) { return mul(splat(x), y); } 800 801 I32 shl(I32 x, int bits); 802 I32 shr(I32 x, int bits); 803 I32 sra(I32 x, int bits); 804 805 I32 eq(I32, I32); 806 I32 eq(I32 x, int y) { return eq(x, splat(y)); } 807 I32 eq(int x, I32 y) { return eq(splat(x), y); } 808 809 I32 neq(I32, I32); 810 I32 neq(I32 x, int y) { return neq(x, splat(y)); } 811 I32 neq(int x, I32 y) { return neq(splat(x), y); } 812 813 I32 lt(I32, I32); 814 I32 lt(I32 x, int y) { return lt(x, splat(y)); } 815 I32 lt(int x, I32 y) { return lt(splat(x), y); } 816 817 I32 lte(I32, I32); 818 I32 lte(I32 x, int y) { return lte(x, splat(y)); } 819 I32 lte(int x, I32 y) { return lte(splat(x), y); } 820 821 I32 gt(I32, I32); 822 I32 gt(I32 x, int y) { return gt(x, splat(y)); } 823 I32 gt(int x, I32 y) { return gt(splat(x), y); } 824 825 I32 gte(I32, I32); 826 I32 gte(I32 x, int y) { return gte(x, splat(y)); } 827 I32 gte(int x, I32 y) { return gte(splat(x), y); } 828 829 F32 to_F32(I32 x); 830 F32 pun_to_F32(I32 x) { return {x.builder, x.id}; } 831 832 // Bitwise operations. 833 I32 bit_and(I32, I32); 834 I32 bit_and(I32 x, int y) { return bit_and(x, splat(y)); } 835 I32 bit_and(int x, I32 y) { return bit_and(splat(x), y); } 836 837 I32 bit_or(I32, I32); 838 I32 bit_or(I32 x, int y) { return bit_or(x, splat(y)); } 839 I32 bit_or(int x, I32 y) { return bit_or(splat(x), y); } 840 841 I32 bit_xor(I32, I32); 842 I32 bit_xor(I32 x, int y) { return bit_xor(x, splat(y)); } 843 I32 bit_xor(int x, I32 y) { return bit_xor(splat(x), y); } 844 845 I32 bit_clear(I32, I32); 846 I32 bit_clear(I32 x, int y) { return bit_clear(x, splat(y)); } 847 I32 bit_clear(int x, I32 y) { return bit_clear(splat(x), y); } 848 849 I32 min(I32 x, I32 y) { return select(lte(x,y), x, y); } 850 I32 min(I32 x, int y) { return min(x, splat(y)); } 851 I32 min(int x, I32 y) { return min(splat(x), y); } 852 853 I32 max(I32 x, I32 y) { return select(gte(x,y), x, y); } 854 I32 max(I32 x, int y) { return max(x, splat(y)); } 855 I32 max(int x, I32 y) { return max(splat(x), y); } 856 857 I32 select(I32 cond, I32 t, I32 f); // cond ? t : f 858 I32 select(I32 cond, int t, I32 f) { return select(cond, splat(t), f ); } 859 I32 select(I32 cond, I32 t, int f) { return select(cond, t , splat(f)); } 860 I32 select(I32 cond, int t, int f) { return select(cond, splat(t), splat(f)); } 861 862 F32 select(I32 cond, F32 t, F32 f) { 863 return pun_to_F32(select(cond, pun_to_I32(t) 864 , pun_to_I32(f))); 865 } 866 F32 select(I32 cond, float t, F32 f) { return select(cond, splat(t), f ); } 867 F32 select(I32 cond, F32 t, float f) { return select(cond, t , splat(f)); } 868 F32 select(I32 cond, float t, float f) { return select(cond, splat(t), splat(f)); } 869 870 I32 extract(I32 x, int bits, I32 z); // (x>>bits) & z 871 I32 extract(I32 x, int bits, int z) { return extract(x, bits, splat(z)); } 872 I32 extract(int x, int bits, I32 z) { return extract(splat(x), bits, z); } 873 874 I32 pack(I32 x, I32 y, int bits); // x | (y<<bits) 875 I32 pack(I32 x, int y, int bits) { return pack(x, splat(y), bits); } 876 I32 pack(int x, I32 y, int bits) { return pack(splat(x), y, bits); } 877 878 879 // Common idioms used in several places, worth centralizing for consistency. 880 F32 from_unorm(int bits, I32); // E.g. from_unorm(8, x) -> x * (1/255.0f) 881 I32 to_unorm(int bits, F32); // E.g. to_unorm(8, x) -> round(x * 255) 882 883 Color load(PixelFormat, Ptr ptr); 884 void store(PixelFormat, Ptr ptr, Color); 885 Color gather(PixelFormat, Ptr ptr, int offset, I32 index); 886 Color gather(PixelFormat f, Uniform u, I32 index) { 887 return gather(f, u.ptr, u.offset, index); 888 } 889 890 void premul(F32* r, F32* g, F32* b, F32 a); 891 void unpremul(F32* r, F32* g, F32* b, F32 a); 892 893 Color premul(Color c) { this->premul(&c.r, &c.g, &c.b, c.a); return c; } 894 Color unpremul(Color c) { this->unpremul(&c.r, &c.g, &c.b, c.a); return c; } 895 896 Color lerp(Color lo, Color hi, F32 t); 897 Color blend(SkBlendMode, Color src, Color dst); 898 899 Color clamp01(Color c) { 900 return { clamp01(c.r), clamp01(c.g), clamp01(c.b), clamp01(c.a) }; 901 } 902 903 HSLA to_hsla(Color); 904 Color to_rgba(HSLA); 905 906 void dump(SkWStream* = nullptr) const; 907 908 uint64_t hash() const; 909 910 Val push(Instruction); 911 912 bool allImm() const { return true; } 913 914 template <typename T, typename... Rest> 915 bool allImm(Val id, T* imm, Rest... rest) const { 916 if (fProgram[id].op == Op::splat) { 917 static_assert(sizeof(T) == 4); 918 memcpy(imm, &fProgram[id].immA, 4); 919 return this->allImm(rest...); 920 } 921 return false; 922 } 923 924 private: 925 Val push(Op op, Val x=NA, Val y=NA, Val z=NA, Val w=NA, int immA=0, int immB=0) { 926 return this->push(Instruction{op, x,y,z,w, immA,immB}); 927 } 928 929 template <typename T> 930 bool isImm(Val id, T want) const { 931 T imm = 0; 932 return this->allImm(id, &imm) && imm == want; 933 } 934 935 SkTHashMap<Instruction, Val, InstructionHash> fIndex; 936 std::vector<Instruction> fProgram; 937 std::vector<int> fStrides; 938 const Features fFeatures; 939 }; 940 941 // Optimization passes and data structures normally used by Builder::optimize(), 942 // extracted here so they can be unit tested. 943 std::vector<Instruction> eliminate_dead_code(std::vector<Instruction>); 944 std::vector<OptimizedInstruction> finalize (std::vector<Instruction>); 945 946 using Reg = int; 947 948 // d = op(x,y,z,w, immA,immB) 949 struct InterpreterInstruction { 950 Op op; 951 Reg d,x,y,z,w; 952 int immA,immB; 953 }; 954 955 class Program { 956 public: 957 Program(const std::vector<OptimizedInstruction>& instructions, 958 const std::vector<int>& strides, 959 const char* debug_name, bool allow_jit); 960 961 Program(); 962 ~Program(); 963 964 Program(Program&&); 965 Program& operator=(Program&&); 966 967 Program(const Program&) = delete; 968 Program& operator=(const Program&) = delete; 969 970 void eval(int n, void* args[]) const; 971 972 template <typename... T> 973 void eval(int n, T*... arg) const { 974 SkASSERT(sizeof...(arg) == this->nargs()); 975 // This nullptr isn't important except that it makes args[] non-empty if you pass none. 976 void* args[] = { (void*)arg..., nullptr }; 977 this->eval(n, args); 978 } 979 980 std::vector<InterpreterInstruction> instructions() const; 981 int nargs() const; 982 int nregs() const; 983 int loop () const; 984 bool empty() const; 985 986 bool hasJIT() const; // Has this Program been JITted? 987 988 void dump(SkWStream* = nullptr) const; 989 990 private: 991 void setupInterpreter(const std::vector<OptimizedInstruction>&); 992 void setupJIT (const std::vector<OptimizedInstruction>&, const char* debug_name); 993 void setupLLVM (const std::vector<OptimizedInstruction>&, const char* debug_name); 994 995 bool jit(const std::vector<OptimizedInstruction>&, 996 int* stack_hint, uint32_t* registers_used, 997 Assembler*) const; 998 999 void waitForLLVM() const; 1000 void dropJIT(); 1001 1002 struct Impl; 1003 std::unique_ptr<Impl> fImpl; 1004 }; 1005 1006 // TODO: control flow 1007 // TODO: 64-bit values? 1008 1009 #define SI static inline 1010 1011 SI I32 operator+(I32 x, I32 y) { return x->add(x,y); } 1012 SI I32 operator+(I32 x, int y) { return x->add(x,y); } 1013 SI I32 operator+(int x, I32 y) { return y->add(x,y); } 1014 1015 SI I32 operator-(I32 x, I32 y) { return x->sub(x,y); } 1016 SI I32 operator-(I32 x, int y) { return x->sub(x,y); } 1017 SI I32 operator-(int x, I32 y) { return y->sub(x,y); } 1018 1019 SI I32 operator*(I32 x, I32 y) { return x->mul(x,y); } 1020 SI I32 operator*(I32 x, int y) { return x->mul(x,y); } 1021 SI I32 operator*(int x, I32 y) { return y->mul(x,y); } 1022 min(I32 x,I32 y)1023 SI I32 min(I32 x, I32 y) { return x->min(x,y); } min(I32 x,int y)1024 SI I32 min(I32 x, int y) { return x->min(x,y); } min(int x,I32 y)1025 SI I32 min(int x, I32 y) { return y->min(x,y); } 1026 max(I32 x,I32 y)1027 SI I32 max(I32 x, I32 y) { return x->max(x,y); } max(I32 x,int y)1028 SI I32 max(I32 x, int y) { return x->max(x,y); } max(int x,I32 y)1029 SI I32 max(int x, I32 y) { return y->max(x,y); } 1030 1031 SI I32 operator==(I32 x, I32 y) { return x->eq(x,y); } 1032 SI I32 operator==(I32 x, int y) { return x->eq(x,y); } 1033 SI I32 operator==(int x, I32 y) { return y->eq(x,y); } 1034 1035 SI I32 operator!=(I32 x, I32 y) { return x->neq(x,y); } 1036 SI I32 operator!=(I32 x, int y) { return x->neq(x,y); } 1037 SI I32 operator!=(int x, I32 y) { return y->neq(x,y); } 1038 1039 SI I32 operator< (I32 x, I32 y) { return x->lt(x,y); } 1040 SI I32 operator< (I32 x, int y) { return x->lt(x,y); } 1041 SI I32 operator< (int x, I32 y) { return y->lt(x,y); } 1042 1043 SI I32 operator<=(I32 x, I32 y) { return x->lte(x,y); } 1044 SI I32 operator<=(I32 x, int y) { return x->lte(x,y); } 1045 SI I32 operator<=(int x, I32 y) { return y->lte(x,y); } 1046 1047 SI I32 operator> (I32 x, I32 y) { return x->gt(x,y); } 1048 SI I32 operator> (I32 x, int y) { return x->gt(x,y); } 1049 SI I32 operator> (int x, I32 y) { return y->gt(x,y); } 1050 1051 SI I32 operator>=(I32 x, I32 y) { return x->gte(x,y); } 1052 SI I32 operator>=(I32 x, int y) { return x->gte(x,y); } 1053 SI I32 operator>=(int x, I32 y) { return y->gte(x,y); } 1054 1055 1056 SI F32 operator+(F32 x, F32 y) { return x->add(x,y); } 1057 SI F32 operator+(F32 x, float y) { return x->add(x,y); } 1058 SI F32 operator+(float x, F32 y) { return y->add(x,y); } 1059 1060 SI F32 operator-(F32 x, F32 y) { return x->sub(x,y); } 1061 SI F32 operator-(F32 x, float y) { return x->sub(x,y); } 1062 SI F32 operator-(float x, F32 y) { return y->sub(x,y); } 1063 1064 SI F32 operator*(F32 x, F32 y) { return x->mul(x,y); } 1065 SI F32 operator*(F32 x, float y) { return x->mul(x,y); } 1066 SI F32 operator*(float x, F32 y) { return y->mul(x,y); } 1067 fast_mul(F32 x,F32 y)1068 SI F32 fast_mul(F32 x, F32 y) { return x->fast_mul(x,y); } fast_mul(F32 x,float y)1069 SI F32 fast_mul(F32 x, float y) { return x->fast_mul(x,y); } fast_mul(float x,F32 y)1070 SI F32 fast_mul(float x, F32 y) { return y->fast_mul(x,y); } 1071 1072 SI F32 operator/(F32 x, F32 y) { return x->div(x,y); } 1073 SI F32 operator/(float x, F32 y) { return y->div(x,y); } 1074 min(F32 x,F32 y)1075 SI F32 min(F32 x, F32 y) { return x->min(x,y); } min(F32 x,float y)1076 SI F32 min(F32 x, float y) { return x->min(x,y); } min(float x,F32 y)1077 SI F32 min(float x, F32 y) { return y->min(x,y); } 1078 max(F32 x,F32 y)1079 SI F32 max(F32 x, F32 y) { return x->max(x,y); } max(F32 x,float y)1080 SI F32 max(F32 x, float y) { return x->max(x,y); } max(float x,F32 y)1081 SI F32 max(float x, F32 y) { return y->max(x,y); } 1082 1083 SI I32 operator==(F32 x, F32 y) { return x->eq(x,y); } 1084 SI I32 operator==(F32 x, float y) { return x->eq(x,y); } 1085 SI I32 operator==(float x, F32 y) { return y->eq(x,y); } 1086 1087 SI I32 operator!=(F32 x, F32 y) { return x->neq(x,y); } 1088 SI I32 operator!=(F32 x, float y) { return x->neq(x,y); } 1089 SI I32 operator!=(float x, F32 y) { return y->neq(x,y); } 1090 1091 SI I32 operator< (F32 x, F32 y) { return x->lt(x,y); } 1092 SI I32 operator< (F32 x, float y) { return x->lt(x,y); } 1093 SI I32 operator< (float x, F32 y) { return y->lt(x,y); } 1094 1095 SI I32 operator<=(F32 x, F32 y) { return x->lte(x,y); } 1096 SI I32 operator<=(F32 x, float y) { return x->lte(x,y); } 1097 SI I32 operator<=(float x, F32 y) { return y->lte(x,y); } 1098 1099 SI I32 operator> (F32 x, F32 y) { return x->gt(x,y); } 1100 SI I32 operator> (F32 x, float y) { return x->gt(x,y); } 1101 SI I32 operator> (float x, F32 y) { return y->gt(x,y); } 1102 1103 SI I32 operator>=(F32 x, F32 y) { return x->gte(x,y); } 1104 SI I32 operator>=(F32 x, float y) { return x->gte(x,y); } 1105 SI I32 operator>=(float x, F32 y) { return y->gte(x,y); } 1106 1107 SI I32& operator+=(I32& x, I32 y) { return (x = x + y); } 1108 SI I32& operator+=(I32& x, int y) { return (x = x + y); } 1109 1110 SI I32& operator-=(I32& x, I32 y) { return (x = x - y); } 1111 SI I32& operator-=(I32& x, int y) { return (x = x - y); } 1112 1113 SI I32& operator*=(I32& x, I32 y) { return (x = x * y); } 1114 SI I32& operator*=(I32& x, int y) { return (x = x * y); } 1115 1116 SI F32& operator+=(F32& x, F32 y) { return (x = x + y); } 1117 SI F32& operator+=(F32& x, float y) { return (x = x + y); } 1118 1119 SI F32& operator-=(F32& x, F32 y) { return (x = x - y); } 1120 SI F32& operator-=(F32& x, float y) { return (x = x - y); } 1121 1122 SI F32& operator*=(F32& x, F32 y) { return (x = x * y); } 1123 SI F32& operator*=(F32& x, float y) { return (x = x * y); } 1124 1125 SI F32& operator/=(F32& x, F32 y) { return (x = x / y); } 1126 assert_true(I32 cond,I32 debug)1127 SI void assert_true(I32 cond, I32 debug) { cond->assert_true(cond,debug); } assert_true(I32 cond,F32 debug)1128 SI void assert_true(I32 cond, F32 debug) { cond->assert_true(cond,debug); } assert_true(I32 cond)1129 SI void assert_true(I32 cond) { cond->assert_true(cond); } 1130 store8(Ptr ptr,I32 val)1131 SI void store8 (Ptr ptr, I32 val) { val->store8 (ptr, val); } store16(Ptr ptr,I32 val)1132 SI void store16 (Ptr ptr, I32 val) { val->store16 (ptr, val); } store32(Ptr ptr,I32 val)1133 SI void store32 (Ptr ptr, I32 val) { val->store32 (ptr, val); } storeF(Ptr ptr,F32 val)1134 SI void storeF (Ptr ptr, F32 val) { val->storeF (ptr, val); } store64(Ptr ptr,I32 lo,I32 hi)1135 SI void store64 (Ptr ptr, I32 lo, I32 hi) { lo ->store64 (ptr, lo,hi); } store128(Ptr ptr,I32 x,I32 y,I32 z,I32 w)1136 SI void store128(Ptr ptr, I32 x, I32 y, I32 z, I32 w) { x ->store128(ptr, x,y,z,w); } 1137 gather8(Ptr ptr,int off,I32 ix)1138 SI I32 gather8 (Ptr ptr, int off, I32 ix) { return ix->gather8 (ptr, off, ix); } gather16(Ptr ptr,int off,I32 ix)1139 SI I32 gather16(Ptr ptr, int off, I32 ix) { return ix->gather16(ptr, off, ix); } gather32(Ptr ptr,int off,I32 ix)1140 SI I32 gather32(Ptr ptr, int off, I32 ix) { return ix->gather32(ptr, off, ix); } gatherF(Ptr ptr,int off,I32 ix)1141 SI F32 gatherF (Ptr ptr, int off, I32 ix) { return ix->gatherF (ptr, off, ix); } 1142 gather8(Uniform u,I32 ix)1143 SI I32 gather8 (Uniform u, I32 ix) { return ix->gather8 (u, ix); } gather16(Uniform u,I32 ix)1144 SI I32 gather16(Uniform u, I32 ix) { return ix->gather16(u, ix); } gather32(Uniform u,I32 ix)1145 SI I32 gather32(Uniform u, I32 ix) { return ix->gather32(u, ix); } gatherF(Uniform u,I32 ix)1146 SI F32 gatherF (Uniform u, I32 ix) { return ix->gatherF (u, ix); } 1147 sqrt(F32 x)1148 SI F32 sqrt(F32 x) { return x-> sqrt(x); } approx_log2(F32 x)1149 SI F32 approx_log2(F32 x) { return x->approx_log2(x); } approx_pow2(F32 x)1150 SI F32 approx_pow2(F32 x) { return x->approx_pow2(x); } approx_log(F32 x)1151 SI F32 approx_log (F32 x) { return x->approx_log (x); } approx_exp(F32 x)1152 SI F32 approx_exp (F32 x) { return x->approx_exp (x); } 1153 approx_powf(F32 base,F32 exp)1154 SI F32 approx_powf(F32 base, F32 exp) { return base->approx_powf(base, exp); } approx_powf(F32 base,float exp)1155 SI F32 approx_powf(F32 base, float exp) { return base->approx_powf(base, exp); } approx_powf(float base,F32 exp)1156 SI F32 approx_powf(float base, F32 exp) { return exp->approx_powf(base, exp); } 1157 approx_sin(F32 radians)1158 SI F32 approx_sin(F32 radians) { return radians->approx_sin(radians); } approx_cos(F32 radians)1159 SI F32 approx_cos(F32 radians) { return radians->approx_cos(radians); } approx_tan(F32 radians)1160 SI F32 approx_tan(F32 radians) { return radians->approx_tan(radians); } 1161 approx_asin(F32 x)1162 SI F32 approx_asin(F32 x) { return x->approx_asin(x); } approx_acos(F32 x)1163 SI F32 approx_acos(F32 x) { return x->approx_acos(x); } approx_atan(F32 x)1164 SI F32 approx_atan(F32 x) { return x->approx_atan(x); } approx_atan2(F32 y,F32 x)1165 SI F32 approx_atan2(F32 y, F32 x) { return x->approx_atan2(y, x); } 1166 clamp01(F32 x)1167 SI F32 clamp01(F32 x) { return x-> clamp01(x); } abs(F32 x)1168 SI F32 abs(F32 x) { return x-> abs(x); } ceil(F32 x)1169 SI F32 ceil(F32 x) { return x-> ceil(x); } fract(F32 x)1170 SI F32 fract(F32 x) { return x-> fract(x); } floor(F32 x)1171 SI F32 floor(F32 x) { return x-> floor(x); } is_NaN(F32 x)1172 SI I32 is_NaN(F32 x) { return x-> is_NaN(x); } is_finite(F32 x)1173 SI I32 is_finite(F32 x) { return x->is_finite(x); } 1174 trunc(F32 x)1175 SI I32 trunc(F32 x) { return x-> trunc(x); } round(F32 x)1176 SI I32 round(F32 x) { return x-> round(x); } pun_to_I32(F32 x)1177 SI I32 pun_to_I32(F32 x) { return x-> pun_to_I32(x); } pun_to_F32(I32 x)1178 SI F32 pun_to_F32(I32 x) { return x-> pun_to_F32(x); } to_F32(I32 x)1179 SI F32 to_F32(I32 x) { return x-> to_F32(x); } to_fp16(F32 x)1180 SI I32 to_fp16(F32 x) { return x-> to_fp16(x); } from_fp16(I32 x)1181 SI F32 from_fp16(I32 x) { return x-> from_fp16(x); } 1182 lerp(F32 lo,F32 hi,F32 t)1183 SI F32 lerp(F32 lo, F32 hi, F32 t) { return lo->lerp(lo,hi,t); } lerp(F32 lo,F32 hi,float t)1184 SI F32 lerp(F32 lo, F32 hi, float t) { return lo->lerp(lo,hi,t); } lerp(F32 lo,float hi,F32 t)1185 SI F32 lerp(F32 lo, float hi, F32 t) { return lo->lerp(lo,hi,t); } lerp(F32 lo,float hi,float t)1186 SI F32 lerp(F32 lo, float hi, float t) { return lo->lerp(lo,hi,t); } lerp(float lo,F32 hi,F32 t)1187 SI F32 lerp(float lo, F32 hi, F32 t) { return hi->lerp(lo,hi,t); } lerp(float lo,F32 hi,float t)1188 SI F32 lerp(float lo, F32 hi, float t) { return hi->lerp(lo,hi,t); } lerp(float lo,float hi,F32 t)1189 SI F32 lerp(float lo, float hi, F32 t) { return t->lerp(lo,hi,t); } 1190 clamp(F32 x,F32 lo,F32 hi)1191 SI F32 clamp(F32 x, F32 lo, F32 hi) { return x->clamp(x,lo,hi); } clamp(F32 x,F32 lo,float hi)1192 SI F32 clamp(F32 x, F32 lo, float hi) { return x->clamp(x,lo,hi); } clamp(F32 x,float lo,F32 hi)1193 SI F32 clamp(F32 x, float lo, F32 hi) { return x->clamp(x,lo,hi); } clamp(F32 x,float lo,float hi)1194 SI F32 clamp(F32 x, float lo, float hi) { return x->clamp(x,lo,hi); } clamp(float x,F32 lo,F32 hi)1195 SI F32 clamp(float x, F32 lo, F32 hi) { return lo->clamp(x,lo,hi); } clamp(float x,F32 lo,float hi)1196 SI F32 clamp(float x, F32 lo, float hi) { return lo->clamp(x,lo,hi); } clamp(float x,float lo,F32 hi)1197 SI F32 clamp(float x, float lo, F32 hi) { return hi->clamp(x,lo,hi); } 1198 1199 SI I32 operator<<(I32 x, int bits) { return x->shl(x, bits); } shl(I32 x,int bits)1200 SI I32 shl(I32 x, int bits) { return x->shl(x, bits); } shr(I32 x,int bits)1201 SI I32 shr(I32 x, int bits) { return x->shr(x, bits); } sra(I32 x,int bits)1202 SI I32 sra(I32 x, int bits) { return x->sra(x, bits); } 1203 1204 SI I32 operator&(I32 x, I32 y) { return x->bit_and(x,y); } 1205 SI I32 operator&(I32 x, int y) { return x->bit_and(x,y); } 1206 SI I32 operator&(int x, I32 y) { return y->bit_and(x,y); } 1207 1208 SI I32 operator|(I32 x, I32 y) { return x->bit_or (x,y); } 1209 SI I32 operator|(I32 x, int y) { return x->bit_or (x,y); } 1210 SI I32 operator|(int x, I32 y) { return y->bit_or (x,y); } 1211 1212 SI I32 operator^(I32 x, I32 y) { return x->bit_xor(x,y); } 1213 SI I32 operator^(I32 x, int y) { return x->bit_xor(x,y); } 1214 SI I32 operator^(int x, I32 y) { return y->bit_xor(x,y); } 1215 1216 SI I32& operator&=(I32& x, I32 y) { return (x = x & y); } 1217 SI I32& operator&=(I32& x, int y) { return (x = x & y); } 1218 SI I32& operator|=(I32& x, I32 y) { return (x = x | y); } 1219 SI I32& operator|=(I32& x, int y) { return (x = x | y); } 1220 SI I32& operator^=(I32& x, I32 y) { return (x = x ^ y); } 1221 SI I32& operator^=(I32& x, int y) { return (x = x ^ y); } 1222 bit_clear(I32 x,I32 y)1223 SI I32 bit_clear(I32 x, I32 y) { return x->bit_clear(x,y); } bit_clear(I32 x,int y)1224 SI I32 bit_clear(I32 x, int y) { return x->bit_clear(x,y); } bit_clear(int x,I32 y)1225 SI I32 bit_clear(int x, I32 y) { return y->bit_clear(x,y); } 1226 select(I32 c,I32 t,I32 f)1227 SI I32 select(I32 c, I32 t, I32 f) { return c->select(c, t , f ); } select(I32 c,I32 t,int f)1228 SI I32 select(I32 c, I32 t, int f) { return c->select(c, t , c->splat(f)); } select(I32 c,int t,I32 f)1229 SI I32 select(I32 c, int t, I32 f) { return c->select(c, c->splat(t), f ); } select(I32 c,int t,int f)1230 SI I32 select(I32 c, int t, int f) { return c->select(c, c->splat(t), c->splat(f)); } 1231 select(I32 c,F32 t,F32 f)1232 SI F32 select(I32 c, F32 t, F32 f) { return c->select(c, t , f ); } select(I32 c,F32 t,float f)1233 SI F32 select(I32 c, F32 t, float f) { return c->select(c, t , c->splat(f)); } select(I32 c,float t,F32 f)1234 SI F32 select(I32 c, float t, F32 f) { return c->select(c, c->splat(t), f ); } select(I32 c,float t,float f)1235 SI F32 select(I32 c, float t, float f) { return c->select(c, c->splat(t), c->splat(f)); } 1236 extract(I32 x,int bits,I32 z)1237 SI I32 extract(I32 x, int bits, I32 z) { return x->extract(x,bits,z); } extract(I32 x,int bits,int z)1238 SI I32 extract(I32 x, int bits, int z) { return x->extract(x,bits,z); } extract(int x,int bits,I32 z)1239 SI I32 extract(int x, int bits, I32 z) { return z->extract(x,bits,z); } 1240 pack(I32 x,I32 y,int bits)1241 SI I32 pack(I32 x, I32 y, int bits) { return x->pack (x,y,bits); } pack(I32 x,int y,int bits)1242 SI I32 pack(I32 x, int y, int bits) { return x->pack (x,y,bits); } pack(int x,I32 y,int bits)1243 SI I32 pack(int x, I32 y, int bits) { return y->pack (x,y,bits); } 1244 1245 SI I32 operator~(I32 x) { return ~0 ^ x; } 1246 SI I32 operator-(I32 x) { return 0 - x; } 1247 SI F32 operator-(F32 x) { return 0.0f - x; } 1248 from_unorm(int bits,I32 x)1249 SI F32 from_unorm(int bits, I32 x) { return x->from_unorm(bits,x); } to_unorm(int bits,F32 x)1250 SI I32 to_unorm(int bits, F32 x) { return x-> to_unorm(bits,x); } 1251 store(PixelFormat f,Ptr p,Color c)1252 SI void store(PixelFormat f, Ptr p, Color c) { return c->store(f,p,c); } 1253 gather(PixelFormat f,Ptr p,int off,I32 ix)1254 SI Color gather(PixelFormat f, Ptr p, int off, I32 ix) { return ix->gather(f,p,off,ix); } gather(PixelFormat f,Uniform u,I32 ix)1255 SI Color gather(PixelFormat f, Uniform u , I32 ix) { return ix->gather(f,u,ix); } 1256 premul(F32 * r,F32 * g,F32 * b,F32 a)1257 SI void premul(F32* r, F32* g, F32* b, F32 a) { a-> premul(r,g,b,a); } unpremul(F32 * r,F32 * g,F32 * b,F32 a)1258 SI void unpremul(F32* r, F32* g, F32* b, F32 a) { a->unpremul(r,g,b,a); } 1259 premul(Color c)1260 SI Color premul(Color c) { return c-> premul(c); } unpremul(Color c)1261 SI Color unpremul(Color c) { return c->unpremul(c); } 1262 lerp(Color lo,Color hi,F32 t)1263 SI Color lerp(Color lo, Color hi, F32 t) { return t->lerp(lo,hi,t); } 1264 blend(SkBlendMode m,Color s,Color d)1265 SI Color blend(SkBlendMode m, Color s, Color d) { return s->blend(m,s,d); } 1266 clamp01(Color c)1267 SI Color clamp01(Color c) { return c->clamp01(c); } 1268 to_hsla(Color c)1269 SI HSLA to_hsla(Color c) { return c->to_hsla(c); } to_rgba(HSLA c)1270 SI Color to_rgba(HSLA c) { return c->to_rgba(c); } 1271 1272 // Evaluate polynomials: ax^n + bx^(n-1) + ... for n >= 1 1273 template <typename F32_or_float, typename... Rest> poly(F32 x,F32_or_float a,float b,Rest...rest)1274 SI F32 poly(F32 x, F32_or_float a, float b, Rest... rest) { 1275 if constexpr (sizeof...(rest) == 0) { 1276 return x*a+b; 1277 } else { 1278 return poly(x, x*a+b, rest...); 1279 } 1280 } 1281 #undef SI 1282 } // namespace skvm 1283 1284 #endif//SkVM_DEFINED 1285