1 /* 2 * Copyright 2019 Google LLC 3 * 4 * Use of this source code is governed by a BSD-style license that can be 5 * found in the LICENSE file. 6 */ 7 8 #ifndef SkVM_DEFINED 9 #define SkVM_DEFINED 10 11 #include "include/core/SkBlendMode.h" 12 #include "include/core/SkColor.h" 13 #include "include/core/SkSpan.h" 14 #include "include/private/SkMacros.h" 15 #include "include/private/SkTArray.h" 16 #include "include/private/SkTHash.h" 17 #include "src/core/SkVM_fwd.h" 18 #include <vector> // std::vector 19 20 class SkWStream; 21 22 #if defined(SKVM_JIT_WHEN_POSSIBLE) && !defined(SK_BUILD_FOR_IOS) 23 #if defined(__x86_64__) || defined(_M_X64) 24 #if defined(_WIN32) || defined(__linux) || defined(__APPLE__) 25 #define SKVM_JIT 26 #endif 27 #endif 28 #if defined(__aarch64__) 29 #if defined(__ANDROID__) || defined(__APPLE__) 30 #define SKVM_JIT 31 #endif 32 #endif 33 #endif 34 35 #if 0 36 #define SKVM_LLVM 37 #endif 38 39 #if 0 40 #undef SKVM_JIT 41 #endif 42 43 namespace skvm { 44 45 class Assembler { 46 public: 47 explicit Assembler(void* buf); 48 49 size_t size() const; 50 51 // Order matters... GP64, Xmm, Ymm values match 4-bit register encoding for each. 52 enum GP64 { 53 rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi, 54 r8 , r9 , r10, r11, r12, r13, r14, r15, 55 }; 56 enum Xmm { 57 xmm0, xmm1, xmm2 , xmm3 , xmm4 , xmm5 , xmm6 , xmm7 , 58 xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, 59 }; 60 enum Ymm { 61 ymm0, ymm1, ymm2 , ymm3 , ymm4 , ymm5 , ymm6 , ymm7 , 62 ymm8, ymm9, ymm10, ymm11, ymm12, ymm13, ymm14, ymm15, 63 }; 64 65 // X and V values match 5-bit encoding for each (nothing tricky). 66 enum X { 67 x0 , x1 , x2 , x3 , x4 , x5 , x6 , x7 , 68 x8 , x9 , x10, x11, x12, x13, x14, x15, 69 x16, x17, x18, x19, x20, x21, x22, x23, 70 x24, x25, x26, x27, x28, x29, x30, xzr, sp=xzr, 71 }; 72 enum V { 73 v0 , v1 , v2 , v3 , v4 , v5 , v6 , v7 , 74 v8 , v9 , v10, v11, v12, v13, v14, v15, 75 v16, v17, v18, v19, v20, v21, v22, v23, 76 v24, v25, v26, v27, v28, v29, v30, v31, 77 }; 78 79 void bytes(const void*, int); 80 void byte(uint8_t); 81 void word(uint32_t); 82 83 struct Label { 84 int offset = 0; 85 enum { NotYetSet, ARMDisp19, X86Disp32 } kind = NotYetSet; 86 SkSTArray<2, int> references; 87 }; 88 89 // x86-64 90 91 void align(int mod); 92 93 void int3(); 94 void vzeroupper(); 95 void ret(); 96 97 // Mem represents a value at base + disp + scale*index, 98 // or simply at base + disp if index=rsp. 99 enum Scale { ONE, TWO, FOUR, EIGHT }; 100 struct Mem { 101 GP64 base; 102 int disp = 0; 103 GP64 index = rsp; 104 Scale scale = ONE; 105 }; 106 107 struct Operand { 108 union { 109 int reg; 110 Mem mem; 111 Label* label; 112 }; 113 enum { REG, MEM, LABEL } kind; 114 OperandOperand115 Operand(GP64 r) : reg (r), kind(REG ) {} OperandOperand116 Operand(Xmm r) : reg (r), kind(REG ) {} OperandOperand117 Operand(Ymm r) : reg (r), kind(REG ) {} OperandOperand118 Operand(Mem m) : mem (m), kind(MEM ) {} OperandOperand119 Operand(Label* l) : label(l), kind(LABEL) {} 120 }; 121 122 void vpand (Ymm dst, Ymm x, Operand y); 123 void vpandn(Ymm dst, Ymm x, Operand y); 124 void vpor (Ymm dst, Ymm x, Operand y); 125 void vpxor (Ymm dst, Ymm x, Operand y); 126 127 void vpaddd (Ymm dst, Ymm x, Operand y); 128 void vpsubd (Ymm dst, Ymm x, Operand y); 129 void vpmulld(Ymm dst, Ymm x, Operand y); 130 131 void vpaddw (Ymm dst, Ymm x, Operand y); 132 void vpsubw (Ymm dst, Ymm x, Operand y); 133 void vpmullw (Ymm dst, Ymm x, Operand y); 134 135 void vpabsw (Ymm dst, Operand x); 136 void vpavgw (Ymm dst, Ymm x, Operand y); // dst = (x+y+1)>>1, unsigned. 137 void vpmulhrsw(Ymm dst, Ymm x, Operand y); // dst = (x*y + (1<<14)) >> 15, signed. 138 void vpminsw (Ymm dst, Ymm x, Operand y); 139 void vpminuw (Ymm dst, Ymm x, Operand y); 140 void vpmaxsw (Ymm dst, Ymm x, Operand y); 141 void vpmaxuw (Ymm dst, Ymm x, Operand y); 142 143 void vaddps(Ymm dst, Ymm x, Operand y); 144 void vsubps(Ymm dst, Ymm x, Operand y); 145 void vmulps(Ymm dst, Ymm x, Operand y); 146 void vdivps(Ymm dst, Ymm x, Operand y); 147 void vminps(Ymm dst, Ymm x, Operand y); 148 void vmaxps(Ymm dst, Ymm x, Operand y); 149 150 void vsqrtps(Ymm dst, Operand x); 151 152 void vfmadd132ps(Ymm dst, Ymm x, Operand y); 153 void vfmadd213ps(Ymm dst, Ymm x, Operand y); 154 void vfmadd231ps(Ymm dst, Ymm x, Operand y); 155 156 void vfmsub132ps(Ymm dst, Ymm x, Operand y); 157 void vfmsub213ps(Ymm dst, Ymm x, Operand y); 158 void vfmsub231ps(Ymm dst, Ymm x, Operand y); 159 160 void vfnmadd132ps(Ymm dst, Ymm x, Operand y); 161 void vfnmadd213ps(Ymm dst, Ymm x, Operand y); 162 void vfnmadd231ps(Ymm dst, Ymm x, Operand y); 163 164 void vpackusdw(Ymm dst, Ymm x, Operand y); 165 void vpackuswb(Ymm dst, Ymm x, Operand y); 166 167 void vpunpckldq(Ymm dst, Ymm x, Operand y); 168 void vpunpckhdq(Ymm dst, Ymm x, Operand y); 169 170 void vpcmpeqd(Ymm dst, Ymm x, Operand y); 171 void vpcmpgtd(Ymm dst, Ymm x, Operand y); 172 void vpcmpeqw(Ymm dst, Ymm x, Operand y); 173 void vpcmpgtw(Ymm dst, Ymm x, Operand y); 174 175 void vcmpps (Ymm dst, Ymm x, Operand y, int imm); vcmpeqps(Ymm dst,Ymm x,Operand y)176 void vcmpeqps (Ymm dst, Ymm x, Operand y) { this->vcmpps(dst,x,y,0); } vcmpltps(Ymm dst,Ymm x,Operand y)177 void vcmpltps (Ymm dst, Ymm x, Operand y) { this->vcmpps(dst,x,y,1); } vcmpleps(Ymm dst,Ymm x,Operand y)178 void vcmpleps (Ymm dst, Ymm x, Operand y) { this->vcmpps(dst,x,y,2); } vcmpneqps(Ymm dst,Ymm x,Operand y)179 void vcmpneqps(Ymm dst, Ymm x, Operand y) { this->vcmpps(dst,x,y,4); } 180 181 // Sadly, the x parameter cannot be a general Operand for these shifts. 182 void vpslld(Ymm dst, Ymm x, int imm); 183 void vpsrld(Ymm dst, Ymm x, int imm); 184 void vpsrad(Ymm dst, Ymm x, int imm); 185 186 void vpsllw(Ymm dst, Ymm x, int imm); 187 void vpsrlw(Ymm dst, Ymm x, int imm); 188 void vpsraw(Ymm dst, Ymm x, int imm); 189 190 void vpermq (Ymm dst, Operand x, int imm); 191 void vperm2f128(Ymm dst, Ymm x, Operand y, int imm); 192 void vpermps (Ymm dst, Ymm ix, Operand src); // dst[i] = src[ix[i]] 193 194 enum Rounding { NEAREST, FLOOR, CEIL, TRUNC, CURRENT }; 195 void vroundps(Ymm dst, Operand x, Rounding); 196 197 void vmovdqa(Ymm dst, Operand x); 198 void vmovups(Ymm dst, Operand x); 199 void vmovups(Xmm dst, Operand x); 200 void vmovups(Operand dst, Ymm x); 201 void vmovups(Operand dst, Xmm x); 202 203 void vcvtdq2ps (Ymm dst, Operand x); 204 void vcvttps2dq(Ymm dst, Operand x); 205 void vcvtps2dq (Ymm dst, Operand x); 206 207 void vcvtps2ph(Operand dst, Ymm x, Rounding); 208 void vcvtph2ps(Ymm dst, Operand x); 209 210 void vpblendvb(Ymm dst, Ymm x, Operand y, Ymm z); 211 212 void vpshufb(Ymm dst, Ymm x, Operand y); 213 214 void vptest(Ymm x, Operand y); 215 216 void vbroadcastss(Ymm dst, Operand y); 217 218 void vpmovzxwd(Ymm dst, Operand src); // dst = src, 128-bit, uint16_t -> int 219 void vpmovzxbd(Ymm dst, Operand src); // dst = src, 64-bit, uint8_t -> int 220 221 void vmovq(Operand dst, Xmm src); // dst = src, 64-bit 222 void vmovd(Operand dst, Xmm src); // dst = src, 32-bit 223 void vmovd(Xmm dst, Operand src); // dst = src, 32-bit 224 225 void vpinsrd(Xmm dst, Xmm src, Operand y, int imm); // dst = src; dst[imm] = y, 32-bit 226 void vpinsrw(Xmm dst, Xmm src, Operand y, int imm); // dst = src; dst[imm] = y, 16-bit 227 void vpinsrb(Xmm dst, Xmm src, Operand y, int imm); // dst = src; dst[imm] = y, 8-bit 228 229 void vextracti128(Operand dst, Ymm src, int imm); // dst = src[imm], 128-bit 230 void vpextrd (Operand dst, Xmm src, int imm); // dst = src[imm], 32-bit 231 void vpextrw (Operand dst, Xmm src, int imm); // dst = src[imm], 16-bit 232 void vpextrb (Operand dst, Xmm src, int imm); // dst = src[imm], 8-bit 233 234 // if (mask & 0x8000'0000) { 235 // dst = base[scale*ix]; 236 // } 237 // mask = 0; 238 void vgatherdps(Ymm dst, Scale scale, Ymm ix, GP64 base, Ymm mask); 239 240 241 void label(Label*); 242 243 void jmp(Label*); 244 void je (Label*); 245 void jne(Label*); 246 void jl (Label*); 247 void jc (Label*); 248 249 void add (Operand dst, int imm); 250 void sub (Operand dst, int imm); 251 void cmp (Operand dst, int imm); 252 void mov (Operand dst, int imm); 253 void movb(Operand dst, int imm); 254 255 void add (Operand dst, GP64 x); 256 void sub (Operand dst, GP64 x); 257 void cmp (Operand dst, GP64 x); 258 void mov (Operand dst, GP64 x); 259 void movb(Operand dst, GP64 x); 260 261 void add (GP64 dst, Operand x); 262 void sub (GP64 dst, Operand x); 263 void cmp (GP64 dst, Operand x); 264 void mov (GP64 dst, Operand x); 265 void movb(GP64 dst, Operand x); 266 267 // Disambiguators... choice is arbitrary (but generates different code!). add(GP64 dst,GP64 x)268 void add (GP64 dst, GP64 x) { this->add (Operand(dst), x); } sub(GP64 dst,GP64 x)269 void sub (GP64 dst, GP64 x) { this->sub (Operand(dst), x); } cmp(GP64 dst,GP64 x)270 void cmp (GP64 dst, GP64 x) { this->cmp (Operand(dst), x); } mov(GP64 dst,GP64 x)271 void mov (GP64 dst, GP64 x) { this->mov (Operand(dst), x); } movb(GP64 dst,GP64 x)272 void movb(GP64 dst, GP64 x) { this->movb(Operand(dst), x); } 273 274 void movzbq(GP64 dst, Operand x); // dst = x, uint8_t -> int 275 void movzwq(GP64 dst, Operand x); // dst = x, uint16_t -> int 276 277 // aarch64 278 279 // d = op(n,m) 280 using DOpNM = void(V d, V n, V m); 281 DOpNM and16b, orr16b, eor16b, bic16b, bsl16b, 282 add4s, sub4s, mul4s, 283 cmeq4s, cmgt4s, 284 sub8h, mul8h, 285 fadd4s, fsub4s, fmul4s, fdiv4s, fmin4s, fmax4s, 286 fcmeq4s, fcmgt4s, fcmge4s, 287 tbl, 288 uzp14s, uzp24s, 289 zip14s, zip24s; 290 291 // TODO: there are also float ==,<,<=,>,>= instructions with an immediate 0.0f, 292 // and the register comparison > and >= can also compare absolute values. Interesting. 293 294 // d += n*m 295 void fmla4s(V d, V n, V m); 296 297 // d -= n*m 298 void fmls4s(V d, V n, V m); 299 300 // d = op(n,imm) 301 using DOpNImm = void(V d, V n, int imm); 302 DOpNImm sli4s, 303 shl4s, sshr4s, ushr4s, 304 ushr8h; 305 306 // d = op(n) 307 using DOpN = void(V d, V n); 308 DOpN not16b, // d = ~n 309 fneg4s, // d = -n 310 fsqrt4s, // d = sqrtf(n) 311 scvtf4s, // int -> float 312 fcvtzs4s, // truncate float -> int 313 fcvtns4s, // round float -> int (nearest even) 314 frintp4s, // round float -> int as float, toward plus infinity (ceil) 315 frintm4s, // round float -> int as float, toward minus infinity (floor) 316 fcvtn, // f32 -> f16 in low half 317 fcvtl, // f16 in low half -> f32 318 xtns2h, // u32 -> u16 319 xtnh2b, // u16 -> u8 320 uxtlb2h, // u8 -> u16 (TODO: this is a special case of ushll.8h) 321 uxtlh2s, // u16 -> u32 (TODO: this is a special case of ushll.4s) 322 uminv4s; // dst[0] = min(n[0],n[1],n[2],n[3]), n as unsigned 323 324 void brk (int imm16); 325 void ret (X); 326 void add (X d, X n, int imm12); 327 void sub (X d, X n, int imm12); 328 void subs(X d, X n, int imm12); // subtract setting condition flags 329 330 enum Shift { LSL,LSR,ASR,ROR }; 331 void add (X d, X n, X m, Shift=LSL, int imm6=0); // d=n+Shift(m,imm6), for Shift != ROR. 332 333 // There's another encoding for unconditional branches that can jump further, 334 // but this one encoded as b.al is simple to implement and should be fine. b(Label * l)335 void b (Label* l) { this->b(Condition::al, l); } bne(Label * l)336 void bne(Label* l) { this->b(Condition::ne, l); } blt(Label * l)337 void blt(Label* l) { this->b(Condition::lt, l); } 338 339 // "cmp ..." is just an assembler mnemonic for "subs xzr, ..."! cmp(X n,int imm12)340 void cmp(X n, int imm12) { this->subs(xzr, n, imm12); } 341 342 // Compare and branch if zero/non-zero, as if 343 // cmp(t,0) 344 // beq/bne(l) 345 // but without setting condition flags. 346 void cbz (X t, Label* l); 347 void cbnz(X t, Label* l); 348 349 // TODO: there are ldur variants with unscaled imm, useful? 350 void ldrd(X dst, X src, int imm12=0); // 64-bit dst = *(src+imm12*8) 351 void ldrs(X dst, X src, int imm12=0); // 32-bit dst = *(src+imm12*4) 352 void ldrh(X dst, X src, int imm12=0); // 16-bit dst = *(src+imm12*2) 353 void ldrb(X dst, X src, int imm12=0); // 8-bit dst = *(src+imm12) 354 355 void ldrq(V dst, Label*); // 128-bit PC-relative load 356 357 void ldrq(V dst, X src, int imm12=0); // 128-bit dst = *(src+imm12*16) 358 void ldrd(V dst, X src, int imm12=0); // 64-bit dst = *(src+imm12*8) 359 void ldrs(V dst, X src, int imm12=0); // 32-bit dst = *(src+imm12*4) 360 void ldrh(V dst, X src, int imm12=0); // 16-bit dst = *(src+imm12*2) 361 void ldrb(V dst, X src, int imm12=0); // 8-bit dst = *(src+imm12) 362 363 void strs(X src, X dst, int imm12=0); // 32-bit *(dst+imm12*4) = src 364 365 void strq(V src, X dst, int imm12=0); // 128-bit *(dst+imm12*16) = src 366 void strd(V src, X dst, int imm12=0); // 64-bit *(dst+imm12*8) = src 367 void strs(V src, X dst, int imm12=0); // 32-bit *(dst+imm12*4) = src 368 void strh(V src, X dst, int imm12=0); // 16-bit *(dst+imm12*2) = src 369 void strb(V src, X dst, int imm12=0); // 8-bit *(dst+imm12) = src 370 371 void movs(X dst, V src, int lane); // dst = 32-bit src[lane] 372 void inss(V dst, X src, int lane); // dst[lane] = 32-bit src 373 374 void dup4s (V dst, X src); // Each 32-bit lane = src 375 376 void ld1r4s (V dst, X src); // Each 32-bit lane = *src 377 void ld1r8h (V dst, X src); // Each 16-bit lane = *src 378 void ld1r16b(V dst, X src); // Each 8-bit lane = *src 379 380 void ld24s(V dst, X src); // deinterleave(dst,dst+1) = 256-bit *src 381 void ld44s(V dst, X src); // deinterleave(dst,dst+1,dst+2,dst+3) = 512-bit *src 382 void st24s(V src, X dst); // 256-bit *dst = interleave_32bit_lanes(src,src+1) 383 void st44s(V src, X dst); // 512-bit *dst = interleave_32bit_lanes(src,src+1,src+2,src+3) 384 385 void ld24s(V dst, X src, int lane); // Load 2 32-bit values into given lane of dst..dst+1 386 void ld44s(V dst, X src, int lane); // Load 4 32-bit values into given lane of dst..dst+3 387 388 private: 389 uint8_t* fCode; 390 size_t fSize; 391 392 // x86-64 393 enum W { W0, W1 }; // Are the lanes 64-bit (W1) or default (W0)? Intel Vol 2A 2.3.5.5 394 enum L { L128, L256 }; // Is this a 128- or 256-bit operation? Intel Vol 2A 2.3.6.2 395 396 // Helpers for vector instructions. 397 void op(int prefix, int map, int opcode, int dst, int x, Operand y, W,L); 398 void op(int p, int m, int o, Ymm d, Ymm x, Operand y, W w=W0) { op(p,m,o, d,x,y,w,L256); } 399 void op(int p, int m, int o, Ymm d, Operand y, W w=W0) { op(p,m,o, d,0,y,w,L256); } 400 void op(int p, int m, int o, Xmm d, Xmm x, Operand y, W w=W0) { op(p,m,o, d,x,y,w,L128); } 401 void op(int p, int m, int o, Xmm d, Operand y, W w=W0) { op(p,m,o, d,0,y,w,L128); } 402 403 // Helpers for GP64 instructions. 404 void op(int opcode, Operand dst, GP64 x); 405 void op(int opcode, int opcode_ext, Operand dst, int imm); 406 407 void jump(uint8_t condition, Label*); 408 int disp32(Label*); 409 void imm_byte_after_operand(const Operand&, int byte); 410 411 // aarch64 412 413 // Opcode for 3-arguments ops is split between hi and lo: 414 // [11 bits hi] [5 bits m] [6 bits lo] [5 bits n] [5 bits d] 415 void op(uint32_t hi, V m, uint32_t lo, V n, V d); 416 417 // 0,1,2-argument ops, with or without an immediate: 418 // [ 22 bits op ] [5 bits n] [5 bits d] 419 // Any immediate falls in the middle somewhere overlapping with either op, n, or both. 420 void op(uint32_t op22, V n, V d, int imm=0); 421 void op(uint32_t op22, X n, V d, int imm=0) { this->op(op22,(V)n, d,imm); } 422 void op(uint32_t op22, V n, X d, int imm=0) { this->op(op22, n,(V)d,imm); } 423 void op(uint32_t op22, X n, X d, int imm=0) { this->op(op22,(V)n,(V)d,imm); } 424 void op(uint32_t op22, int imm=0) { this->op(op22,(V)0,(V)0,imm); } 425 // (1-argument ops don't seem to have a consistent convention of passing as n or d.) 426 427 428 // Order matters... value is 4-bit encoding for condition code. 429 enum class Condition { eq,ne,cs,cc,mi,pl,vs,vc,hi,ls,ge,lt,gt,le,al }; 430 void b(Condition, Label*); 431 int disp19(Label*); 432 }; 433 434 // Order matters a little: Ops <=store128 are treated as having side effects. 435 #define SKVM_OPS(M) \ 436 M(assert_true) \ 437 M(trace_line) M(trace_var) M(trace_call) \ 438 M(store8) M(store16) M(store32) M(store64) M(store128) \ 439 M(load8) M(load16) M(load32) M(load64) M(load128) \ 440 M(index) \ 441 M(gather8) M(gather16) M(gather32) \ 442 M(uniform32) \ 443 M(array32) \ 444 M(splat) \ 445 M(add_f32) M(add_i32) \ 446 M(sub_f32) M(sub_i32) \ 447 M(mul_f32) M(mul_i32) \ 448 M(div_f32) \ 449 M(min_f32) M(max_f32) \ 450 M(fma_f32) M(fms_f32) M(fnma_f32) \ 451 M(sqrt_f32) \ 452 M(shl_i32) M(shr_i32) M(sra_i32) \ 453 M(ceil) M(floor) M(trunc) M(round) M(to_fp16) M(from_fp16) \ 454 M(to_f32) \ 455 M(neq_f32) M(eq_f32) M(eq_i32) \ 456 M(gte_f32) M(gt_f32) M(gt_i32) \ 457 M(bit_and) M(bit_or) M(bit_xor) M(bit_clear) \ 458 M(select) 459 // End of SKVM_OPS 460 461 enum class Op : int { 462 #define M(op) op, 463 SKVM_OPS(M) 464 #undef M 465 }; 466 has_side_effect(Op op)467 static inline bool has_side_effect(Op op) { 468 return op <= Op::store128; 469 } touches_varying_memory(Op op)470 static inline bool touches_varying_memory(Op op) { 471 return Op::store8 <= op && op <= Op::load128; 472 } is_always_varying(Op op)473 static inline bool is_always_varying(Op op) { 474 return Op::store8 <= op && op <= Op::index; 475 } is_trace(Op op)476 static inline bool is_trace(Op op) { 477 return Op::trace_line <= op && op <= Op::trace_call; 478 } 479 480 using Val = int; 481 // We reserve an impossibe Val ID as a sentinel 482 // NA meaning none, n/a, null, nil, etc. 483 static const Val NA = -1; 484 485 // Ptr and UPtr are an index into the registers args[]. The two styles of using args are 486 // varyings and uniforms. Varyings use Ptr, have a stride associated with them, and are 487 // evaluated everytime through the loop. Uniforms use UPtr, don't have a stride, and are 488 // usually hoisted above the loop. 489 struct Ptr { 490 Ptr() = default; PtrPtr491 Ptr(int ix_) : ix(ix_) {} 492 int ix; 493 }; 494 struct UPtr : public Ptr { 495 UPtr() = default; UPtrUPtr496 UPtr(int ix_) : Ptr(ix_) {} 497 }; 498 499 bool operator!=(Ptr a, Ptr b); 500 501 struct I32 { 502 Builder* builder = nullptr; 503 Val id = NA; 504 explicit operator bool() const { return id != NA; } 505 Builder* operator->() const { return builder; } 506 }; 507 508 struct F32 { 509 Builder* builder = nullptr; 510 Val id = NA; 511 explicit operator bool() const { return id != NA; } 512 Builder* operator->() const { return builder; } 513 }; 514 515 struct Color { 516 F32 r,g,b,a; 517 explicit operator bool() const { return r && g && b && a; } 518 Builder* operator->() const { return a.operator->(); } 519 }; 520 521 struct HSLA { 522 F32 h,s,l,a; 523 explicit operator bool() const { return h && s && l && a; } 524 Builder* operator->() const { return a.operator->(); } 525 }; 526 527 struct Coord { 528 F32 x,y; 529 explicit operator bool() const { return x && y; } 530 Builder* operator->() const { return x.operator->(); } 531 }; 532 533 struct Uniform { 534 UPtr ptr; 535 int offset; 536 }; 537 struct Uniforms { 538 UPtr base; 539 std::vector<int> buf; 540 UniformsUniforms541 Uniforms(UPtr ptr, int init) : base(ptr), buf(init) {} 542 pushUniforms543 Uniform push(int val) { 544 buf.push_back(val); 545 return {base, (int)( sizeof(int)*(buf.size() - 1) )}; 546 } 547 pushFUniforms548 Uniform pushF(float val) { 549 int bits; 550 memcpy(&bits, &val, sizeof(int)); 551 return this->push(bits); 552 } 553 pushPtrUniforms554 Uniform pushPtr(const void* ptr) { 555 // Jam the pointer into 1 or 2 ints. 556 int ints[sizeof(ptr) / sizeof(int)]; 557 memcpy(ints, &ptr, sizeof(ptr)); 558 for (int bits : ints) { 559 buf.push_back(bits); 560 } 561 return {base, (int)( sizeof(int)*(buf.size() - SK_ARRAY_COUNT(ints)) )}; 562 } 563 pushArrayUniforms564 Uniform pushArray(int32_t a[]) { 565 return this->pushPtr(a); 566 } 567 pushArrayFUniforms568 Uniform pushArrayF(float a[]) { 569 return this->pushPtr(a); 570 } 571 }; 572 573 struct PixelFormat { 574 enum { UNORM, SRGB, FLOAT} encoding; 575 int r_bits, g_bits, b_bits, a_bits, 576 r_shift, g_shift, b_shift, a_shift; 577 }; 578 PixelFormat SkColorType_to_PixelFormat(SkColorType); 579 580 SK_BEGIN_REQUIRE_DENSE 581 struct Instruction { 582 Op op; // v* = op(x,y,z,w,immA,immB), where * == index of this Instruction. 583 Val x,y,z,w; // Enough arguments for Op::store128. 584 int immA,immB,immC; // Immediate bit pattern, shift count, pointer index, byte offset, etc. 585 }; 586 SK_END_REQUIRE_DENSE 587 588 bool operator==(const Instruction&, const Instruction&); 589 struct InstructionHash { 590 uint32_t operator()(const Instruction&, uint32_t seed=0) const; 591 }; 592 593 struct OptimizedInstruction { 594 Op op; 595 Val x,y,z,w; 596 int immA,immB,immC; 597 598 Val death; 599 bool can_hoist; 600 }; 601 602 struct Features { 603 bool fma = false; 604 bool fp16 = false; 605 }; 606 607 class Builder { 608 public: 609 610 Builder(); 611 explicit Builder(Features); 612 613 Program done(const char* debug_name = nullptr, bool allow_jit=true) const; 614 615 // Mostly for debugging, tests, etc. program()616 std::vector<Instruction> program() const { return fProgram; } 617 std::vector<OptimizedInstruction> optimize() const; 618 619 // Convenience arg() wrappers for most common strides, sizeof(T) and 0. 620 template <typename T> varying()621 Ptr varying() { return this->arg(sizeof(T)); } varying(int stride)622 Ptr varying(int stride) { SkASSERT(stride > 0); return this->arg(stride); } uniform()623 UPtr uniform() { Ptr p = this->arg(0); return UPtr{p.ix}; } 624 625 // TODO: allow uniform (i.e. Ptr) offsets to store* and load*? 626 // TODO: sign extension (signed types) for <32-bit loads? 627 // TODO: unsigned integer operations where relevant (just comparisons?)? 628 629 // Assert cond is true, printing debug when not. 630 void assert_true(I32 cond, I32 debug); assert_true(I32 cond,F32 debug)631 void assert_true(I32 cond, F32 debug) { assert_true(cond, pun_to_I32(debug)); } assert_true(I32 cond)632 void assert_true(I32 cond) { assert_true(cond, cond); } 633 634 // Insert debug traces into the instruction stream 635 void trace_line(I32 mask, int line); 636 void trace_var(I32 mask, int slot, I32 val); 637 void trace_var(I32 mask, int slot, F32 val); 638 void trace_var(I32 mask, int slot, bool b); 639 void trace_call_enter(I32 mask, int line); 640 void trace_call_exit(I32 mask, int line); 641 642 // Store {8,16,32,64,128}-bit varying. 643 void store8 (Ptr ptr, I32 val); 644 void store16 (Ptr ptr, I32 val); 645 void store32 (Ptr ptr, I32 val); storeF(Ptr ptr,F32 val)646 void storeF (Ptr ptr, F32 val) { store32(ptr, pun_to_I32(val)); } 647 void store64 (Ptr ptr, I32 lo, I32 hi); // *ptr = lo|(hi<<32) 648 void store128(Ptr ptr, I32 x, I32 y, I32 z, I32 w); // *ptr = x|(y<<32)|(z<<64)|(w<<96) 649 650 // Returns varying {n, n-1, n-2, ..., 1}, where n is the argument to Program::eval(). 651 I32 index(); 652 653 // Load {8,16,32,64,128}-bit varying. 654 I32 load8 (Ptr ptr); 655 I32 load16 (Ptr ptr); 656 I32 load32 (Ptr ptr); loadF(Ptr ptr)657 F32 loadF (Ptr ptr) { return pun_to_F32(load32(ptr)); } 658 I32 load64 (Ptr ptr, int lane); // Load 32-bit lane 0-1 of 64-bit value. 659 I32 load128(Ptr ptr, int lane); // Load 32-bit lane 0-3 of 128-bit value. 660 661 // Load i32/f32 uniform with byte-count offset. 662 I32 uniform32(UPtr ptr, int offset); uniformF(UPtr ptr,int offset)663 F32 uniformF (UPtr ptr, int offset) { return pun_to_F32(uniform32(ptr,offset)); } 664 665 // Load i32/f32 uniform with byte-count offset and an c-style array index. The address of 666 // the element is (*(ptr + byte-count offset))[index]. 667 I32 array32 (UPtr ptr, int offset, int index); arrayF(UPtr ptr,int offset,int index)668 F32 arrayF (UPtr ptr, int offset, int index) { 669 return pun_to_F32(array32(ptr, offset, index)); 670 } 671 672 // Push and load this color as a uniform. 673 Color uniformColor(SkColor4f, Uniforms*); 674 675 // Gather u8,u16,i32 with varying element-count index from *(ptr + byte-count offset). 676 I32 gather8 (UPtr ptr, int offset, I32 index); 677 I32 gather16(UPtr ptr, int offset, I32 index); 678 I32 gather32(UPtr ptr, int offset, I32 index); gatherF(UPtr ptr,int offset,I32 index)679 F32 gatherF (UPtr ptr, int offset, I32 index) { 680 return pun_to_F32(gather32(ptr, offset, index)); 681 } 682 683 // Convenience methods for working with skvm::Uniform(s). uniform32(Uniform u)684 I32 uniform32(Uniform u) { return this->uniform32(u.ptr, u.offset); } uniformF(Uniform u)685 F32 uniformF (Uniform u) { return this->uniformF (u.ptr, u.offset); } gather8(Uniform u,I32 index)686 I32 gather8 (Uniform u, I32 index) { return this->gather8 (u.ptr, u.offset, index); } gather16(Uniform u,I32 index)687 I32 gather16 (Uniform u, I32 index) { return this->gather16 (u.ptr, u.offset, index); } gather32(Uniform u,I32 index)688 I32 gather32 (Uniform u, I32 index) { return this->gather32 (u.ptr, u.offset, index); } gatherF(Uniform u,I32 index)689 F32 gatherF (Uniform u, I32 index) { return this->gatherF (u.ptr, u.offset, index); } 690 691 // Convenience methods for working with array pointers in skvm::Uniforms. Index is an 692 // array index and not a byte offset. The array pointer is stored at u. array32(Uniform a,int index)693 I32 array32 (Uniform a, int index) { return this->array32 (a.ptr, a.offset, index); } arrayF(Uniform a,int index)694 F32 arrayF (Uniform a, int index) { return this->arrayF (a.ptr, a.offset, index); } 695 696 // Load an immediate constant. 697 I32 splat(int n); splat(unsigned u)698 I32 splat(unsigned u) { return splat((int)u); } splat(float f)699 F32 splat(float f) { 700 int bits; 701 memcpy(&bits, &f, 4); 702 return pun_to_F32(splat(bits)); 703 } 704 705 // Some operations make sense with immediate arguments, 706 // so we provide overloads inline to make that seamless. 707 // 708 // We omit overloads that may indicate a bug or performance issue. 709 // In general it does not make sense to pass immediates to unary operations, 710 // and even sometimes not for binary operations, e.g. 711 // 712 // div(x, y) -- normal every day divide 713 // div(3.0f, y) -- yep, makes sense 714 // div(x, 3.0f) -- omitted as a reminder you probably want mul(x, 1/3.0f). 715 // 716 // You can of course always splat() to override these opinions. 717 718 // float math, comparisons, etc. 719 F32 add(F32, F32); add(F32 x,float y)720 F32 add(F32 x, float y) { return add(x, splat(y)); } add(float x,F32 y)721 F32 add(float x, F32 y) { return add(splat(x), y); } 722 723 F32 sub(F32, F32); sub(F32 x,float y)724 F32 sub(F32 x, float y) { return sub(x, splat(y)); } sub(float x,F32 y)725 F32 sub(float x, F32 y) { return sub(splat(x), y); } 726 727 F32 mul(F32, F32); mul(F32 x,float y)728 F32 mul(F32 x, float y) { return mul(x, splat(y)); } mul(float x,F32 y)729 F32 mul(float x, F32 y) { return mul(splat(x), y); } 730 731 // mul(), but allowing optimizations not strictly legal under IEEE-754 rules. 732 F32 fast_mul(F32, F32); fast_mul(F32 x,float y)733 F32 fast_mul(F32 x, float y) { return fast_mul(x, splat(y)); } fast_mul(float x,F32 y)734 F32 fast_mul(float x, F32 y) { return fast_mul(splat(x), y); } 735 736 F32 div(F32, F32); div(float x,F32 y)737 F32 div(float x, F32 y) { return div(splat(x), y); } 738 739 F32 min(F32, F32); min(F32 x,float y)740 F32 min(F32 x, float y) { return min(x, splat(y)); } min(float x,F32 y)741 F32 min(float x, F32 y) { return min(splat(x), y); } 742 743 F32 max(F32, F32); max(F32 x,float y)744 F32 max(F32 x, float y) { return max(x, splat(y)); } max(float x,F32 y)745 F32 max(float x, F32 y) { return max(splat(x), y); } 746 747 // TODO: remove mad()? It's just sugar. mad(F32 x,F32 y,F32 z)748 F32 mad(F32 x, F32 y, F32 z) { return add(mul(x,y), z); } mad(F32 x,F32 y,float z)749 F32 mad(F32 x, F32 y, float z) { return mad( x , y , splat(z)); } mad(F32 x,float y,F32 z)750 F32 mad(F32 x, float y, F32 z) { return mad( x , splat(y), z ); } mad(F32 x,float y,float z)751 F32 mad(F32 x, float y, float z) { return mad( x , splat(y), splat(z)); } mad(float x,F32 y,F32 z)752 F32 mad(float x, F32 y, F32 z) { return mad(splat(x), y , z ); } mad(float x,F32 y,float z)753 F32 mad(float x, F32 y, float z) { return mad(splat(x), y , splat(z)); } mad(float x,float y,F32 z)754 F32 mad(float x, float y, F32 z) { return mad(splat(x), splat(y), z ); } 755 756 F32 sqrt(F32); 757 F32 approx_log2(F32); 758 F32 approx_pow2(F32); approx_log(F32 x)759 F32 approx_log (F32 x) { return mul(0.69314718f, approx_log2(x)); } approx_exp(F32 x)760 F32 approx_exp (F32 x) { return approx_pow2(mul(x, 1.4426950408889634074f)); } 761 762 F32 approx_powf(F32 base, F32 exp); approx_powf(F32 base,float exp)763 F32 approx_powf(F32 base, float exp) { return approx_powf(base, splat(exp)); } approx_powf(float base,F32 exp)764 F32 approx_powf(float base, F32 exp) { return approx_powf(splat(base), exp); } 765 766 767 F32 approx_sin(F32 radians); approx_cos(F32 radians)768 F32 approx_cos(F32 radians) { return approx_sin(add(radians, SK_ScalarPI/2)); } 769 F32 approx_tan(F32 radians); 770 771 F32 approx_asin(F32 x); approx_acos(F32 x)772 F32 approx_acos(F32 x) { return sub(SK_ScalarPI/2, approx_asin(x)); } 773 F32 approx_atan(F32 x); 774 F32 approx_atan2(F32 y, F32 x); 775 776 F32 lerp(F32 lo, F32 hi, F32 t); lerp(F32 lo,F32 hi,float t)777 F32 lerp(F32 lo, F32 hi, float t) { return lerp( lo , hi , splat(t)); } lerp(F32 lo,float hi,float t)778 F32 lerp(F32 lo, float hi, float t) { return lerp( lo , splat(hi), splat(t)); } lerp(F32 lo,float hi,F32 t)779 F32 lerp(F32 lo, float hi, F32 t) { return lerp( lo , splat(hi), t ); } lerp(float lo,F32 hi,F32 t)780 F32 lerp(float lo, F32 hi, F32 t) { return lerp(splat(lo), hi , t ); } lerp(float lo,F32 hi,float t)781 F32 lerp(float lo, F32 hi, float t) { return lerp(splat(lo), hi , splat(t)); } lerp(float lo,float hi,F32 t)782 F32 lerp(float lo, float hi, F32 t) { return lerp(splat(lo), splat(hi), t ); } 783 clamp(F32 x,F32 lo,F32 hi)784 F32 clamp(F32 x, F32 lo, F32 hi) { return max(lo, min(x, hi)); } clamp(F32 x,F32 lo,float hi)785 F32 clamp(F32 x, F32 lo, float hi) { return clamp( x , lo , splat(hi)); } clamp(F32 x,float lo,float hi)786 F32 clamp(F32 x, float lo, float hi) { return clamp( x , splat(lo), splat(hi)); } clamp(F32 x,float lo,F32 hi)787 F32 clamp(F32 x, float lo, F32 hi) { return clamp( x , splat(lo), hi ); } clamp(float x,F32 lo,F32 hi)788 F32 clamp(float x, F32 lo, F32 hi) { return clamp(splat(x), lo , hi ); } clamp(float x,F32 lo,float hi)789 F32 clamp(float x, F32 lo, float hi) { return clamp(splat(x), lo , splat(hi)); } clamp(float x,float lo,F32 hi)790 F32 clamp(float x, float lo, F32 hi) { return clamp(splat(x), splat(lo), hi ); } 791 clamp01(F32 x)792 F32 clamp01(F32 x) { return clamp(x, 0.0f, 1.0f); } 793 abs(F32 x)794 F32 abs(F32 x) { return pun_to_F32(bit_and(pun_to_I32(x), 0x7fff'ffff)); } 795 F32 fract(F32 x) { return sub(x, floor(x)); } 796 F32 ceil(F32); 797 F32 floor(F32); 798 I32 is_NaN (F32 x) { return neq(x,x); } 799 I32 is_finite(F32 x) { return lt(bit_and(pun_to_I32(x), 0x7f80'0000), 0x7f80'0000); } 800 801 I32 trunc(F32 x); 802 I32 round(F32 x); // Round to int using current rounding mode (as if lrintf()). 803 I32 pun_to_I32(F32 x) { return {x.builder, x.id}; } 804 805 I32 to_fp16(F32 x); 806 F32 from_fp16(I32 x); 807 808 I32 eq(F32, F32); 809 I32 eq(F32 x, float y) { return eq(x, splat(y)); } 810 I32 eq(float x, F32 y) { return eq(splat(x), y); } 811 812 I32 neq(F32, F32); 813 I32 neq(F32 x, float y) { return neq(x, splat(y)); } 814 I32 neq(float x, F32 y) { return neq(splat(x), y); } 815 816 I32 lt(F32, F32); 817 I32 lt(F32 x, float y) { return lt(x, splat(y)); } 818 I32 lt(float x, F32 y) { return lt(splat(x), y); } 819 820 I32 lte(F32, F32); 821 I32 lte(F32 x, float y) { return lte(x, splat(y)); } 822 I32 lte(float x, F32 y) { return lte(splat(x), y); } 823 824 I32 gt(F32, F32); 825 I32 gt(F32 x, float y) { return gt(x, splat(y)); } 826 I32 gt(float x, F32 y) { return gt(splat(x), y); } 827 828 I32 gte(F32, F32); 829 I32 gte(F32 x, float y) { return gte(x, splat(y)); } 830 I32 gte(float x, F32 y) { return gte(splat(x), y); } 831 832 // int math, comparisons, etc. 833 I32 add(I32, I32); 834 I32 add(I32 x, int y) { return add(x, splat(y)); } 835 I32 add(int x, I32 y) { return add(splat(x), y); } 836 837 I32 sub(I32, I32); 838 I32 sub(I32 x, int y) { return sub(x, splat(y)); } 839 I32 sub(int x, I32 y) { return sub(splat(x), y); } 840 841 I32 mul(I32, I32); 842 I32 mul(I32 x, int y) { return mul(x, splat(y)); } 843 I32 mul(int x, I32 y) { return mul(splat(x), y); } 844 845 I32 shl(I32 x, int bits); 846 I32 shr(I32 x, int bits); 847 I32 sra(I32 x, int bits); 848 849 I32 eq(I32, I32); 850 I32 eq(I32 x, int y) { return eq(x, splat(y)); } 851 I32 eq(int x, I32 y) { return eq(splat(x), y); } 852 853 I32 neq(I32, I32); 854 I32 neq(I32 x, int y) { return neq(x, splat(y)); } 855 I32 neq(int x, I32 y) { return neq(splat(x), y); } 856 857 I32 lt(I32, I32); 858 I32 lt(I32 x, int y) { return lt(x, splat(y)); } 859 I32 lt(int x, I32 y) { return lt(splat(x), y); } 860 861 I32 lte(I32, I32); 862 I32 lte(I32 x, int y) { return lte(x, splat(y)); } 863 I32 lte(int x, I32 y) { return lte(splat(x), y); } 864 865 I32 gt(I32, I32); 866 I32 gt(I32 x, int y) { return gt(x, splat(y)); } 867 I32 gt(int x, I32 y) { return gt(splat(x), y); } 868 869 I32 gte(I32, I32); 870 I32 gte(I32 x, int y) { return gte(x, splat(y)); } 871 I32 gte(int x, I32 y) { return gte(splat(x), y); } 872 873 F32 to_F32(I32 x); 874 F32 pun_to_F32(I32 x) { return {x.builder, x.id}; } 875 876 // Bitwise operations. 877 I32 bit_and(I32, I32); 878 I32 bit_and(I32 x, int y) { return bit_and(x, splat(y)); } 879 I32 bit_and(int x, I32 y) { return bit_and(splat(x), y); } 880 881 I32 bit_or(I32, I32); 882 I32 bit_or(I32 x, int y) { return bit_or(x, splat(y)); } 883 I32 bit_or(int x, I32 y) { return bit_or(splat(x), y); } 884 885 I32 bit_xor(I32, I32); 886 I32 bit_xor(I32 x, int y) { return bit_xor(x, splat(y)); } 887 I32 bit_xor(int x, I32 y) { return bit_xor(splat(x), y); } 888 889 I32 bit_clear(I32, I32); 890 I32 bit_clear(I32 x, int y) { return bit_clear(x, splat(y)); } 891 I32 bit_clear(int x, I32 y) { return bit_clear(splat(x), y); } 892 893 I32 min(I32 x, I32 y) { return select(lte(x,y), x, y); } 894 I32 min(I32 x, int y) { return min(x, splat(y)); } 895 I32 min(int x, I32 y) { return min(splat(x), y); } 896 897 I32 max(I32 x, I32 y) { return select(gte(x,y), x, y); } 898 I32 max(I32 x, int y) { return max(x, splat(y)); } 899 I32 max(int x, I32 y) { return max(splat(x), y); } 900 901 I32 select(I32 cond, I32 t, I32 f); // cond ? t : f 902 I32 select(I32 cond, int t, I32 f) { return select(cond, splat(t), f ); } 903 I32 select(I32 cond, I32 t, int f) { return select(cond, t , splat(f)); } 904 I32 select(I32 cond, int t, int f) { return select(cond, splat(t), splat(f)); } 905 906 F32 select(I32 cond, F32 t, F32 f) { 907 return pun_to_F32(select(cond, pun_to_I32(t) 908 , pun_to_I32(f))); 909 } 910 F32 select(I32 cond, float t, F32 f) { return select(cond, splat(t), f ); } 911 F32 select(I32 cond, F32 t, float f) { return select(cond, t , splat(f)); } 912 F32 select(I32 cond, float t, float f) { return select(cond, splat(t), splat(f)); } 913 914 I32 extract(I32 x, int bits, I32 z); // (x>>bits) & z 915 I32 extract(I32 x, int bits, int z) { return extract(x, bits, splat(z)); } 916 I32 extract(int x, int bits, I32 z) { return extract(splat(x), bits, z); } 917 918 I32 pack(I32 x, I32 y, int bits); // x | (y<<bits) 919 I32 pack(I32 x, int y, int bits) { return pack(x, splat(y), bits); } 920 I32 pack(int x, I32 y, int bits) { return pack(splat(x), y, bits); } 921 922 923 // Common idioms used in several places, worth centralizing for consistency. 924 F32 from_unorm(int bits, I32); // E.g. from_unorm(8, x) -> x * (1/255.0f) 925 I32 to_unorm(int bits, F32); // E.g. to_unorm(8, x) -> round(x * 255) 926 927 Color load(PixelFormat, Ptr ptr); 928 void store(PixelFormat, Ptr ptr, Color); 929 Color gather(PixelFormat, UPtr ptr, int offset, I32 index); 930 Color gather(PixelFormat f, Uniform u, I32 index) { 931 return gather(f, u.ptr, u.offset, index); 932 } 933 934 void premul(F32* r, F32* g, F32* b, F32 a); 935 void unpremul(F32* r, F32* g, F32* b, F32 a); 936 937 Color premul(Color c) { this->premul(&c.r, &c.g, &c.b, c.a); return c; } 938 Color unpremul(Color c) { this->unpremul(&c.r, &c.g, &c.b, c.a); return c; } 939 940 Color lerp(Color lo, Color hi, F32 t); 941 Color blend(SkBlendMode, Color src, Color dst); 942 943 Color clamp01(Color c) { 944 return { clamp01(c.r), clamp01(c.g), clamp01(c.b), clamp01(c.a) }; 945 } 946 947 HSLA to_hsla(Color); 948 Color to_rgba(HSLA); 949 950 void dump(SkWStream* = nullptr) const; 951 952 uint64_t hash() const; 953 954 Val push(Instruction); 955 956 bool allImm() const { return true; } 957 958 template <typename T, typename... Rest> 959 bool allImm(Val id, T* imm, Rest... rest) const { 960 if (fProgram[id].op == Op::splat) { 961 static_assert(sizeof(T) == 4); 962 memcpy(imm, &fProgram[id].immA, 4); 963 return this->allImm(rest...); 964 } 965 return false; 966 } 967 968 bool allUniform() const { return true; } 969 970 template <typename... Rest> 971 bool allUniform(Val id, Uniform* uni, Rest... rest) const { 972 if (fProgram[id].op == Op::uniform32) { 973 uni->ptr.ix = fProgram[id].immA; 974 uni->offset = fProgram[id].immB; 975 return this->allUniform(rest...); 976 } 977 return false; 978 } 979 980 private: 981 // Declare an argument with given stride (use stride=0 for uniforms). 982 Ptr arg(int stride); 983 984 Val push( 985 Op op, Val x=NA, Val y=NA, Val z=NA, Val w=NA, int immA=0, int immB=0, int immC=0) { 986 return this->push(Instruction{op, x,y,z,w, immA,immB,immC}); 987 } 988 989 template <typename T> 990 bool isImm(Val id, T want) const { 991 T imm = 0; 992 return this->allImm(id, &imm) && imm == want; 993 } 994 995 SkTHashMap<Instruction, Val, InstructionHash> fIndex; 996 std::vector<Instruction> fProgram; 997 std::vector<int> fStrides; 998 const Features fFeatures; 999 }; 1000 1001 // Optimization passes and data structures normally used by Builder::optimize(), 1002 // extracted here so they can be unit tested. 1003 std::vector<Instruction> eliminate_dead_code(std::vector<Instruction>); 1004 std::vector<OptimizedInstruction> finalize (std::vector<Instruction>); 1005 1006 using Reg = int; 1007 1008 // d = op(x,y,z,w, immA,immB) 1009 struct InterpreterInstruction { 1010 Op op; 1011 Reg d,x,y,z,w; 1012 int immA,immB,immC; 1013 }; 1014 1015 class Program { 1016 public: 1017 Program(const std::vector<OptimizedInstruction>& instructions, 1018 const std::vector<int>& strides, 1019 const char* debug_name, bool allow_jit); 1020 1021 Program(); 1022 ~Program(); 1023 1024 Program(Program&&); 1025 Program& operator=(Program&&); 1026 1027 Program(const Program&) = delete; 1028 Program& operator=(const Program&) = delete; 1029 1030 void eval(int n, void* args[]) const; 1031 1032 template <typename... T> 1033 void eval(int n, T*... arg) const { 1034 SkASSERT(sizeof...(arg) == this->nargs()); 1035 // This nullptr isn't important except that it makes args[] non-empty if you pass none. 1036 void* args[] = { (void*)arg..., nullptr }; 1037 this->eval(n, args); 1038 } 1039 1040 std::vector<InterpreterInstruction> instructions() const; 1041 int nargs() const; 1042 int nregs() const; 1043 int loop () const; 1044 bool empty() const; 1045 1046 bool hasJIT() const; // Has this Program been JITted? 1047 1048 void dump(SkWStream* = nullptr) const; 1049 void disassemble(SkWStream* = nullptr) const; 1050 1051 private: 1052 void setupInterpreter(const std::vector<OptimizedInstruction>&); 1053 void setupJIT (const std::vector<OptimizedInstruction>&, const char* debug_name); 1054 void setupLLVM (const std::vector<OptimizedInstruction>&, const char* debug_name); 1055 1056 bool jit(const std::vector<OptimizedInstruction>&, 1057 int* stack_hint, uint32_t* registers_used, 1058 Assembler*) const; 1059 1060 void waitForLLVM() const; 1061 void dropJIT(); 1062 1063 struct Impl; 1064 std::unique_ptr<Impl> fImpl; 1065 }; 1066 1067 // TODO: control flow 1068 // TODO: 64-bit values? 1069 1070 #define SI static inline 1071 1072 SI I32 operator+(I32 x, I32 y) { return x->add(x,y); } 1073 SI I32 operator+(I32 x, int y) { return x->add(x,y); } 1074 SI I32 operator+(int x, I32 y) { return y->add(x,y); } 1075 1076 SI I32 operator-(I32 x, I32 y) { return x->sub(x,y); } 1077 SI I32 operator-(I32 x, int y) { return x->sub(x,y); } 1078 SI I32 operator-(int x, I32 y) { return y->sub(x,y); } 1079 1080 SI I32 operator*(I32 x, I32 y) { return x->mul(x,y); } 1081 SI I32 operator*(I32 x, int y) { return x->mul(x,y); } 1082 SI I32 operator*(int x, I32 y) { return y->mul(x,y); } 1083 min(I32 x,I32 y)1084 SI I32 min(I32 x, I32 y) { return x->min(x,y); } min(I32 x,int y)1085 SI I32 min(I32 x, int y) { return x->min(x,y); } min(int x,I32 y)1086 SI I32 min(int x, I32 y) { return y->min(x,y); } 1087 max(I32 x,I32 y)1088 SI I32 max(I32 x, I32 y) { return x->max(x,y); } max(I32 x,int y)1089 SI I32 max(I32 x, int y) { return x->max(x,y); } max(int x,I32 y)1090 SI I32 max(int x, I32 y) { return y->max(x,y); } 1091 1092 SI I32 operator==(I32 x, I32 y) { return x->eq(x,y); } 1093 SI I32 operator==(I32 x, int y) { return x->eq(x,y); } 1094 SI I32 operator==(int x, I32 y) { return y->eq(x,y); } 1095 1096 SI I32 operator!=(I32 x, I32 y) { return x->neq(x,y); } 1097 SI I32 operator!=(I32 x, int y) { return x->neq(x,y); } 1098 SI I32 operator!=(int x, I32 y) { return y->neq(x,y); } 1099 1100 SI I32 operator< (I32 x, I32 y) { return x->lt(x,y); } 1101 SI I32 operator< (I32 x, int y) { return x->lt(x,y); } 1102 SI I32 operator< (int x, I32 y) { return y->lt(x,y); } 1103 1104 SI I32 operator<=(I32 x, I32 y) { return x->lte(x,y); } 1105 SI I32 operator<=(I32 x, int y) { return x->lte(x,y); } 1106 SI I32 operator<=(int x, I32 y) { return y->lte(x,y); } 1107 1108 SI I32 operator> (I32 x, I32 y) { return x->gt(x,y); } 1109 SI I32 operator> (I32 x, int y) { return x->gt(x,y); } 1110 SI I32 operator> (int x, I32 y) { return y->gt(x,y); } 1111 1112 SI I32 operator>=(I32 x, I32 y) { return x->gte(x,y); } 1113 SI I32 operator>=(I32 x, int y) { return x->gte(x,y); } 1114 SI I32 operator>=(int x, I32 y) { return y->gte(x,y); } 1115 1116 1117 SI F32 operator+(F32 x, F32 y) { return x->add(x,y); } 1118 SI F32 operator+(F32 x, float y) { return x->add(x,y); } 1119 SI F32 operator+(float x, F32 y) { return y->add(x,y); } 1120 1121 SI F32 operator-(F32 x, F32 y) { return x->sub(x,y); } 1122 SI F32 operator-(F32 x, float y) { return x->sub(x,y); } 1123 SI F32 operator-(float x, F32 y) { return y->sub(x,y); } 1124 1125 SI F32 operator*(F32 x, F32 y) { return x->mul(x,y); } 1126 SI F32 operator*(F32 x, float y) { return x->mul(x,y); } 1127 SI F32 operator*(float x, F32 y) { return y->mul(x,y); } 1128 fast_mul(F32 x,F32 y)1129 SI F32 fast_mul(F32 x, F32 y) { return x->fast_mul(x,y); } fast_mul(F32 x,float y)1130 SI F32 fast_mul(F32 x, float y) { return x->fast_mul(x,y); } fast_mul(float x,F32 y)1131 SI F32 fast_mul(float x, F32 y) { return y->fast_mul(x,y); } 1132 1133 SI F32 operator/(F32 x, F32 y) { return x->div(x,y); } 1134 SI F32 operator/(float x, F32 y) { return y->div(x,y); } 1135 min(F32 x,F32 y)1136 SI F32 min(F32 x, F32 y) { return x->min(x,y); } min(F32 x,float y)1137 SI F32 min(F32 x, float y) { return x->min(x,y); } min(float x,F32 y)1138 SI F32 min(float x, F32 y) { return y->min(x,y); } 1139 max(F32 x,F32 y)1140 SI F32 max(F32 x, F32 y) { return x->max(x,y); } max(F32 x,float y)1141 SI F32 max(F32 x, float y) { return x->max(x,y); } max(float x,F32 y)1142 SI F32 max(float x, F32 y) { return y->max(x,y); } 1143 1144 SI I32 operator==(F32 x, F32 y) { return x->eq(x,y); } 1145 SI I32 operator==(F32 x, float y) { return x->eq(x,y); } 1146 SI I32 operator==(float x, F32 y) { return y->eq(x,y); } 1147 1148 SI I32 operator!=(F32 x, F32 y) { return x->neq(x,y); } 1149 SI I32 operator!=(F32 x, float y) { return x->neq(x,y); } 1150 SI I32 operator!=(float x, F32 y) { return y->neq(x,y); } 1151 1152 SI I32 operator< (F32 x, F32 y) { return x->lt(x,y); } 1153 SI I32 operator< (F32 x, float y) { return x->lt(x,y); } 1154 SI I32 operator< (float x, F32 y) { return y->lt(x,y); } 1155 1156 SI I32 operator<=(F32 x, F32 y) { return x->lte(x,y); } 1157 SI I32 operator<=(F32 x, float y) { return x->lte(x,y); } 1158 SI I32 operator<=(float x, F32 y) { return y->lte(x,y); } 1159 1160 SI I32 operator> (F32 x, F32 y) { return x->gt(x,y); } 1161 SI I32 operator> (F32 x, float y) { return x->gt(x,y); } 1162 SI I32 operator> (float x, F32 y) { return y->gt(x,y); } 1163 1164 SI I32 operator>=(F32 x, F32 y) { return x->gte(x,y); } 1165 SI I32 operator>=(F32 x, float y) { return x->gte(x,y); } 1166 SI I32 operator>=(float x, F32 y) { return y->gte(x,y); } 1167 1168 SI I32& operator+=(I32& x, I32 y) { return (x = x + y); } 1169 SI I32& operator+=(I32& x, int y) { return (x = x + y); } 1170 1171 SI I32& operator-=(I32& x, I32 y) { return (x = x - y); } 1172 SI I32& operator-=(I32& x, int y) { return (x = x - y); } 1173 1174 SI I32& operator*=(I32& x, I32 y) { return (x = x * y); } 1175 SI I32& operator*=(I32& x, int y) { return (x = x * y); } 1176 1177 SI F32& operator+=(F32& x, F32 y) { return (x = x + y); } 1178 SI F32& operator+=(F32& x, float y) { return (x = x + y); } 1179 1180 SI F32& operator-=(F32& x, F32 y) { return (x = x - y); } 1181 SI F32& operator-=(F32& x, float y) { return (x = x - y); } 1182 1183 SI F32& operator*=(F32& x, F32 y) { return (x = x * y); } 1184 SI F32& operator*=(F32& x, float y) { return (x = x * y); } 1185 1186 SI F32& operator/=(F32& x, F32 y) { return (x = x / y); } 1187 assert_true(I32 cond,I32 debug)1188 SI void assert_true(I32 cond, I32 debug) { cond->assert_true(cond,debug); } assert_true(I32 cond,F32 debug)1189 SI void assert_true(I32 cond, F32 debug) { cond->assert_true(cond,debug); } assert_true(I32 cond)1190 SI void assert_true(I32 cond) { cond->assert_true(cond); } 1191 store8(Ptr ptr,I32 val)1192 SI void store8 (Ptr ptr, I32 val) { val->store8 (ptr, val); } store16(Ptr ptr,I32 val)1193 SI void store16 (Ptr ptr, I32 val) { val->store16 (ptr, val); } store32(Ptr ptr,I32 val)1194 SI void store32 (Ptr ptr, I32 val) { val->store32 (ptr, val); } storeF(Ptr ptr,F32 val)1195 SI void storeF (Ptr ptr, F32 val) { val->storeF (ptr, val); } store64(Ptr ptr,I32 lo,I32 hi)1196 SI void store64 (Ptr ptr, I32 lo, I32 hi) { lo ->store64 (ptr, lo,hi); } store128(Ptr ptr,I32 x,I32 y,I32 z,I32 w)1197 SI void store128(Ptr ptr, I32 x, I32 y, I32 z, I32 w) { x ->store128(ptr, x,y,z,w); } 1198 gather8(UPtr ptr,int off,I32 ix)1199 SI I32 gather8 (UPtr ptr, int off, I32 ix) { return ix->gather8 (ptr, off, ix); } gather16(UPtr ptr,int off,I32 ix)1200 SI I32 gather16(UPtr ptr, int off, I32 ix) { return ix->gather16(ptr, off, ix); } gather32(UPtr ptr,int off,I32 ix)1201 SI I32 gather32(UPtr ptr, int off, I32 ix) { return ix->gather32(ptr, off, ix); } gatherF(UPtr ptr,int off,I32 ix)1202 SI F32 gatherF (UPtr ptr, int off, I32 ix) { return ix->gatherF (ptr, off, ix); } 1203 gather8(Uniform u,I32 ix)1204 SI I32 gather8 (Uniform u, I32 ix) { return ix->gather8 (u, ix); } gather16(Uniform u,I32 ix)1205 SI I32 gather16(Uniform u, I32 ix) { return ix->gather16(u, ix); } gather32(Uniform u,I32 ix)1206 SI I32 gather32(Uniform u, I32 ix) { return ix->gather32(u, ix); } gatherF(Uniform u,I32 ix)1207 SI F32 gatherF (Uniform u, I32 ix) { return ix->gatherF (u, ix); } 1208 sqrt(F32 x)1209 SI F32 sqrt(F32 x) { return x-> sqrt(x); } approx_log2(F32 x)1210 SI F32 approx_log2(F32 x) { return x->approx_log2(x); } approx_pow2(F32 x)1211 SI F32 approx_pow2(F32 x) { return x->approx_pow2(x); } approx_log(F32 x)1212 SI F32 approx_log (F32 x) { return x->approx_log (x); } approx_exp(F32 x)1213 SI F32 approx_exp (F32 x) { return x->approx_exp (x); } 1214 approx_powf(F32 base,F32 exp)1215 SI F32 approx_powf(F32 base, F32 exp) { return base->approx_powf(base, exp); } approx_powf(F32 base,float exp)1216 SI F32 approx_powf(F32 base, float exp) { return base->approx_powf(base, exp); } approx_powf(float base,F32 exp)1217 SI F32 approx_powf(float base, F32 exp) { return exp->approx_powf(base, exp); } 1218 approx_sin(F32 radians)1219 SI F32 approx_sin(F32 radians) { return radians->approx_sin(radians); } approx_cos(F32 radians)1220 SI F32 approx_cos(F32 radians) { return radians->approx_cos(radians); } approx_tan(F32 radians)1221 SI F32 approx_tan(F32 radians) { return radians->approx_tan(radians); } 1222 approx_asin(F32 x)1223 SI F32 approx_asin(F32 x) { return x->approx_asin(x); } approx_acos(F32 x)1224 SI F32 approx_acos(F32 x) { return x->approx_acos(x); } approx_atan(F32 x)1225 SI F32 approx_atan(F32 x) { return x->approx_atan(x); } approx_atan2(F32 y,F32 x)1226 SI F32 approx_atan2(F32 y, F32 x) { return x->approx_atan2(y, x); } 1227 clamp01(F32 x)1228 SI F32 clamp01(F32 x) { return x-> clamp01(x); } abs(F32 x)1229 SI F32 abs(F32 x) { return x-> abs(x); } ceil(F32 x)1230 SI F32 ceil(F32 x) { return x-> ceil(x); } fract(F32 x)1231 SI F32 fract(F32 x) { return x-> fract(x); } floor(F32 x)1232 SI F32 floor(F32 x) { return x-> floor(x); } is_NaN(F32 x)1233 SI I32 is_NaN(F32 x) { return x-> is_NaN(x); } is_finite(F32 x)1234 SI I32 is_finite(F32 x) { return x->is_finite(x); } 1235 trunc(F32 x)1236 SI I32 trunc(F32 x) { return x-> trunc(x); } round(F32 x)1237 SI I32 round(F32 x) { return x-> round(x); } pun_to_I32(F32 x)1238 SI I32 pun_to_I32(F32 x) { return x-> pun_to_I32(x); } pun_to_F32(I32 x)1239 SI F32 pun_to_F32(I32 x) { return x-> pun_to_F32(x); } to_F32(I32 x)1240 SI F32 to_F32(I32 x) { return x-> to_F32(x); } to_fp16(F32 x)1241 SI I32 to_fp16(F32 x) { return x-> to_fp16(x); } from_fp16(I32 x)1242 SI F32 from_fp16(I32 x) { return x-> from_fp16(x); } 1243 lerp(F32 lo,F32 hi,F32 t)1244 SI F32 lerp(F32 lo, F32 hi, F32 t) { return lo->lerp(lo,hi,t); } lerp(F32 lo,F32 hi,float t)1245 SI F32 lerp(F32 lo, F32 hi, float t) { return lo->lerp(lo,hi,t); } lerp(F32 lo,float hi,F32 t)1246 SI F32 lerp(F32 lo, float hi, F32 t) { return lo->lerp(lo,hi,t); } lerp(F32 lo,float hi,float t)1247 SI F32 lerp(F32 lo, float hi, float t) { return lo->lerp(lo,hi,t); } lerp(float lo,F32 hi,F32 t)1248 SI F32 lerp(float lo, F32 hi, F32 t) { return hi->lerp(lo,hi,t); } lerp(float lo,F32 hi,float t)1249 SI F32 lerp(float lo, F32 hi, float t) { return hi->lerp(lo,hi,t); } lerp(float lo,float hi,F32 t)1250 SI F32 lerp(float lo, float hi, F32 t) { return t->lerp(lo,hi,t); } 1251 clamp(F32 x,F32 lo,F32 hi)1252 SI F32 clamp(F32 x, F32 lo, F32 hi) { return x->clamp(x,lo,hi); } clamp(F32 x,F32 lo,float hi)1253 SI F32 clamp(F32 x, F32 lo, float hi) { return x->clamp(x,lo,hi); } clamp(F32 x,float lo,F32 hi)1254 SI F32 clamp(F32 x, float lo, F32 hi) { return x->clamp(x,lo,hi); } clamp(F32 x,float lo,float hi)1255 SI F32 clamp(F32 x, float lo, float hi) { return x->clamp(x,lo,hi); } clamp(float x,F32 lo,F32 hi)1256 SI F32 clamp(float x, F32 lo, F32 hi) { return lo->clamp(x,lo,hi); } clamp(float x,F32 lo,float hi)1257 SI F32 clamp(float x, F32 lo, float hi) { return lo->clamp(x,lo,hi); } clamp(float x,float lo,F32 hi)1258 SI F32 clamp(float x, float lo, F32 hi) { return hi->clamp(x,lo,hi); } 1259 1260 SI I32 operator<<(I32 x, int bits) { return x->shl(x, bits); } shl(I32 x,int bits)1261 SI I32 shl(I32 x, int bits) { return x->shl(x, bits); } shr(I32 x,int bits)1262 SI I32 shr(I32 x, int bits) { return x->shr(x, bits); } sra(I32 x,int bits)1263 SI I32 sra(I32 x, int bits) { return x->sra(x, bits); } 1264 1265 SI I32 operator&(I32 x, I32 y) { return x->bit_and(x,y); } 1266 SI I32 operator&(I32 x, int y) { return x->bit_and(x,y); } 1267 SI I32 operator&(int x, I32 y) { return y->bit_and(x,y); } 1268 1269 SI I32 operator|(I32 x, I32 y) { return x->bit_or (x,y); } 1270 SI I32 operator|(I32 x, int y) { return x->bit_or (x,y); } 1271 SI I32 operator|(int x, I32 y) { return y->bit_or (x,y); } 1272 1273 SI I32 operator^(I32 x, I32 y) { return x->bit_xor(x,y); } 1274 SI I32 operator^(I32 x, int y) { return x->bit_xor(x,y); } 1275 SI I32 operator^(int x, I32 y) { return y->bit_xor(x,y); } 1276 1277 SI I32& operator&=(I32& x, I32 y) { return (x = x & y); } 1278 SI I32& operator&=(I32& x, int y) { return (x = x & y); } 1279 SI I32& operator|=(I32& x, I32 y) { return (x = x | y); } 1280 SI I32& operator|=(I32& x, int y) { return (x = x | y); } 1281 SI I32& operator^=(I32& x, I32 y) { return (x = x ^ y); } 1282 SI I32& operator^=(I32& x, int y) { return (x = x ^ y); } 1283 bit_clear(I32 x,I32 y)1284 SI I32 bit_clear(I32 x, I32 y) { return x->bit_clear(x,y); } bit_clear(I32 x,int y)1285 SI I32 bit_clear(I32 x, int y) { return x->bit_clear(x,y); } bit_clear(int x,I32 y)1286 SI I32 bit_clear(int x, I32 y) { return y->bit_clear(x,y); } 1287 select(I32 c,I32 t,I32 f)1288 SI I32 select(I32 c, I32 t, I32 f) { return c->select(c, t , f ); } select(I32 c,I32 t,int f)1289 SI I32 select(I32 c, I32 t, int f) { return c->select(c, t , c->splat(f)); } select(I32 c,int t,I32 f)1290 SI I32 select(I32 c, int t, I32 f) { return c->select(c, c->splat(t), f ); } select(I32 c,int t,int f)1291 SI I32 select(I32 c, int t, int f) { return c->select(c, c->splat(t), c->splat(f)); } 1292 select(I32 c,F32 t,F32 f)1293 SI F32 select(I32 c, F32 t, F32 f) { return c->select(c, t , f ); } select(I32 c,F32 t,float f)1294 SI F32 select(I32 c, F32 t, float f) { return c->select(c, t , c->splat(f)); } select(I32 c,float t,F32 f)1295 SI F32 select(I32 c, float t, F32 f) { return c->select(c, c->splat(t), f ); } select(I32 c,float t,float f)1296 SI F32 select(I32 c, float t, float f) { return c->select(c, c->splat(t), c->splat(f)); } 1297 extract(I32 x,int bits,I32 z)1298 SI I32 extract(I32 x, int bits, I32 z) { return x->extract(x,bits,z); } extract(I32 x,int bits,int z)1299 SI I32 extract(I32 x, int bits, int z) { return x->extract(x,bits,z); } extract(int x,int bits,I32 z)1300 SI I32 extract(int x, int bits, I32 z) { return z->extract(x,bits,z); } 1301 pack(I32 x,I32 y,int bits)1302 SI I32 pack(I32 x, I32 y, int bits) { return x->pack (x,y,bits); } pack(I32 x,int y,int bits)1303 SI I32 pack(I32 x, int y, int bits) { return x->pack (x,y,bits); } pack(int x,I32 y,int bits)1304 SI I32 pack(int x, I32 y, int bits) { return y->pack (x,y,bits); } 1305 1306 SI I32 operator~(I32 x) { return ~0 ^ x; } 1307 SI I32 operator-(I32 x) { return 0 - x; } 1308 SI F32 operator-(F32 x) { return 0.0f - x; } 1309 from_unorm(int bits,I32 x)1310 SI F32 from_unorm(int bits, I32 x) { return x->from_unorm(bits,x); } to_unorm(int bits,F32 x)1311 SI I32 to_unorm(int bits, F32 x) { return x-> to_unorm(bits,x); } 1312 store(PixelFormat f,Ptr p,Color c)1313 SI void store(PixelFormat f, Ptr p, Color c) { return c->store(f,p,c); } 1314 gather(PixelFormat f,UPtr p,int off,I32 ix)1315 SI Color gather(PixelFormat f, UPtr p, int off, I32 ix) { return ix->gather(f,p,off,ix); } gather(PixelFormat f,Uniform u,I32 ix)1316 SI Color gather(PixelFormat f, Uniform u , I32 ix) { return ix->gather(f,u,ix); } 1317 premul(F32 * r,F32 * g,F32 * b,F32 a)1318 SI void premul(F32* r, F32* g, F32* b, F32 a) { a-> premul(r,g,b,a); } unpremul(F32 * r,F32 * g,F32 * b,F32 a)1319 SI void unpremul(F32* r, F32* g, F32* b, F32 a) { a->unpremul(r,g,b,a); } 1320 premul(Color c)1321 SI Color premul(Color c) { return c-> premul(c); } unpremul(Color c)1322 SI Color unpremul(Color c) { return c->unpremul(c); } 1323 lerp(Color lo,Color hi,F32 t)1324 SI Color lerp(Color lo, Color hi, F32 t) { return t->lerp(lo,hi,t); } 1325 blend(SkBlendMode m,Color s,Color d)1326 SI Color blend(SkBlendMode m, Color s, Color d) { return s->blend(m,s,d); } 1327 clamp01(Color c)1328 SI Color clamp01(Color c) { return c->clamp01(c); } 1329 to_hsla(Color c)1330 SI HSLA to_hsla(Color c) { return c->to_hsla(c); } to_rgba(HSLA c)1331 SI Color to_rgba(HSLA c) { return c->to_rgba(c); } 1332 1333 // Evaluate polynomials: ax^n + bx^(n-1) + ... for n >= 1 1334 template <typename F32_or_float, typename... Rest> poly(F32 x,F32_or_float a,float b,Rest...rest)1335 SI F32 poly(F32 x, F32_or_float a, float b, Rest... rest) { 1336 if constexpr (sizeof...(rest) == 0) { 1337 return x*a+b; 1338 } else { 1339 return poly(x, x*a+b, rest...); 1340 } 1341 } 1342 #undef SI 1343 } // namespace skvm 1344 1345 #endif//SkVM_DEFINED 1346