1 /* 2 * Copyright 2019 Google LLC 3 * 4 * Use of this source code is governed by a BSD-style license that can be 5 * found in the LICENSE file. 6 */ 7 8 #ifndef SkVM_DEFINED 9 #define SkVM_DEFINED 10 11 #include "include/core/SkTypes.h" 12 #include "include/private/SkMacros.h" 13 #include "include/private/SkTHash.h" 14 #include "src/core/SkVM_fwd.h" 15 #include <vector> // std::vector 16 17 class SkWStream; 18 19 namespace skvm { 20 21 class Assembler { 22 public: 23 explicit Assembler(void* buf); 24 25 size_t size() const; 26 27 // Order matters... GP64, Xmm, Ymm values match 4-bit register encoding for each. 28 enum GP64 { 29 rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi, 30 r8 , r9 , r10, r11, r12, r13, r14, r15, 31 }; 32 enum Xmm { 33 xmm0, xmm1, xmm2 , xmm3 , xmm4 , xmm5 , xmm6 , xmm7 , 34 xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, 35 }; 36 enum Ymm { 37 ymm0, ymm1, ymm2 , ymm3 , ymm4 , ymm5 , ymm6 , ymm7 , 38 ymm8, ymm9, ymm10, ymm11, ymm12, ymm13, ymm14, ymm15, 39 }; 40 41 // X and V values match 5-bit encoding for each (nothing tricky). 42 enum X { 43 x0 , x1 , x2 , x3 , x4 , x5 , x6 , x7 , 44 x8 , x9 , x10, x11, x12, x13, x14, x15, 45 x16, x17, x18, x19, x20, x21, x22, x23, 46 x24, x25, x26, x27, x28, x29, x30, xzr, 47 }; 48 enum V { 49 v0 , v1 , v2 , v3 , v4 , v5 , v6 , v7 , 50 v8 , v9 , v10, v11, v12, v13, v14, v15, 51 v16, v17, v18, v19, v20, v21, v22, v23, 52 v24, v25, v26, v27, v28, v29, v30, v31, 53 }; 54 55 void bytes(const void*, int); 56 void byte(uint8_t); 57 void word(uint32_t); 58 59 // x86-64 60 61 void align(int mod); 62 63 void int3(); 64 void vzeroupper(); 65 void ret(); 66 67 void add(GP64, int imm); 68 void sub(GP64, int imm); 69 70 void movq(GP64 dst, GP64 src, int off); // dst = *(src+off) 71 72 struct Label { 73 int offset = 0; 74 enum { NotYetSet, ARMDisp19, X86Disp32 } kind = NotYetSet; 75 std::vector<int> references; 76 }; 77 78 struct YmmOrLabel { 79 Ymm ymm = ymm0; 80 Label* label = nullptr; 81 YmmOrLabelYmmOrLabel82 /*implicit*/ YmmOrLabel(Ymm y) : ymm (y) { SkASSERT(!label); } YmmOrLabelYmmOrLabel83 /*implicit*/ YmmOrLabel(Label* l) : label(l) { SkASSERT( label); } 84 }; 85 86 // All dst = x op y. 87 using DstEqXOpY = void(Ymm dst, Ymm x, Ymm y); 88 DstEqXOpY vpandn, 89 vpmulld, 90 vpsubw, vpmullw, 91 vdivps, 92 vfmadd132ps, vfmadd213ps, vfmadd231ps, 93 vpackusdw, vpackuswb, 94 vpcmpeqd, vpcmpgtd; 95 96 using DstEqXOpYOrLabel = void(Ymm dst, Ymm x, YmmOrLabel y); 97 DstEqXOpYOrLabel vpand, vpor, vpxor, 98 vpaddd, vpsubd, 99 vaddps, vsubps, vmulps, vminps, vmaxps; 100 101 // Floating point comparisons are all the same instruction with varying imm. 102 void vcmpps(Ymm dst, Ymm x, Ymm y, int imm); vcmpeqps(Ymm dst,Ymm x,Ymm y)103 void vcmpeqps (Ymm dst, Ymm x, Ymm y) { this->vcmpps(dst,x,y,0); } vcmpltps(Ymm dst,Ymm x,Ymm y)104 void vcmpltps (Ymm dst, Ymm x, Ymm y) { this->vcmpps(dst,x,y,1); } vcmpleps(Ymm dst,Ymm x,Ymm y)105 void vcmpleps (Ymm dst, Ymm x, Ymm y) { this->vcmpps(dst,x,y,2); } vcmpneqps(Ymm dst,Ymm x,Ymm y)106 void vcmpneqps(Ymm dst, Ymm x, Ymm y) { this->vcmpps(dst,x,y,4); } 107 108 using DstEqXOpImm = void(Ymm dst, Ymm x, int imm); 109 DstEqXOpImm vpslld, vpsrld, vpsrad, 110 vpsrlw, 111 vpermq, 112 vroundps; 113 114 enum { NEAREST, FLOOR, CEIL, TRUNC }; // vroundps immediates 115 116 using DstEqOpX = void(Ymm dst, Ymm x); 117 DstEqOpX vmovdqa, vcvtdq2ps, vcvttps2dq, vcvtps2dq, vsqrtps; 118 119 void vpblendvb(Ymm dst, Ymm x, Ymm y, Ymm z); 120 121 Label here(); 122 void label(Label*); 123 124 void jmp(Label*); 125 void je (Label*); 126 void jne(Label*); 127 void jl (Label*); 128 void jc (Label*); 129 void cmp(GP64, int imm); 130 131 void vpshufb(Ymm dst, Ymm x, Label*); 132 void vptest(Ymm dst, Label*); 133 134 void vbroadcastss(Ymm dst, Label*); 135 void vbroadcastss(Ymm dst, Xmm src); 136 void vbroadcastss(Ymm dst, GP64 ptr, int off); // dst = *(ptr+off) 137 138 void vmovups (Ymm dst, GP64 ptr); // dst = *ptr, 256-bit 139 void vpmovzxwd(Ymm dst, GP64 ptr); // dst = *ptr, 128-bit, each uint16_t expanded to int 140 void vpmovzxbd(Ymm dst, GP64 ptr); // dst = *ptr, 64-bit, each uint8_t expanded to int 141 void vmovd (Xmm dst, GP64 ptr); // dst = *ptr, 32-bit 142 143 enum Scale { ONE, TWO, FOUR, EIGHT }; 144 void vmovd(Xmm dst, Scale, GP64 index, GP64 base); // dst = *(base + scale*index), 32-bit 145 146 void vmovups(GP64 ptr, Ymm src); // *ptr = src, 256-bit 147 void vmovups(GP64 ptr, Xmm src); // *ptr = src, 128-bit 148 void vmovq (GP64 ptr, Xmm src); // *ptr = src, 64-bit 149 void vmovd (GP64 ptr, Xmm src); // *ptr = src, 32-bit 150 151 void movzbl(GP64 dst, GP64 ptr, int off); // dst = *(ptr+off), uint8_t -> int 152 void movb (GP64 ptr, GP64 src); // *ptr = src, 8-bit 153 154 void vmovd_direct(GP64 dst, Xmm src); // dst = src, 32-bit 155 void vmovd_direct(Xmm dst, GP64 src); // dst = src, 32-bit 156 157 void vpinsrw(Xmm dst, Xmm src, GP64 ptr, int imm); // dst = src; dst[imm] = *ptr, 16-bit 158 void vpinsrb(Xmm dst, Xmm src, GP64 ptr, int imm); // dst = src; dst[imm] = *ptr, 8-bit 159 160 void vpextrw(GP64 ptr, Xmm src, int imm); // *dst = src[imm] , 16-bit 161 void vpextrb(GP64 ptr, Xmm src, int imm); // *dst = src[imm] , 8-bit 162 163 // if (mask & 0x8000'0000) { 164 // dst = base[scale*ix]; 165 // } 166 // mask = 0; 167 void vgatherdps(Ymm dst, Scale scale, Ymm ix, GP64 base, Ymm mask); 168 169 // aarch64 170 171 // d = op(n,m) 172 using DOpNM = void(V d, V n, V m); 173 DOpNM and16b, orr16b, eor16b, bic16b, bsl16b, 174 add4s, sub4s, mul4s, 175 cmeq4s, cmgt4s, 176 sub8h, mul8h, 177 fadd4s, fsub4s, fmul4s, fdiv4s, fmin4s, fmax4s, 178 fcmeq4s, fcmgt4s, fcmge4s, 179 tbl; 180 181 // TODO: there are also float ==,<,<=,>,>= instructions with an immediate 0.0f, 182 // and the register comparison > and >= can also compare absolute values. Interesting. 183 184 // d += n*m 185 void fmla4s(V d, V n, V m); 186 187 // d -= n*m 188 void fmls4s(V d, V n, V m); 189 190 // d = op(n,imm) 191 using DOpNImm = void(V d, V n, int imm); 192 DOpNImm sli4s, 193 shl4s, sshr4s, ushr4s, 194 ushr8h; 195 196 // d = op(n) 197 using DOpN = void(V d, V n); 198 DOpN not16b, // d = ~n 199 scvtf4s, // int -> float 200 fcvtzs4s, // truncate float -> int 201 fcvtns4s, // round float -> int 202 xtns2h, // u32 -> u16 203 xtnh2b, // u16 -> u8 204 uxtlb2h, // u8 -> u16 205 uxtlh2s, // u16 -> u32 206 uminv4s; // dst[0] = min(n[0],n[1],n[2],n[3]), n as unsigned 207 208 void brk (int imm16); 209 void ret (X); 210 void add (X d, X n, int imm12); 211 void sub (X d, X n, int imm12); 212 void subs(X d, X n, int imm12); // subtract setting condition flags 213 214 // There's another encoding for unconditional branches that can jump further, 215 // but this one encoded as b.al is simple to implement and should be fine. b(Label * l)216 void b (Label* l) { this->b(Condition::al, l); } bne(Label * l)217 void bne(Label* l) { this->b(Condition::ne, l); } blt(Label * l)218 void blt(Label* l) { this->b(Condition::lt, l); } 219 220 // "cmp ..." is just an assembler mnemonic for "subs xzr, ..."! cmp(X n,int imm12)221 void cmp(X n, int imm12) { this->subs(xzr, n, imm12); } 222 223 // Compare and branch if zero/non-zero, as if 224 // cmp(t,0) 225 // beq/bne(l) 226 // but without setting condition flags. 227 void cbz (X t, Label* l); 228 void cbnz(X t, Label* l); 229 230 void ldrq(V dst, Label*); // 128-bit PC-relative load 231 232 void ldrq(V dst, X src); // 128-bit dst = *src 233 void ldrs(V dst, X src); // 32-bit dst = *src 234 void ldrb(V dst, X src); // 8-bit dst = *src 235 236 void strq(V src, X dst); // 128-bit *dst = src 237 void strs(V src, X dst); // 32-bit *dst = src 238 void strb(V src, X dst); // 8-bit *dst = src 239 240 void fmovs(X dst, V src); // dst = 32-bit src[0] 241 242 private: 243 // dst = op(dst, imm) 244 void op(int opcode, int opcode_ext, GP64 dst, int imm); 245 246 247 // dst = op(x,y) or op(x) 248 void op(int prefix, int map, int opcode, Ymm dst, Ymm x, Ymm y, bool W=false); 249 void op(int prefix, int map, int opcode, Ymm dst, Ymm x, bool W=false) { 250 // Two arguments ops seem to pass them in dst and y, forcing x to 0 so VEX.vvvv == 1111. 251 this->op(prefix, map, opcode, dst,(Ymm)0,x, W); 252 } 253 254 // dst = op(x,imm) 255 void op(int prefix, int map, int opcode, int opcode_ext, Ymm dst, Ymm x, int imm); 256 257 // dst = op(x,label) or op(label) 258 void op(int prefix, int map, int opcode, Ymm dst, Ymm x, Label* l); 259 void op(int prefix, int map, int opcode, Ymm dst, Ymm x, YmmOrLabel); 260 261 // *ptr = ymm or ymm = *ptr, depending on opcode. 262 void load_store(int prefix, int map, int opcode, Ymm ymm, GP64 ptr); 263 264 // Opcode for 3-arguments ops is split between hi and lo: 265 // [11 bits hi] [5 bits m] [6 bits lo] [5 bits n] [5 bits d] 266 void op(uint32_t hi, V m, uint32_t lo, V n, V d); 267 268 // 2-argument ops, with or without an immediate. 269 void op(uint32_t op22, int imm, V n, V d); op(uint32_t op22,V n,V d)270 void op(uint32_t op22, V n, V d) { this->op(op22,0,n,d); } op(uint32_t op22,X x,V v)271 void op(uint32_t op22, X x, V v) { this->op(op22,0,(V)x,v); } 272 273 // Order matters... value is 4-bit encoding for condition code. 274 enum class Condition { eq,ne,cs,cc,mi,pl,vs,vc,hi,ls,ge,lt,gt,le,al }; 275 void b(Condition, Label*); 276 277 void jump(uint8_t condition, Label*); 278 279 int disp19(Label*); 280 int disp32(Label*); 281 282 uint8_t* fCode; 283 uint8_t* fCurr; 284 size_t fSize; 285 }; 286 287 // Order matters a little: Ops <=store32 are treated as having side effects. 288 #define SKVM_OPS(M) \ 289 M(assert_true) \ 290 M(store8) M(store16) M(store32) \ 291 M(index) \ 292 M(load8) M(load16) M(load32) \ 293 M(gather8) M(gather16) M(gather32) \ 294 M(uniform8) M(uniform16) M(uniform32) \ 295 M(splat) \ 296 M(add_f32) M(add_i32) M(add_i16x2) \ 297 M(sub_f32) M(sub_i32) M(sub_i16x2) \ 298 M(mul_f32) M(mul_i32) M(mul_i16x2) \ 299 M(div_f32) \ 300 M(min_f32) \ 301 M(max_f32) \ 302 M(mad_f32) \ 303 M(sqrt_f32) \ 304 M(shl_i32) M(shl_i16x2) \ 305 M(shr_i32) M(shr_i16x2) \ 306 M(sra_i32) M(sra_i16x2) \ 307 M(add_f32_imm) \ 308 M(sub_f32_imm) \ 309 M(mul_f32_imm) \ 310 M(min_f32_imm) \ 311 M(max_f32_imm) \ 312 M(floor) M(trunc) M(round) M(to_f32) \ 313 M( eq_f32) M( eq_i32) M( eq_i16x2) \ 314 M(neq_f32) M(neq_i32) M(neq_i16x2) \ 315 M( gt_f32) M( gt_i32) M( gt_i16x2) \ 316 M(gte_f32) M(gte_i32) M(gte_i16x2) \ 317 M(bit_and) \ 318 M(bit_or) \ 319 M(bit_xor) \ 320 M(bit_clear) \ 321 M(bit_and_imm) \ 322 M(bit_or_imm) \ 323 M(bit_xor_imm) \ 324 M(select) M(bytes) M(pack) \ 325 // End of SKVM_OPS 326 327 enum class Op : int { 328 #define M(op) op, 329 SKVM_OPS(M) 330 #undef M 331 }; 332 333 using Val = int; 334 // We reserve the last Val ID as a sentinel meaning none, n/a, null, nil, etc. 335 static const Val NA = ~0; 336 337 struct Arg { int ix; }; 338 struct I32 { Val id; }; 339 struct F32 { Val id; }; 340 341 struct Color { skvm::F32 r,g,b,a; }; 342 343 struct OptimizedInstruction { 344 Op op; 345 Val x,y,z; 346 int immy,immz; 347 348 int death; 349 bool can_hoist; 350 bool used_in_loop; 351 }; 352 353 class Builder { 354 public: 355 SK_BEGIN_REQUIRE_DENSE 356 struct Instruction { 357 Op op; // v* = op(x,y,z,imm), where * == index of this Instruction. 358 Val x,y,z; // Enough arguments for mad(). 359 int immy,immz; // Immediate bit pattern, shift count, argument index, etc. 360 }; 361 SK_END_REQUIRE_DENSE 362 363 Program done(const char* debug_name = nullptr) const; 364 365 // Mostly for debugging, tests, etc. program()366 std::vector<Instruction> program() const { return fProgram; } 367 std::vector<OptimizedInstruction> optimize(bool for_jit=false) const; 368 369 // Declare an argument with given stride (use stride=0 for uniforms). 370 // TODO: different types for varying and uniforms? 371 Arg arg(int stride); 372 373 // Convenience arg() wrappers for most common strides, sizeof(T) and 0. 374 template <typename T> varying()375 Arg varying() { return this->arg(sizeof(T)); } uniform()376 Arg uniform() { return this->arg(0); } 377 378 // TODO: allow uniform (i.e. Arg) offsets to store* and load*? 379 // TODO: sign extension (signed types) for <32-bit loads? 380 // TODO: unsigned integer operations where relevant (just comparisons?)? 381 382 // Assert cond is true, printing debug when not. 383 void assert_true(I32 cond, I32 debug); assert_true(I32 cond,F32 debug)384 void assert_true(I32 cond, F32 debug) { this->assert_true(cond, this->bit_cast(debug)); } assert_true(I32 cond)385 void assert_true(I32 cond) { this->assert_true(cond, cond); } 386 387 // Store {8,16,32}-bit varying. 388 void store8 (Arg ptr, I32 val); 389 void store16(Arg ptr, I32 val); 390 void store32(Arg ptr, I32 val); 391 392 // Returns varying {n, n-1, n-2, ..., 1}, where n is the argument to Program::eval(). 393 I32 index(); 394 395 // Load u8,u16,i32 varying. 396 I32 load8 (Arg ptr); 397 I32 load16(Arg ptr); 398 I32 load32(Arg ptr); 399 400 // Load u8,u16,i32 uniform with byte-count offset. 401 I32 uniform8 (Arg ptr, int offset); 402 I32 uniform16(Arg ptr, int offset); 403 I32 uniform32(Arg ptr, int offset); uniformF(Arg ptr,int offset)404 F32 uniformF (Arg ptr, int offset) { return this->bit_cast(this->uniform32(ptr,offset)); } 405 406 // Gather u8,u16,i32 with varying element-count index from *(ptr + byte-count offset). 407 I32 gather8 (Arg ptr, int offset, I32 index); 408 I32 gather16(Arg ptr, int offset, I32 index); 409 I32 gather32(Arg ptr, int offset, I32 index); 410 411 // Convenience methods for working with skvm::Uniforms. 412 struct Uniform { 413 Arg ptr; 414 int offset; 415 }; uniform8(Uniform u)416 I32 uniform8 (Uniform u) { return this->uniform8 (u.ptr, u.offset); } uniform16(Uniform u)417 I32 uniform16(Uniform u) { return this->uniform16(u.ptr, u.offset); } uniform32(Uniform u)418 I32 uniform32(Uniform u) { return this->uniform32(u.ptr, u.offset); } uniformF(Uniform u)419 F32 uniformF (Uniform u) { return this->uniformF (u.ptr, u.offset); } gather8(Uniform u,I32 index)420 I32 gather8 (Uniform u, I32 index) { return this->gather8 (u.ptr, u.offset, index); } gather16(Uniform u,I32 index)421 I32 gather16 (Uniform u, I32 index) { return this->gather16 (u.ptr, u.offset, index); } gather32(Uniform u,I32 index)422 I32 gather32 (Uniform u, I32 index) { return this->gather32 (u.ptr, u.offset, index); } 423 424 // Load an immediate constant. 425 I32 splat(int n); splat(unsigned u)426 I32 splat(unsigned u) { return this->splat((int)u); } 427 F32 splat(float f); 428 429 // float math, comparisons, etc. 430 F32 add(F32 x, F32 y); 431 F32 sub(F32 x, F32 y); 432 F32 mul(F32 x, F32 y); 433 F32 div(F32 x, F32 y); 434 F32 min(F32 x, F32 y); 435 F32 max(F32 x, F32 y); 436 F32 mad(F32 x, F32 y, F32 z); // x*y+z, often an FMA 437 F32 sqrt(F32 x); 438 negate(F32 x)439 F32 negate(F32 x) { 440 return sub(splat(0.0f), x); 441 } lerp(F32 lo,F32 hi,F32 t)442 F32 lerp(F32 lo, F32 hi, F32 t) { 443 return mad(sub(hi,lo), t, lo); 444 } clamp(F32 x,F32 lo,F32 hi)445 F32 clamp(F32 x, F32 lo, F32 hi) { 446 return max(lo, min(x, hi)); 447 } abs(F32 x)448 F32 abs(F32 x) { 449 return bit_cast(bit_and(bit_cast(x), 450 splat(0x7fffffff))); 451 } fract(F32 x)452 F32 fract(F32 x) { 453 return sub(x, floor(x)); 454 } norm(F32 x,F32 y)455 F32 norm(F32 x, F32 y) { 456 return sqrt(mad(x,x, mul(y,y))); 457 } 458 459 I32 eq (F32 x, F32 y); 460 I32 neq(F32 x, F32 y); 461 I32 lt (F32 x, F32 y); 462 I32 lte(F32 x, F32 y); 463 I32 gt (F32 x, F32 y); 464 I32 gte(F32 x, F32 y); 465 466 F32 floor(F32); 467 I32 trunc(F32 x); 468 I32 round(F32 x); bit_cast(F32 x)469 I32 bit_cast(F32 x) { return {x.id}; } 470 471 // int math, comparisons, etc. 472 I32 add(I32 x, I32 y); 473 I32 sub(I32 x, I32 y); 474 I32 mul(I32 x, I32 y); 475 476 I32 shl(I32 x, int bits); 477 I32 shr(I32 x, int bits); 478 I32 sra(I32 x, int bits); 479 480 I32 eq (I32 x, I32 y); 481 I32 neq(I32 x, I32 y); 482 I32 lt (I32 x, I32 y); 483 I32 lte(I32 x, I32 y); 484 I32 gt (I32 x, I32 y); 485 I32 gte(I32 x, I32 y); 486 487 F32 to_f32(I32 x); bit_cast(I32 x)488 F32 bit_cast(I32 x) { return {x.id}; } 489 490 // Treat each 32-bit lane as a pair of 16-bit ints. 491 I32 add_16x2(I32 x, I32 y); 492 I32 sub_16x2(I32 x, I32 y); 493 I32 mul_16x2(I32 x, I32 y); 494 495 I32 shl_16x2(I32 x, int bits); 496 I32 shr_16x2(I32 x, int bits); 497 I32 sra_16x2(I32 x, int bits); 498 499 I32 eq_16x2(I32 x, I32 y); 500 I32 neq_16x2(I32 x, I32 y); 501 I32 lt_16x2(I32 x, I32 y); 502 I32 lte_16x2(I32 x, I32 y); 503 I32 gt_16x2(I32 x, I32 y); 504 I32 gte_16x2(I32 x, I32 y); 505 506 // Bitwise operations. 507 I32 bit_and (I32 x, I32 y); 508 I32 bit_or (I32 x, I32 y); 509 I32 bit_xor (I32 x, I32 y); 510 I32 bit_clear(I32 x, I32 y); // x & ~y 511 512 I32 select(I32 cond, I32 t, I32 f); // cond ? t : f select(I32 cond,F32 t,F32 f)513 F32 select(I32 cond, F32 t, F32 f) { 514 return this->bit_cast(this->select(cond, this->bit_cast(t) 515 , this->bit_cast(f))); 516 } 517 518 // More complex operations... 519 520 // Shuffle the bytes in x according to each nibble of control, as if 521 // 522 // uint8_t bytes[] = { 523 // 0, 524 // ((uint32_t)x ) & 0xff, 525 // ((uint32_t)x >> 8) & 0xff, 526 // ((uint32_t)x >> 16) & 0xff, 527 // ((uint32_t)x >> 24) & 0xff, 528 // }; 529 // return (uint32_t)bytes[(control >> 0) & 0xf] << 0 530 // | (uint32_t)bytes[(control >> 4) & 0xf] << 8 531 // | (uint32_t)bytes[(control >> 8) & 0xf] << 16 532 // | (uint32_t)bytes[(control >> 12) & 0xf] << 24; 533 // 534 // So, e.g., 535 // - bytes(x, 0x1111) splats the low byte of x to all four bytes 536 // - bytes(x, 0x4321) is x, an identity 537 // - bytes(x, 0x0000) is 0 538 // - bytes(x, 0x0404) transforms an RGBA pixel into an A0A0 bit pattern. 539 I32 bytes (I32 x, int control); 540 541 I32 extract(I32 x, int bits, I32 z); // (x>>bits) & z 542 I32 pack (I32 x, I32 y, int bits); // x | (y << bits), assuming (x & (y << bits)) == 0 543 544 // Common idioms used in several places, worth centralizing for consistency. 545 F32 from_unorm(int bits, I32); // E.g. from_unorm(8, x) -> x * (1/255.0f) 546 I32 to_unorm(int bits, F32); // E.g. to_unorm(8, x) -> round(x * 255) 547 548 Color unpack_1010102(I32 rgba); 549 Color unpack_8888 (I32 rgba); 550 Color unpack_565 (I32 bgr ); // bottom 16 bits 551 552 void premul(F32* r, F32* g, F32* b, F32 a); 553 void unpremul(F32* r, F32* g, F32* b, F32 a); 554 555 Color lerp(Color lo, Color hi, F32 t); 556 557 void dump(SkWStream* = nullptr) const; 558 559 uint64_t hash() const; 560 561 private: 562 struct InstructionHash { 563 uint32_t operator()(const Instruction& inst, uint32_t seed=0) const; 564 }; 565 566 Val push(Op, Val x, Val y=NA, Val z=NA, int immy=0, int immz=0); 567 568 bool allImm() const; 569 570 template <typename T, typename... Rest> 571 bool allImm(Val, T* imm, Rest...) const; 572 573 template <typename T> isImm(Val id,T want)574 bool isImm(Val id, T want) const { 575 T imm = 0; 576 return this->allImm(id, &imm) && imm == want; 577 } 578 579 SkTHashMap<Instruction, Val, InstructionHash> fIndex; 580 std::vector<Instruction> fProgram; 581 std::vector<int> fStrides; 582 }; 583 584 // Helper to streamline allocating and working with uniforms. 585 struct Uniforms { 586 Arg base; 587 std::vector<int> buf; 588 UniformsUniforms589 explicit Uniforms(int init) : base(Arg{0}), buf(init) {} 590 pushUniforms591 Builder::Uniform push(int val) { 592 buf.push_back(val); 593 return {base, (int)( sizeof(int)*(buf.size() - 1) )}; 594 } 595 pushFUniforms596 Builder::Uniform pushF(float val) { 597 int bits; 598 memcpy(&bits, &val, sizeof(int)); 599 return this->push(bits); 600 } 601 pushPtrUniforms602 Builder::Uniform pushPtr(const void* ptr) { 603 // Jam the pointer into 1 or 2 ints. 604 int ints[sizeof(ptr) / sizeof(int)]; 605 memcpy(ints, &ptr, sizeof(ptr)); 606 for (int bits : ints) { 607 buf.push_back(bits); 608 } 609 return {base, (int)( sizeof(int)*(buf.size() - SK_ARRAY_COUNT(ints)) )}; 610 } 611 }; 612 613 using Reg = int; 614 615 class Program { 616 public: 617 struct Instruction { // d = op(x, y/imm, z/imm) 618 Op op; 619 Reg d,x; 620 union { Reg y; int immy; }; 621 union { Reg z; int immz; }; 622 }; 623 624 Program(const std::vector<OptimizedInstruction>& interpreter, 625 const std::vector<int>& strides); 626 627 Program(const std::vector<OptimizedInstruction>& interpreter, 628 const std::vector<OptimizedInstruction>& jit, 629 const std::vector<int>& strides, 630 const char* debug_name); 631 632 Program(); 633 ~Program(); 634 Program(Program&&); 635 Program& operator=(Program&&); 636 Program(const Program&) = delete; 637 Program& operator=(const Program&) = delete; 638 639 void eval(int n, void* args[]) const; 640 641 template <typename... T> eval(int n,T * ...arg)642 void eval(int n, T*... arg) const { 643 SkASSERT(sizeof...(arg) == fStrides.size()); 644 // This nullptr isn't important except that it makes args[] non-empty if you pass none. 645 void* args[] = { (void*)arg..., nullptr }; 646 this->eval(n, args); 647 } 648 instructions()649 std::vector<Instruction> instructions() const { return fInstructions; } nregs()650 int nregs() const { return fRegs; } loop()651 int loop() const { return fLoop; } empty()652 bool empty() const { return fInstructions.empty(); } 653 654 bool hasJIT() const; // Has this Program been JITted? 655 void dropJIT(); // If hasJIT(), drop it, forcing interpreter fallback. 656 657 void dump(SkWStream* = nullptr) const; 658 659 private: 660 void setupInterpreter(const std::vector<OptimizedInstruction>&); 661 void setupJIT (const std::vector<OptimizedInstruction>&, const char* debug_name); 662 663 void interpret(int n, void* args[]) const; 664 665 bool jit(const std::vector<OptimizedInstruction>&, 666 bool try_hoisting, 667 Assembler*) const; 668 669 std::vector<Instruction> fInstructions; 670 int fRegs = 0; 671 int fLoop = 0; 672 std::vector<int> fStrides; 673 674 void* fJITEntry = nullptr; 675 size_t fJITSize = 0; 676 void* fDylib = nullptr; 677 }; 678 679 // TODO: control flow 680 // TODO: 64-bit values? 681 // TODO: SSE2/SSE4.1, AVX-512F, ARMv8.2 JITs? 682 // TODO: lower to LLVM or WebASM for comparison? 683 } 684 685 #endif//SkVM_DEFINED 686