/*
 * Copyright 2019 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "tools/SkVMBuilders.h"

// Some parts of this builder code are written less fluently than possible,
// to avoid any ambiguity of function argument evaluation order.  This lets
// our golden tests work portably.  In general there's no reason to fear
// nesting calls to Builder routines.
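//
// (C++ leaves the evaluation order of function arguments unspecified, so an
// expression like add(mul(x,y), mul(z,w)) could record its two muls in either
// order depending on the compiler, changing the golden instruction trace even
// though the result is the same.)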

SrcoverBuilder_F32::SrcoverBuilder_F32(Fmt srcFmt, Fmt dstFmt) {
    auto byte_to_f32 = [&](skvm::I32 byte) {
        skvm::F32 _1_255 = splat(1/255.0f);
        return mul(_1_255, to_f32(byte));
    };

    auto load = [&](Fmt fmt, skvm::F32* r, skvm::F32* g, skvm::F32* b, skvm::F32* a) {
        skvm::Arg ptr;
        switch (fmt) {
            case Fmt::A8: {
                ptr = varying<uint8_t>();
                *r = *g = *b = splat(0.0f);
                *a = byte_to_f32(load8(ptr));
            } break;

            case Fmt::G8: {
                ptr = varying<uint8_t>();
                *r = *g = *b = byte_to_f32(load8(ptr));
                *a = splat(1.0f);
            } break;

            case Fmt::RGBA_8888: {
                ptr = varying<int>();
                skvm::I32 rgba = load32(ptr);
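                // extract(x, bits, mask) shifts x right by bits then masks,
                // so each channel below is (rgba >> shift) & 0xff.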
                *r = byte_to_f32(extract(rgba,  0, splat(0xff)));
                *g = byte_to_f32(extract(rgba,  8, splat(0xff)));
                *b = byte_to_f32(extract(rgba, 16, splat(0xff)));
                *a = byte_to_f32(extract(rgba, 24, splat(0xff)));
            } break;
        }
        return ptr;
    };

    skvm::F32 r,g,b,a;
    (void)load(srcFmt, &r,&g,&b,&a);

    skvm::F32 dr,dg,db,da;
    skvm::Arg dst = load(dstFmt, &dr,&dg,&db,&da);

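    // Premultiplied src-over: out = src + dst*(1 - srcA), applied to every
    // channel, alpha included.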
    skvm::F32 invA = sub(splat(1.0f), a);
    r = mad(dr, invA, r);
    g = mad(dg, invA, g);
    b = mad(db, invA, b);
    a = mad(da, invA, a);

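    // Scale back up to bytes, rounding to nearest by adding 0.5 before the
    // float -> int truncation.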
    auto f32_to_byte = [&](skvm::F32 f32) {
        skvm::F32 _255 = splat(255.0f),
                  _0_5 = splat(0.5f);
        return to_i32(mad(f32, _255, _0_5));
    };
    switch (dstFmt) {
        case Fmt::A8: {
            store8(dst, f32_to_byte(a));
        } break;

        case Fmt::G8: {
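            // BT.709 luma coefficients for the gray conversion.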
            skvm::F32 _2126 = splat(0.2126f),
                      _7152 = splat(0.7152f),
                      _0722 = splat(0.0722f);
            store8(dst, f32_to_byte(mad(r, _2126,
                                    mad(g, _7152,
                                    mul(b, _0722)))));
        } break;

        case Fmt::RGBA_8888: {
            skvm::I32 R = f32_to_byte(r),
                      G = f32_to_byte(g),
                      B = f32_to_byte(b),
                      A = f32_to_byte(a);

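            // pack(x, y, bits) is x | (y << bits), so these three steps
            // assemble r | g<<8 | b<<16 | a<<24.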
            R = pack(R, G, 8);
            B = pack(B, A, 8);
            R = pack(R, B, 16);

            store32(dst, R);
        } break;
    }
}

SrcoverBuilder_I32_Naive::SrcoverBuilder_I32_Naive() {
    skvm::Arg src = varying<int>(),
              dst = varying<int>();

    auto load = [&](skvm::Arg ptr,
                    skvm::I32* r, skvm::I32* g, skvm::I32* b, skvm::I32* a) {
        skvm::I32 rgba = load32(ptr);
        *r = extract(rgba,  0, splat(0xff));
        *g = extract(rgba,  8, splat(0xff));
        *b = extract(rgba, 16, splat(0xff));
        *a = extract(rgba, 24, splat(0xff));
    };

    skvm::I32 r,g,b,a;
    load(src, &r,&g,&b,&a);

    skvm::I32 dr,dg,db,da;
    load(dst, &dr,&dg,&db,&da);

    // (xy + x)/256 is a good approximation of (xy + 127)/255,
    // here with x = d and y = 255-a, so the dst contribution
    //
    //   == (d*(255-a) + d)/256
    //   == (d*(255-a+1)  )/256
    //   == (d*(256-a  )  )/256

    skvm::I32 invA = sub(splat(256), a);
    r = add(r, shr(mul(dr, invA), 8));
    g = add(g, shr(mul(dg, invA), 8));
    b = add(b, shr(mul(db, invA), 8));
    a = add(a, shr(mul(da, invA), 8));

    r = pack(r, g, 8);
    b = pack(b, a, 8);
    r = pack(r, b, 16);
    store32(dst, r);
}

SrcoverBuilder_I32::SrcoverBuilder_I32() {
    skvm::Arg src = varying<int>(),
              dst = varying<int>();

    auto load = [&](skvm::Arg ptr,
                    skvm::I32* r, skvm::I32* g, skvm::I32* b, skvm::I32* a) {
        skvm::I32 rgba = load32(ptr);
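        // bytes() is a byte shuffle: each hex nibble of the control word picks
        // a source byte (1-4, low to high; 0 produces zero).  0x0002 moves the
        // green byte into the low byte, 0x0003 does the same for blue.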
        *r = bit_and(rgba, splat(0xff));
        *g = bytes  (rgba, 0x0002);
        *b = bytes  (rgba, 0x0003);
        *a = shr    (rgba, 24);
    };

    skvm::I32 r,g,b,a;
    load(src, &r,&g,&b,&a);

    skvm::I32 dr,dg,db,da;
    load(dst, &dr,&dg,&db,&da);

    // (xy + x)/256 is a good approximation of (xy + 127)/255
    //
    //   == (d*(255-a) + d)/256
    //   == (d*(255-a+1)  )/256
    //   == (d*(256-a  )  )/256

    // We're doing 8x8 bit multiplies in 32-bit lanes.
    // Since the inputs and results both fit in 16 bits,
    // we can use mul_16x2, which tends to be faster than mul.
    //
    // (The top 2 zero bytes of the inputs will also multiply
    // with each other to produce zero... perfect.)

    skvm::I32 invA = sub(splat(256), a);
    r = add(r, shr(mul_16x2(dr, invA), 8));
    g = add(g, shr(mul_16x2(dg, invA), 8));
    b = add(b, shr(mul_16x2(db, invA), 8));
    a = add(a, shr(mul_16x2(da, invA), 8));

    r = pack(r, g, 8);
    b = pack(b, a, 8);
    r = pack(r, b, 16);
    store32(dst, r);
}

SrcoverBuilder_I32_SWAR::SrcoverBuilder_I32_SWAR() {
    skvm::Arg src = varying<int>(),
              dst = varying<int>();

    // The s += d*invA adds won't overflow,
    // so we don't have to unpack s beyond grabbing the alpha channel.
    skvm::I32 s = load32(src),
            ax2 = bytes(s, 0x0404);  // rgba -> a0a0

    // We'll use the same approximation math as above, this time making sure to
    // use both i16 multiplies to our benefit, one for r/g, the other for b/a.
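    // 0x0100 is 256 in each 16-bit lane, so invAx2 holds (256 - a) twice,
    // once for the r/g half and once for the b/a half.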
    skvm::I32 invAx2 = sub_16x2(splat(0x01000100), ax2);

    skvm::I32 d  = load32(dst),
              rb = bit_and (d, splat(0x00ff00ff)),
              ga = shr_16x2(d, 8);

    rb = shr_16x2(mul_16x2(rb, invAx2), 8);  // Put the high 8 bits back in the low lane.
    ga =          mul_16x2(ga, invAx2);      // Keep the high 8 bits up high...
    ga = bit_clear(ga, splat(0x00ff00ff));   // ...and mask off the low bits.
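    // Leaving ga's products up in the high byte of each lane is the same as
    // shifting down by 8 and back up, just without the extra instructions.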

    store32(dst, add(s, bit_or(rb, ga)));
}