/*
 * Copyright 2019 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "tools/SkVMBuilders.h"

// Some parts of this builder code are written less fluently than possible,
// to avoid any ambiguity of function argument evaluation order. This lets
// our golden tests work portably. In general there's no reason to fear
// nesting calls to Builder routines.
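//
// For example, f32_to_byte below splats 255.0f and 0.5f into named locals
// before calling mad(); written as mad(f32, splat(255.0f), splat(0.5f)),
// C++ would leave the order of the two splat() calls, and so the order of
// the instructions they record, unspecified.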

SrcoverBuilder_F32::SrcoverBuilder_F32(Fmt srcFmt, Fmt dstFmt) {
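    // Scale an 8-bit channel in [0,255] to a float in [0.0, 1.0].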
    auto byte_to_f32 = [&](skvm::I32 byte) {
        skvm::F32 _1_255 = splat(1/255.0f);
        return mul(_1_255, to_f32(byte));
    };

    auto load = [&](Fmt fmt, skvm::F32* r, skvm::F32* g, skvm::F32* b, skvm::F32* a) {
        skvm::Arg ptr;
        switch (fmt) {
            case Fmt::A8: {
                ptr = varying<uint8_t>();
                *r = *g = *b = splat(0.0f);
                *a = byte_to_f32(load8(ptr));
            } break;

            case Fmt::G8: {
                ptr = varying<uint8_t>();
                *r = *g = *b = byte_to_f32(load8(ptr));
                *a = splat(1.0f);
            } break;

            case Fmt::RGBA_8888: {
                ptr = varying<int>();
                skvm::I32 rgba = load32(ptr);
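                // extract(v, bits, mask) is effectively (v >> bits) & mask,
                // so each channel lands in the low byte before conversion.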
                *r = byte_to_f32(extract(rgba,  0, splat(0xff)));
                *g = byte_to_f32(extract(rgba,  8, splat(0xff)));
                *b = byte_to_f32(extract(rgba, 16, splat(0xff)));
                *a = byte_to_f32(extract(rgba, 24, splat(0xff)));
            } break;
        }
        return ptr;
    };

    skvm::F32 r,g,b,a;
    (void)load(srcFmt, &r,&g,&b,&a);

    skvm::F32 dr,dg,db,da;
    skvm::Arg dst = load(dstFmt, &dr,&dg,&db,&da);

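    // Porter-Duff src-over: out = src + dst*(1 - srcA), applied to each channel.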
    skvm::F32 invA = sub(splat(1.0f), a);
    r = mad(dr, invA, r);
    g = mad(dg, invA, g);
    b = mad(db, invA, b);
    a = mad(da, invA, a);

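    // Scale back to [0,255], adding 0.5 so the float -> int conversion
    // rounds to the nearest byte rather than always down.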
    auto f32_to_byte = [&](skvm::F32 f32) {
        skvm::F32 _255 = splat(255.0f),
                  _0_5 = splat(0.5f);
        return to_i32(mad(f32, _255, _0_5));
    };
    switch (dstFmt) {
        case Fmt::A8: {
            store8(dst, f32_to_byte(a));
        } break;

        case Fmt::G8: {
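            // BT.709 luma coefficients: Y = 0.2126 R + 0.7152 G + 0.0722 B.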
            skvm::F32 _2126 = splat(0.2126f),
                      _7152 = splat(0.7152f),
                      _0722 = splat(0.0722f);
            store8(dst, f32_to_byte(mad(r, _2126,
                                    mad(g, _7152,
                                    mul(b, _0722)))));
        } break;

        case Fmt::RGBA_8888: {
            skvm::I32 R = f32_to_byte(r),
                      G = f32_to_byte(g),
                      B = f32_to_byte(b),
                      A = f32_to_byte(a);

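            // pack(lo, hi, bits) is lo | (hi << bits), so these three packs
            // rebuild the little-endian word A<<24 | B<<16 | G<<8 | R.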
            R = pack(R, G, 8);
            B = pack(B, A, 8);
            R = pack(R, B, 16);

            store32(dst, R);
        } break;
    }
}

SrcoverBuilder_I32_Naive::SrcoverBuilder_I32_Naive() {
    skvm::Arg src = varying<int>(),
              dst = varying<int>();

    auto load = [&](skvm::Arg ptr,
                    skvm::I32* r, skvm::I32* g, skvm::I32* b, skvm::I32* a) {
        skvm::I32 rgba = load32(ptr);
        *r = extract(rgba,  0, splat(0xff));
        *g = extract(rgba,  8, splat(0xff));
        *b = extract(rgba, 16, splat(0xff));
        *a = extract(rgba, 24, splat(0xff));
    };

    skvm::I32 r,g,b,a;
    load(src, &r,&g,&b,&a);

    skvm::I32 dr,dg,db,da;
    load(dst, &dr,&dg,&db,&da);

    // (xy + x)/256 is a good approximation of (xy + 127)/255
    //
    //   == (d*(255-a) + d)/256
    //   == (d*(255-a+1)  )/256
    //   == (d*(256-a  )  )/256
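    //
    // E.g. d=0x80, a=0x80:  exact  (0x80*(255-0x80) + 127)/255 = 64,
    //                       approx (0x80*(256-0x80)) >> 8      = 64.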

    skvm::I32 invA = sub(splat(256), a);
    r = add(r, shr(mul(dr, invA), 8));
    g = add(g, shr(mul(dg, invA), 8));
    b = add(b, shr(mul(db, invA), 8));
    a = add(a, shr(mul(da, invA), 8));

    r = pack(r, g, 8);
    b = pack(b, a, 8);
    r = pack(r, b, 16);
    store32(dst, r);
}

SrcoverBuilder_I32::SrcoverBuilder_I32() {
    skvm::Arg src = varying<int>(),
              dst = varying<int>();

    auto load = [&](skvm::Arg ptr,
                    skvm::I32* r, skvm::I32* g, skvm::I32* b, skvm::I32* a) {
        skvm::I32 rgba = load32(ptr);
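        // bytes() is a byte shuffle: each hex nibble of the control selects a
        // 1-indexed source byte (0 produces zero), so 0x0002 pulls g into the
        // low byte and 0x0003 pulls b.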
        *r = bit_and(rgba, splat(0xff));
        *g = bytes  (rgba, 0x0002);
        *b = bytes  (rgba, 0x0003);
        *a = shr    (rgba, 24);
    };

    skvm::I32 r,g,b,a;
    load(src, &r,&g,&b,&a);

    skvm::I32 dr,dg,db,da;
    load(dst, &dr,&dg,&db,&da);

    // (xy + x)/256 is a good approximation of (xy + 127)/255
    //
    //   == (d*(255-a) + d)/256
    //   == (d*(255-a+1)  )/256
    //   == (d*(256-a  )  )/256

    // We're doing 8x8 bit multiplies in 32-bit lanes.
    // Since the inputs and results both fit in 16 bits,
    // we can use mul_16x2, which tends to be faster than mul.
    //
    // (The top 2 zero bytes of the inputs will also multiply
    // with each other to produce zero... perfect.)

    skvm::I32 invA = sub(splat(256), a);
    r = add(r, shr(mul_16x2(dr, invA), 8));
    g = add(g, shr(mul_16x2(dg, invA), 8));
    b = add(b, shr(mul_16x2(db, invA), 8));
    a = add(a, shr(mul_16x2(da, invA), 8));

    r = pack(r, g, 8);
    b = pack(b, a, 8);
    r = pack(r, b, 16);
    store32(dst, r);
}

SrcoverBuilder_I32_SWAR::SrcoverBuilder_I32_SWAR() {
    skvm::Arg src = varying<int>(),
              dst = varying<int>();

    // The s += d*invA adds won't overflow,
    // so we don't have to unpack s beyond grabbing the alpha channel.
    skvm::I32 s = load32(src),
            ax2 = bytes(s, 0x0404);  // rgba -> a0a0

    // We'll use the same approximation math as above, this time making sure to
    // use both i16 multiplies to our benefit, one for r/g, the other for b/a.
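    // 0x01000100 is 256 in each 16-bit half, so invAx2 holds (256 - a) twice.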
    skvm::I32 invAx2 = sub_16x2(splat(0x01000100), ax2);

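    // Split dst into its even channels (r,b) and odd channels (g,a), each
    // sitting in its own 16-bit half so the multiplies below can't spill
    // between lanes.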
    skvm::I32 d  = load32(dst),
              rb = bit_and (d, splat(0x00ff00ff)),
              ga = shr_16x2(d, 8);

    rb = shr_16x2(mul_16x2(rb, invAx2), 8);  // Put the high 8 bits back in the low lane.
    ga = mul_16x2(ga, invAx2);               // Keep the high 8 bits up high...
    ga = bit_clear(ga, splat(0x00ff00ff));   // ...and mask off the low bits.
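    // rb now holds (r,b)*invA>>8 in the low byte of each half and ga holds
    // (g,a)*invA>>8 in the high byte, so OR-ing them re-interleaves all four
    // scaled channels in their original positions.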

    store32(dst, add(s, bit_or(rb, ga)));
}