• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1A8 over A8
214 values (originally 16):
3↑ v0 = splat 3B808081 (0.0039215689)
4  v1 = load8 arg(0)
5  v2 = to_f32 v1
6  v3 = mul_f32 v0 v2
7  v4 = load8 arg(1)
8  v5 = to_f32 v4
9  v6 = mul_f32 v0 v5
10↑ v7 = splat 3F800000 (1)
11  v8 = sub_f32 v7 v3
12  v9 = mad_f32 v6 v8 v3
13↑ v10 = splat 437F0000 (255)
14  v11 = mul_f32 v9 v10
15  v12 = round v11
16  store8 arg(1) v12
17
186 registers, 14 instructions:
190	r0 = splat 3B808081 (0.0039215689)
201	r1 = splat 3F800000 (1)
212	r2 = splat 437F0000 (255)
22loop:
233	    r3 = load8 arg(0)
244	    r3 = to_f32 r3
255	    r3 = mul_f32 r0 r3
266	    r4 = load8 arg(1)
277	    r4 = to_f32 r4
288	    r4 = mul_f32 r0 r4
299	    r5 = sub_f32 r1 r3
3010	    r3 = mad_f32 r4 r5 r3
3111	    r3 = mul_f32 r3 r2
3212	    r3 = round r3
3313	    store8 arg(1) r3
34
35A8 over G8
3620 values (originally 22):
37↑ v0 = splat 3B808081 (0.0039215689)
38  v1 = load8 arg(1)
39  v2 = to_f32 v1
40  v3 = mul_f32 v0 v2
41  v4 = load8 arg(0)
42  v5 = to_f32 v4
43  v6 = mul_f32 v0 v5
44↑ v7 = splat 3F800000 (1)
45  v8 = sub_f32 v7 v6
46  v9 = mul_f32 v3 v8
47↑ v10 = splat 3E59B3D0 (0.21259999)
48↑ v11 = splat 3F371759 (0.71520001)
49↑ v12 = splat 3D93DD98 (0.0722)
50  v13 = mul_f32 v9 v12
51  v14 = mad_f32 v9 v11 v13
52  v15 = mad_f32 v9 v10 v14
53↑ v16 = splat 437F0000 (255)
54  v17 = mul_f32 v15 v16
55  v18 = round v17
56  store8 arg(1) v18
57
588 registers, 20 instructions:
590	r0 = splat 3B808081 (0.0039215689)
601	r1 = splat 3F800000 (1)
612	r2 = splat 3E59B3D0 (0.21259999)
623	r3 = splat 3F371759 (0.71520001)
634	r4 = splat 3D93DD98 (0.0722)
645	r5 = splat 437F0000 (255)
65loop:
666	    r6 = load8 arg(1)
677	    r6 = to_f32 r6
688	    r6 = mul_f32 r0 r6
699	    r7 = load8 arg(0)
7010	    r7 = to_f32 r7
7111	    r7 = mul_f32 r0 r7
7212	    r7 = sub_f32 r1 r7
7313	    r7 = mul_f32 r6 r7
7414	    r6 = mul_f32 r7 r4
7515	    r6 = mad_f32 r7 r3 r6
7616	    r6 = mad_f32 r7 r2 r6
7717	    r6 = mul_f32 r6 r5
7818	    r6 = round r6
7919	    store8 arg(1) r6
80
81A8 over RGBA_8888
8239 values (originally 40):
83↑ v0 = splat 3B808081 (0.0039215689)
84  v1 = load32 arg(1)
85↑ v2 = splat FF (3.5733111e-43)
86  v3 = bit_and v2 v1
87  v4 = to_f32 v3
88  v5 = mul_f32 v0 v4
89  v6 = load8 arg(0)
90  v7 = to_f32 v6
91  v8 = mul_f32 v0 v7
92↑ v9 = splat 3F800000 (1)
93  v10 = sub_f32 v9 v8
94  v11 = mul_f32 v5 v10
95↑ v12 = splat 437F0000 (255)
96  v13 = mul_f32 v11 v12
97  v14 = round v13
98  v15 = shr_i32 v1 8
99  v16 = bit_and v2 v15
100  v17 = to_f32 v16
101  v18 = mul_f32 v0 v17
102  v19 = mul_f32 v18 v10
103  v20 = mul_f32 v19 v12
104  v21 = round v20
105  v22 = pack v14 v21 8
106  v23 = shr_i32 v1 16
107  v24 = bit_and v2 v23
108  v25 = to_f32 v24
109  v26 = mul_f32 v0 v25
110  v27 = mul_f32 v26 v10
111  v28 = mul_f32 v27 v12
112  v29 = round v28
113  v30 = shr_i32 v1 24
114  v31 = to_f32 v30
115  v32 = mul_f32 v0 v31
116  v33 = mad_f32 v32 v10 v8
117  v34 = mul_f32 v33 v12
118  v35 = round v34
119  v36 = pack v29 v35 8
120  v37 = pack v22 v36 16
121  store32 arg(1) v37
122
1239 registers, 39 instructions:
1240	r0 = splat 3B808081 (0.0039215689)
1251	r1 = splat FF (3.5733111e-43)
1262	r2 = splat 3F800000 (1)
1273	r3 = splat 437F0000 (255)
128loop:
1294	    r4 = load32 arg(1)
1305	    r5 = bit_and r1 r4
1316	    r5 = to_f32 r5
1327	    r5 = mul_f32 r0 r5
1338	    r6 = load8 arg(0)
1349	    r6 = to_f32 r6
13510	    r6 = mul_f32 r0 r6
13611	    r7 = sub_f32 r2 r6
13712	    r5 = mul_f32 r5 r7
13813	    r5 = mul_f32 r5 r3
13914	    r5 = round r5
14015	    r8 = shr_i32 r4 8
14116	    r8 = bit_and r1 r8
14217	    r8 = to_f32 r8
14318	    r8 = mul_f32 r0 r8
14419	    r8 = mul_f32 r8 r7
14520	    r8 = mul_f32 r8 r3
14621	    r8 = round r8
14722	    r8 = pack r5 r8 8
14823	    r5 = shr_i32 r4 16
14924	    r5 = bit_and r1 r5
15025	    r5 = to_f32 r5
15126	    r5 = mul_f32 r0 r5
15227	    r5 = mul_f32 r5 r7
15328	    r5 = mul_f32 r5 r3
15429	    r5 = round r5
15530	    r4 = shr_i32 r4 24
15631	    r4 = to_f32 r4
15732	    r4 = mul_f32 r0 r4
15833	    r6 = mad_f32 r4 r7 r6
15934	    r6 = mul_f32 r6 r3
16035	    r6 = round r6
16136	    r6 = pack r5 r6 8
16237	    r6 = pack r8 r6 16
16338	    store32 arg(1) r6
164
165G8 over A8
16611 values (originally 15):
167↑ v0 = splat 3F800000 (1)
168↑ v1 = splat 0 (0)
169↑ v2 = splat 3B808081 (0.0039215689)
170  v3 = load8 arg(1)
171  v4 = to_f32 v3
172  v5 = mul_f32 v2 v4
173  v6 = mad_f32 v5 v1 v0
174↑ v7 = splat 437F0000 (255)
175  v8 = mul_f32 v6 v7
176  v9 = round v8
177  store8 arg(1) v9
178
1795 registers, 11 instructions:
1800	r0 = splat 3F800000 (1)
1811	r1 = splat 0 (0)
1822	r2 = splat 3B808081 (0.0039215689)
1833	r3 = splat 437F0000 (255)
184loop:
1854	    r4 = load8 arg(1)
1865	    r4 = to_f32 r4
1876	    r4 = mul_f32 r2 r4
1887	    r4 = mad_f32 r4 r1 r0
1898	    r4 = mul_f32 r4 r3
1909	    r4 = round r4
19110	    store8 arg(1) r4
192
193G8 over G8
19419 values (originally 20):
195↑ v0 = splat 3B808081 (0.0039215689)
196  v1 = load8 arg(0)
197  v2 = to_f32 v1
198  v3 = mul_f32 v0 v2
199  v4 = load8 arg(1)
200  v5 = to_f32 v4
201  v6 = mul_f32 v0 v5
202↑ v7 = splat 0 (0)
203  v8 = mad_f32 v6 v7 v3
204↑ v9 = splat 3E59B3D0 (0.21259999)
205↑ v10 = splat 3F371759 (0.71520001)
206↑ v11 = splat 3D93DD98 (0.0722)
207  v12 = mul_f32 v8 v11
208  v13 = mad_f32 v8 v10 v12
209  v14 = mad_f32 v8 v9 v13
210↑ v15 = splat 437F0000 (255)
211  v16 = mul_f32 v14 v15
212  v17 = round v16
213  store8 arg(1) v17
214
2158 registers, 19 instructions:
2160	r0 = splat 3B808081 (0.0039215689)
2171	r1 = splat 0 (0)
2182	r2 = splat 3E59B3D0 (0.21259999)
2193	r3 = splat 3F371759 (0.71520001)
2204	r4 = splat 3D93DD98 (0.0722)
2215	r5 = splat 437F0000 (255)
222loop:
2236	    r6 = load8 arg(0)
2247	    r6 = to_f32 r6
2258	    r6 = mul_f32 r0 r6
2269	    r7 = load8 arg(1)
22710	    r7 = to_f32 r7
22811	    r7 = mul_f32 r0 r7
22912	    r6 = mad_f32 r7 r1 r6
23013	    r7 = mul_f32 r6 r4
23114	    r7 = mad_f32 r6 r3 r7
23215	    r7 = mad_f32 r6 r2 r7
23316	    r7 = mul_f32 r7 r5
23417	    r7 = round r7
23518	    store8 arg(1) r7
236
237G8 over RGBA_8888
23839 values (originally 39):
239↑ v0 = splat 3B808081 (0.0039215689)
240  v1 = load8 arg(0)
241  v2 = to_f32 v1
242  v3 = mul_f32 v0 v2
243  v4 = load32 arg(1)
244↑ v5 = splat FF (3.5733111e-43)
245  v6 = bit_and v5 v4
246  v7 = to_f32 v6
247  v8 = mul_f32 v0 v7
248↑ v9 = splat 0 (0)
249  v10 = mad_f32 v8 v9 v3
250↑ v11 = splat 437F0000 (255)
251  v12 = mul_f32 v10 v11
252  v13 = round v12
253  v14 = shr_i32 v4 8
254  v15 = bit_and v5 v14
255  v16 = to_f32 v15
256  v17 = mul_f32 v0 v16
257  v18 = mad_f32 v17 v9 v3
258  v19 = mul_f32 v18 v11
259  v20 = round v19
260  v21 = pack v13 v20 8
261  v22 = shr_i32 v4 16
262  v23 = bit_and v5 v22
263  v24 = to_f32 v23
264  v25 = mul_f32 v0 v24
265  v26 = mad_f32 v25 v9 v3
266  v27 = mul_f32 v26 v11
267  v28 = round v27
268↑ v29 = splat 3F800000 (1)
269  v30 = shr_i32 v4 24
270  v31 = to_f32 v30
271  v32 = mul_f32 v0 v31
272  v33 = mad_f32 v32 v9 v29
273  v34 = mul_f32 v33 v11
274  v35 = round v34
275  v36 = pack v28 v35 8
276  v37 = pack v21 v36 16
277  store32 arg(1) v37
278
2799 registers, 39 instructions:
2800	r0 = splat 3B808081 (0.0039215689)
2811	r1 = splat FF (3.5733111e-43)
2822	r2 = splat 0 (0)
2833	r3 = splat 437F0000 (255)
2844	r4 = splat 3F800000 (1)
285loop:
2865	    r5 = load8 arg(0)
2876	    r5 = to_f32 r5
2887	    r5 = mul_f32 r0 r5
2898	    r6 = load32 arg(1)
2909	    r7 = bit_and r1 r6
29110	    r7 = to_f32 r7
29211	    r7 = mul_f32 r0 r7
29312	    r7 = mad_f32 r7 r2 r5
29413	    r7 = mul_f32 r7 r3
29514	    r7 = round r7
29615	    r8 = shr_i32 r6 8
29716	    r8 = bit_and r1 r8
29817	    r8 = to_f32 r8
29918	    r8 = mul_f32 r0 r8
30019	    r8 = mad_f32 r8 r2 r5
30120	    r8 = mul_f32 r8 r3
30221	    r8 = round r8
30322	    r8 = pack r7 r8 8
30423	    r7 = shr_i32 r6 16
30524	    r7 = bit_and r1 r7
30625	    r7 = to_f32 r7
30726	    r7 = mul_f32 r0 r7
30827	    r5 = mad_f32 r7 r2 r5
30928	    r5 = mul_f32 r5 r3
31029	    r5 = round r5
31130	    r6 = shr_i32 r6 24
31231	    r6 = to_f32 r6
31332	    r6 = mul_f32 r0 r6
31433	    r6 = mad_f32 r6 r2 r4
31534	    r6 = mul_f32 r6 r3
31635	    r6 = round r6
31736	    r6 = pack r5 r6 8
31837	    r6 = pack r8 r6 16
31938	    store32 arg(1) r6
320
321RGBA_8888 over A8
32215 values (originally 31):
323↑ v0 = splat 3B808081 (0.0039215689)
324  v1 = load32 arg(0)
325  v2 = shr_i32 v1 24
326  v3 = to_f32 v2
327  v4 = mul_f32 v0 v3
328  v5 = load8 arg(1)
329  v6 = to_f32 v5
330  v7 = mul_f32 v0 v6
331↑ v8 = splat 3F800000 (1)
332  v9 = sub_f32 v8 v4
333  v10 = mad_f32 v7 v9 v4
334↑ v11 = splat 437F0000 (255)
335  v12 = mul_f32 v10 v11
336  v13 = round v12
337  store8 arg(1) v13
338
3396 registers, 15 instructions:
3400	r0 = splat 3B808081 (0.0039215689)
3411	r1 = splat 3F800000 (1)
3422	r2 = splat 437F0000 (255)
343loop:
3443	    r3 = load32 arg(0)
3454	    r3 = shr_i32 r3 24
3465	    r3 = to_f32 r3
3476	    r3 = mul_f32 r0 r3
3487	    r4 = load8 arg(1)
3498	    r4 = to_f32 r4
3509	    r4 = mul_f32 r0 r4
35110	    r5 = sub_f32 r1 r3
35211	    r3 = mad_f32 r4 r5 r3
35312	    r3 = mul_f32 r3 r2
35413	    r3 = round r3
35514	    store8 arg(1) r3
356
357RGBA_8888 over G8
35835 values (originally 36):
359↑ v0 = splat 3B808081 (0.0039215689)
360  v1 = load32 arg(0)
361↑ v2 = splat FF (3.5733111e-43)
362  v3 = bit_and v2 v1
363  v4 = to_f32 v3
364  v5 = mul_f32 v0 v4
365  v6 = load8 arg(1)
366  v7 = to_f32 v6
367  v8 = mul_f32 v0 v7
368  v9 = shr_i32 v1 24
369  v10 = to_f32 v9
370  v11 = mul_f32 v0 v10
371↑ v12 = splat 3F800000 (1)
372  v13 = sub_f32 v12 v11
373  v14 = mad_f32 v8 v13 v5
374↑ v15 = splat 3E59B3D0 (0.21259999)
375  v16 = shr_i32 v1 8
376  v17 = bit_and v2 v16
377  v18 = to_f32 v17
378  v19 = mul_f32 v0 v18
379  v20 = mad_f32 v8 v13 v19
380↑ v21 = splat 3F371759 (0.71520001)
381  v22 = shr_i32 v1 16
382  v23 = bit_and v2 v22
383  v24 = to_f32 v23
384  v25 = mul_f32 v0 v24
385  v26 = mad_f32 v8 v13 v25
386↑ v27 = splat 3D93DD98 (0.0722)
387  v28 = mul_f32 v26 v27
388  v29 = mad_f32 v20 v21 v28
389  v30 = mad_f32 v14 v15 v29
390↑ v31 = splat 437F0000 (255)
391  v32 = mul_f32 v30 v31
392  v33 = round v32
393  store8 arg(1) v33
394
39512 registers, 35 instructions:
3960	r0 = splat 3B808081 (0.0039215689)
3971	r1 = splat FF (3.5733111e-43)
3982	r2 = splat 3F800000 (1)
3993	r3 = splat 3E59B3D0 (0.21259999)
4004	r4 = splat 3F371759 (0.71520001)
4015	r5 = splat 3D93DD98 (0.0722)
4026	r6 = splat 437F0000 (255)
403loop:
4047	    r7 = load32 arg(0)
4058	    r8 = bit_and r1 r7
4069	    r8 = to_f32 r8
40710	    r8 = mul_f32 r0 r8
40811	    r9 = load8 arg(1)
40912	    r9 = to_f32 r9
41013	    r9 = mul_f32 r0 r9
41114	    r10 = shr_i32 r7 24
41215	    r10 = to_f32 r10
41316	    r10 = mul_f32 r0 r10
41417	    r10 = sub_f32 r2 r10
41518	    r8 = mad_f32 r9 r10 r8
41619	    r11 = shr_i32 r7 8
41720	    r11 = bit_and r1 r11
41821	    r11 = to_f32 r11
41922	    r11 = mul_f32 r0 r11
42023	    r11 = mad_f32 r9 r10 r11
42124	    r7 = shr_i32 r7 16
42225	    r7 = bit_and r1 r7
42326	    r7 = to_f32 r7
42427	    r7 = mul_f32 r0 r7
42528	    r7 = mad_f32 r9 r10 r7
42629	    r7 = mul_f32 r7 r5
42730	    r7 = mad_f32 r11 r4 r7
42831	    r7 = mad_f32 r8 r3 r7
42932	    r7 = mul_f32 r7 r6
43033	    r7 = round r7
43134	    store8 arg(1) r7
432
433RGBA_8888 over RGBA_8888
43451 values (originally 51):
435↑ v0 = splat 3B808081 (0.0039215689)
436  v1 = load32 arg(0)
437↑ v2 = splat FF (3.5733111e-43)
438  v3 = bit_and v2 v1
439  v4 = to_f32 v3
440  v5 = mul_f32 v0 v4
441  v6 = load32 arg(1)
442  v7 = bit_and v2 v6
443  v8 = to_f32 v7
444  v9 = mul_f32 v0 v8
445  v10 = shr_i32 v1 24
446  v11 = to_f32 v10
447  v12 = mul_f32 v0 v11
448↑ v13 = splat 3F800000 (1)
449  v14 = sub_f32 v13 v12
450  v15 = mad_f32 v9 v14 v5
451↑ v16 = splat 437F0000 (255)
452  v17 = mul_f32 v15 v16
453  v18 = round v17
454  v19 = shr_i32 v1 8
455  v20 = bit_and v2 v19
456  v21 = to_f32 v20
457  v22 = mul_f32 v0 v21
458  v23 = shr_i32 v6 8
459  v24 = bit_and v2 v23
460  v25 = to_f32 v24
461  v26 = mul_f32 v0 v25
462  v27 = mad_f32 v26 v14 v22
463  v28 = mul_f32 v27 v16
464  v29 = round v28
465  v30 = pack v18 v29 8
466  v31 = shr_i32 v1 16
467  v32 = bit_and v2 v31
468  v33 = to_f32 v32
469  v34 = mul_f32 v0 v33
470  v35 = shr_i32 v6 16
471  v36 = bit_and v2 v35
472  v37 = to_f32 v36
473  v38 = mul_f32 v0 v37
474  v39 = mad_f32 v38 v14 v34
475  v40 = mul_f32 v39 v16
476  v41 = round v40
477  v42 = shr_i32 v6 24
478  v43 = to_f32 v42
479  v44 = mul_f32 v0 v43
480  v45 = mad_f32 v44 v14 v12
481  v46 = mul_f32 v45 v16
482  v47 = round v46
483  v48 = pack v41 v47 8
484  v49 = pack v30 v48 16
485  store32 arg(1) v49
486
48711 registers, 51 instructions:
4880	r0 = splat 3B808081 (0.0039215689)
4891	r1 = splat FF (3.5733111e-43)
4902	r2 = splat 3F800000 (1)
4913	r3 = splat 437F0000 (255)
492loop:
4934	    r4 = load32 arg(0)
4945	    r5 = bit_and r1 r4
4956	    r5 = to_f32 r5
4967	    r5 = mul_f32 r0 r5
4978	    r6 = load32 arg(1)
4989	    r7 = bit_and r1 r6
49910	    r7 = to_f32 r7
50011	    r7 = mul_f32 r0 r7
50112	    r8 = shr_i32 r4 24
50213	    r8 = to_f32 r8
50314	    r8 = mul_f32 r0 r8
50415	    r9 = sub_f32 r2 r8
50516	    r5 = mad_f32 r7 r9 r5
50617	    r5 = mul_f32 r5 r3
50718	    r5 = round r5
50819	    r7 = shr_i32 r4 8
50920	    r7 = bit_and r1 r7
51021	    r7 = to_f32 r7
51122	    r7 = mul_f32 r0 r7
51223	    r10 = shr_i32 r6 8
51324	    r10 = bit_and r1 r10
51425	    r10 = to_f32 r10
51526	    r10 = mul_f32 r0 r10
51627	    r7 = mad_f32 r10 r9 r7
51728	    r7 = mul_f32 r7 r3
51829	    r7 = round r7
51930	    r7 = pack r5 r7 8
52031	    r4 = shr_i32 r4 16
52132	    r4 = bit_and r1 r4
52233	    r4 = to_f32 r4
52334	    r4 = mul_f32 r0 r4
52435	    r5 = shr_i32 r6 16
52536	    r5 = bit_and r1 r5
52637	    r5 = to_f32 r5
52738	    r5 = mul_f32 r0 r5
52839	    r4 = mad_f32 r5 r9 r4
52940	    r4 = mul_f32 r4 r3
53041	    r4 = round r4
53142	    r6 = shr_i32 r6 24
53243	    r6 = to_f32 r6
53344	    r6 = mul_f32 r0 r6
53445	    r8 = mad_f32 r6 r9 r8
53546	    r8 = mul_f32 r8 r3
53647	    r8 = round r8
53748	    r8 = pack r4 r8 8
53849	    r8 = pack r7 r8 16
53950	    store32 arg(1) r8
540
541I32 (Naive) 8888 over 8888
54233 values (originally 33):
543  v0 = load32 arg(0)
544↑ v1 = splat FF (3.5733111e-43)
545  v2 = bit_and v1 v0
546  v3 = load32 arg(1)
547  v4 = bit_and v1 v3
548  v5 = shr_i32 v0 24
549↑ v6 = splat 100 (3.5873241e-43)
550  v7 = sub_i32 v6 v5
551  v8 = mul_i32 v4 v7
552  v9 = shr_i32 v8 8
553  v10 = add_i32 v2 v9
554  v11 = shr_i32 v0 8
555  v12 = bit_and v1 v11
556  v13 = shr_i32 v3 8
557  v14 = bit_and v1 v13
558  v15 = mul_i32 v14 v7
559  v16 = shr_i32 v15 8
560  v17 = add_i32 v12 v16
561  v18 = pack v10 v17 8
562  v19 = shr_i32 v0 16
563  v20 = bit_and v1 v19
564  v21 = shr_i32 v3 16
565  v22 = bit_and v1 v21
566  v23 = mul_i32 v22 v7
567  v24 = shr_i32 v23 8
568  v25 = add_i32 v20 v24
569  v26 = shr_i32 v3 24
570  v27 = mul_i32 v26 v7
571  v28 = shr_i32 v27 8
572  v29 = add_i32 v5 v28
573  v30 = pack v25 v29 8
574  v31 = pack v18 v30 16
575  store32 arg(1) v31
576
5779 registers, 33 instructions:
5780	r0 = splat FF (3.5733111e-43)
5791	r1 = splat 100 (3.5873241e-43)
580loop:
5812	    r2 = load32 arg(0)
5823	    r3 = bit_and r0 r2
5834	    r4 = load32 arg(1)
5845	    r5 = bit_and r0 r4
5856	    r6 = shr_i32 r2 24
5867	    r7 = sub_i32 r1 r6
5878	    r5 = mul_i32 r5 r7
5889	    r5 = shr_i32 r5 8
58910	    r5 = add_i32 r3 r5
59011	    r3 = shr_i32 r2 8
59112	    r3 = bit_and r0 r3
59213	    r8 = shr_i32 r4 8
59314	    r8 = bit_and r0 r8
59415	    r8 = mul_i32 r8 r7
59516	    r8 = shr_i32 r8 8
59617	    r8 = add_i32 r3 r8
59718	    r8 = pack r5 r8 8
59819	    r2 = shr_i32 r2 16
59920	    r2 = bit_and r0 r2
60021	    r5 = shr_i32 r4 16
60122	    r5 = bit_and r0 r5
60223	    r5 = mul_i32 r5 r7
60324	    r5 = shr_i32 r5 8
60425	    r5 = add_i32 r2 r5
60526	    r4 = shr_i32 r4 24
60627	    r7 = mul_i32 r4 r7
60728	    r7 = shr_i32 r7 8
60829	    r7 = add_i32 r6 r7
60930	    r7 = pack r5 r7 8
61031	    r7 = pack r8 r7 16
61132	    store32 arg(1) r7
612
613I32 8888 over 8888
61429 values (originally 29):
615  v0 = load32 arg(0)
616↑ v1 = splat FF (3.5733111e-43)
617  v2 = bit_and v0 v1
618  v3 = load32 arg(1)
619  v4 = bit_and v3 v1
620  v5 = shr_i32 v0 24
621↑ v6 = splat 100 (3.5873241e-43)
622  v7 = sub_i32 v6 v5
623  v8 = mul_i16x2 v4 v7
624  v9 = shr_i32 v8 8
625  v10 = add_i32 v2 v9
626  v11 = bytes v0 2
627  v12 = bytes v3 2
628  v13 = mul_i16x2 v12 v7
629  v14 = shr_i32 v13 8
630  v15 = add_i32 v11 v14
631  v16 = pack v10 v15 8
632  v17 = bytes v0 3
633  v18 = bytes v3 3
634  v19 = mul_i16x2 v18 v7
635  v20 = shr_i32 v19 8
636  v21 = add_i32 v17 v20
637  v22 = shr_i32 v3 24
638  v23 = mul_i16x2 v22 v7
639  v24 = shr_i32 v23 8
640  v25 = add_i32 v5 v24
641  v26 = pack v21 v25 8
642  v27 = pack v16 v26 16
643  store32 arg(1) v27
644
6459 registers, 29 instructions:
6460	r0 = splat FF (3.5733111e-43)
6471	r1 = splat 100 (3.5873241e-43)
648loop:
6492	    r2 = load32 arg(0)
6503	    r3 = bit_and r2 r0
6514	    r4 = load32 arg(1)
6525	    r5 = bit_and r4 r0
6536	    r6 = shr_i32 r2 24
6547	    r7 = sub_i32 r1 r6
6558	    r5 = mul_i16x2 r5 r7
6569	    r5 = shr_i32 r5 8
65710	    r5 = add_i32 r3 r5
65811	    r3 = bytes r2 2
65912	    r8 = bytes r4 2
66013	    r8 = mul_i16x2 r8 r7
66114	    r8 = shr_i32 r8 8
66215	    r8 = add_i32 r3 r8
66316	    r8 = pack r5 r8 8
66417	    r2 = bytes r2 3
66518	    r5 = bytes r4 3
66619	    r5 = mul_i16x2 r5 r7
66720	    r5 = shr_i32 r5 8
66821	    r5 = add_i32 r2 r5
66922	    r4 = shr_i32 r4 24
67023	    r7 = mul_i16x2 r4 r7
67124	    r7 = shr_i32 r7 8
67225	    r7 = add_i32 r6 r7
67326	    r7 = pack r5 r7 8
67427	    r7 = pack r8 r7 16
67528	    store32 arg(1) r7
676
677I32 (SWAR) 8888 over 8888
67815 values (originally 15):
679  v0 = load32 arg(0)
680  v1 = bytes v0 404
681↑ v2 = splat 1000100 (2.3510604e-38)
682  v3 = sub_i16x2 v2 v1
683  v4 = load32 arg(1)
684↑ v5 = splat FF00FF (2.3418409e-38)
685  v6 = bit_and v4 v5
686  v7 = mul_i16x2 v6 v3
687  v8 = shr_i16x2 v7 8
688  v9 = shr_i16x2 v4 8
689  v10 = mul_i16x2 v9 v3
690  v11 = bit_clear v10 v5
691  v12 = bit_or v8 v11
692  v13 = add_i32 v0 v12
693  store32 arg(1) v13
694
6956 registers, 15 instructions:
6960	r0 = splat 1000100 (2.3510604e-38)
6971	r1 = splat FF00FF (2.3418409e-38)
698loop:
6992	    r2 = load32 arg(0)
7003	    r3 = bytes r2 404
7014	    r3 = sub_i16x2 r0 r3
7025	    r4 = load32 arg(1)
7036	    r5 = bit_and r4 r1
7047	    r5 = mul_i16x2 r5 r3
7058	    r5 = shr_i16x2 r5 8
7069	    r4 = shr_i16x2 r4 8
70710	    r3 = mul_i16x2 r4 r3
70811	    r3 = bit_clear r3 r1
70912	    r3 = bit_or r5 r3
71013	    r3 = add_i32 r2 r3
71114	    store32 arg(1) r3
712
7136 values (originally 6):
714↟ v0 = splat 1 (1.4012985e-45)
715↟ v1 = splat 2 (2.8025969e-45)
716↑ v2 = add_i32 v0 v1
717  v3 = load32 arg(0)
718  v4 = mul_i32 v3 v2
719  store32 arg(0) v4
720
7212 registers, 6 instructions:
7220	r0 = splat 1 (1.4012985e-45)
7231	r1 = splat 2 (2.8025969e-45)
7242	r1 = add_i32 r0 r1
725loop:
7263	    r0 = load32 arg(0)
7274	    r0 = mul_i32 r0 r1
7285	    store32 arg(0) r0
729
73023 values (originally 23):
731↑ v0 = splat FF (3.5733111e-43)
732  v1 = load32 arg(0)
733  v2 = bit_and v0 v1
734  v3 = load32 arg(1)
735  v4 = bit_and v0 v3
736  v5 = add_i32 v2 v4
737  v6 = shr_i32 v1 8
738  v7 = bit_and v0 v6
739  v8 = shr_i32 v3 8
740  v9 = bit_and v0 v8
741  v10 = add_i32 v7 v9
742  v11 = pack v5 v10 8
743  v12 = shr_i32 v1 16
744  v13 = bit_and v0 v12
745  v14 = shr_i32 v3 16
746  v15 = bit_and v0 v14
747  v16 = add_i32 v13 v15
748  v17 = shr_i32 v1 24
749  v18 = shr_i32 v3 24
750  v19 = add_i32 v17 v18
751  v20 = pack v16 v19 8
752  v21 = pack v11 v20 16
753  store32 arg(1) v21
754
7556 registers, 23 instructions:
7560	r0 = splat FF (3.5733111e-43)
757loop:
7581	    r1 = load32 arg(0)
7592	    r2 = bit_and r0 r1
7603	    r3 = load32 arg(1)
7614	    r4 = bit_and r0 r3
7625	    r4 = add_i32 r2 r4
7636	    r2 = shr_i32 r1 8
7647	    r2 = bit_and r0 r2
7658	    r5 = shr_i32 r3 8
7669	    r5 = bit_and r0 r5
76710	    r5 = add_i32 r2 r5
76811	    r5 = pack r4 r5 8
76912	    r4 = shr_i32 r1 16
77013	    r4 = bit_and r0 r4
77114	    r2 = shr_i32 r3 16
77215	    r2 = bit_and r0 r2
77316	    r2 = add_i32 r4 r2
77417	    r1 = shr_i32 r1 24
77518	    r3 = shr_i32 r3 24
77619	    r3 = add_i32 r1 r3
77720	    r3 = pack r2 r3 8
77821	    r3 = pack r5 r3 16
77922	    store32 arg(1) r3
780
781