; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx  -mattr=+avx | FileCheck %s

; AVX128 tests:

;CHECK-LABEL: vsel_float:
; The select mask is <i1 true, i1 false, i1 true, i1 false>.
; Written most-significant bit first, with element 0 as the least-significant
; bit, that is 0101 = 5.
; In the select, '1' takes the first argument and '0' takes the second.
; The blend immediate uses the opposite convention, thus we expect
; the inverted mask: 1010 = 10.
; According to the ABI:
; v1 is in xmm0 => first argument is xmm0.
; v2 is in xmm1 => second argument is xmm1.
; The result is in xmm0 => destination argument.
;CHECK: vblendps    $10, %xmm1, %xmm0, %xmm0
;CHECK: ret
define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
  %vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x float> %v1, <4 x float> %v2
  ret <4 x float> %vsel
}


; Same constant mask as vsel_float, applied to <4 x i32>; the checks expect
; the same vblendps with immediate 10.
;CHECK-LABEL: vsel_i32:
;CHECK: vblendps   $10, %xmm1, %xmm0, %xmm0
;CHECK: ret
define <4 x i32> @vsel_i32(<4 x i32> %v1, <4 x i32> %v2) {
  %vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i32> %v1, <4 x i32> %v2
  ret <4 x i32> %vsel
}


; Two-element select with mask <i1 true, i1 false>: element 0 comes from v1
; and element 1 from v2. The checks expect this to be emitted as a vmovsd.
;CHECK-LABEL: vsel_double:
;CHECK: vmovsd
;CHECK: ret
define <2 x double> @vsel_double(<2 x double> %v1, <2 x double> %v2) {
  %vsel = select <2 x i1> <i1 true, i1 false>, <2 x double> %v1, <2 x double> %v2
  ret <2 x double> %vsel
}


; Integer counterpart of vsel_double: mask <i1 true, i1 false> on <2 x i64>.
; The checks expect the same vmovsd as for the double case.
;CHECK-LABEL: vsel_i64:
;CHECK: vmovsd
;CHECK: ret
define <2 x i64> @vsel_i64(<2 x i64> %v1, <2 x i64> %v2) {
  %vsel = select <2 x i1> <i1 true, i1 false>, <2 x i64> %v1, <2 x i64> %v2
  ret <2 x i64> %vsel
}


; Byte-granular select: every fourth byte (elements 0, 4, 8, 12) comes from
; v1, the rest from v2. The checks expect a variable byte blend (vpblendvb)
; rather than an immediate-mask blend.
;CHECK-LABEL: vsel_i8:
;CHECK: vpblendvb
;CHECK: ret
define <16 x i8> @vsel_i8(<16 x i8> %v1, <16 x i8> %v2) {
  %vsel = select <16 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <16 x i8> %v1, <16 x i8> %v2
  ret <16 x i8> %vsel
}


; AVX256 tests:


;CHECK-LABEL: vsel_float8:
;CHECK-NOT: vinsertf128
; The select mask is
; <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>
; which, written most-significant bit first with element 0 as the
; least-significant bit, is the boolean mask 00010001 = 17.
; In the select, '1' takes the first argument and '0' takes the second.
; The blend immediate uses the opposite convention, thus we expect
; the inverted mask: 11101110 = 238.
;CHECK: vblendps    $238, %ymm1, %ymm0, %ymm0
;CHECK: ret
define <8 x float> @vsel_float8(<8 x float> %v1, <8 x float> %v2) {
  %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x float> %v1, <8 x float> %v2
  ret <8 x float> %vsel
}

; Same constant mask as vsel_float8, applied to <8 x i32>; the checks expect
; the same single 256-bit vblendps (immediate 238), with no vinsertf128,
; immediately followed by the return.
;CHECK-LABEL: vsel_i328:
;CHECK-NOT: vinsertf128
;CHECK: vblendps    $238, %ymm1, %ymm0, %ymm0
;CHECK-NEXT: ret
define <8 x i32> @vsel_i328(<8 x i32> %v1, <8 x i32> %v2) {
  %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i32> %v1, <8 x i32> %v2
  ret <8 x i32> %vsel
}

;CHECK-LABEL: vsel_double8:
; The <8 x double> operands do not fit in one ymm register, so each is handled
; as two 4-element halves. Each half of the select mask is
; <true, false, false, false>, i.e. 0001, so the blend immediate for each half
; is the inverted mask: ~0001 = 1110 = 14.
; ABI:
; v1 is in ymm0 and ymm1.
; v2 is in ymm2 and ymm3.
; The result is in ymm0 and ymm1.
; Compute the low part: res.low = blend v1.low, v2.low, blendmask
;CHECK: vblendpd    $14, %ymm2, %ymm0, %ymm0
; Compute the high part: res.high = blend v1.high, v2.high, blendmask
;CHECK: vblendpd    $14, %ymm3, %ymm1, %ymm1
;CHECK: ret
define <8 x double> @vsel_double8(<8 x double> %v1, <8 x double> %v2) {
  %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x double> %v1, <8 x double> %v2
  ret <8 x double> %vsel
}

; Integer counterpart of vsel_double8: the <8 x i64> select is expected to be
; emitted as two 256-bit vblendpd instructions, one per half, each with
; immediate 14.
;CHECK-LABEL: vsel_i648:
;CHECK: vblendpd    $14, %ymm2, %ymm0, %ymm0
;CHECK: vblendpd    $14, %ymm3, %ymm1, %ymm1
;CHECK: ret
define <8 x i64> @vsel_i648(<8 x i64> %v1, <8 x i64> %v2) {
  %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i64> %v1, <8 x i64> %v2
  ret <8 x i64> %vsel
}

; Select mask <true, false, true, false> on <4 x double>: elements 1 and 3
; come from the second argument, so the expected blend immediate is
; 1010 = 10. The blend must be a single instruction (no vinsertf128)
; immediately followed by the return.
;CHECK-LABEL: vsel_double4:
;CHECK-NOT: vinsertf128
;CHECK: vblendpd $10
;CHECK-NEXT: ret
define <4 x double> @vsel_double4(<4 x double> %v1, <4 x double> %v2) {
  %vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x double> %v1, <4 x double> %v2
  ret <4 x double> %vsel
}

;; TEST blend + compares
; The mask here is computed at run time by a compare, so a variable blend is
; expected instead of an immediate blend. A label directive is used so the
; checks below are anchored to this function's own output.
; CHECK-LABEL: testa:
define <2 x double> @testa(<2 x double> %x, <2 x double> %y) {
  ; Ordered x >= y is expected to be emitted as a less-or-equal compare,
  ; followed by a variable blend driven by the compare result.
  ; CHECK: vcmplepd
  ; CHECK: vblendvpd
  %max_is_x = fcmp oge <2 x double> %x, %y
  %max = select <2 x i1> %max_is_x, <2 x double> %x, <2 x double> %y
  ret <2 x double> %max
}

; Compare + select with an unordered predicate. A label directive is used so
; the checks below are anchored to this function's own output.
; CHECK-LABEL: testb:
define <2 x double> @testb(<2 x double> %x, <2 x double> %y) {
  ; Unordered-or-less-than (x < y, or either is NaN) is expected to be emitted
  ; as a not-less-or-equal compare, followed by a variable blend driven by
  ; the compare result.
  ; CHECK: vcmpnlepd
  ; CHECK: vblendvpd
  %min_is_x = fcmp ult <2 x double> %x, %y
  %min = select <2 x i1> %min_is_x, <2 x double> %x, <2 x double> %y
  ret <2 x double> %min
}

; If we can figure out that a blend has a constant mask, we should emit the
; blend instruction with an immediate mask.
define <4 x double> @constant_blendvpd_avx(<4 x double> %xy, <4 x double> %ab) {
; The checks require an immediate-form vblendpd with no preceding mov (i.e.
; no mask materialized in a register). The immediate value itself is not
; checked.
; NOTE(review): with only element 2 taken from %xy, the inverted-mask
; convention used by the other tests in this file would give 1011 = 11 -
; confirm before tightening the check.
; CHECK-LABEL: constant_blendvpd_avx:
; CHECK-NOT: mov
; CHECK: vblendpd
; CHECK: ret
  %1 = select <4 x i1> <i1 false, i1 false, i1 true, i1 false>, <4 x double> %xy, <4 x double> %ab
  ret <4 x double> %1
}

define <8 x float> @constant_blendvps_avx(<8 x float> %xyzw, <8 x float> %abcd) {
; Constant-mask select on <8 x float>: the checks require an immediate-form
; vblendps with no preceding mov (i.e. no mask materialized in a register).
; The immediate value itself is not checked.
; NOTE(review): with only elements 3 and 7 taken from %xyzw, the inverted-mask
; convention used by the other tests in this file would give
; 01110111 = 119 - confirm before tightening the check.
; CHECK-LABEL: constant_blendvps_avx:
; CHECK-NOT: mov
; CHECK: vblendps
; CHECK: ret
  %1 = select <8 x i1> <i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true>, <8 x float> %xyzw, <8 x float> %abcd
  ret <8 x float> %1
}

; Declarations of the 256-bit AVX variable-blend intrinsics.
; NOTE(review): no function in the visible part of this file calls them;
; confirm whether they are still needed before removing.
declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>)
declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>)

;; 4 tests for shufflevectors that optimize to blend + immediate
; CHECK-LABEL: @blend_shufflevector_4xfloat
define <4 x float> @blend_shufflevector_4xfloat(<4 x float> %a, <4 x float> %b) {
; Shuffle indices 0-3 pick from %a and 4-7 pick from %b, each staying in its
; own lane, so the shuffle is a blend.
; The equivalent select mask is <i1 true, i1 false, i1 true, i1 false>.
; Written most-significant bit first, with element 0 as the least-significant
; bit, that is 0101 = 5.
; In the select, '1' takes the first argument and '0' takes the second.
; The blend immediate uses the opposite convention, thus we expect
; the inverted mask: 1010 = 10.
; According to the ABI:
; a is in xmm0 => first argument is xmm0.
; b is in xmm1 => second argument is xmm1.
; The result is in xmm0 => destination argument.
; CHECK: vblendps $10, %xmm1, %xmm0, %xmm0
; CHECK: ret
  %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x float> %1
}

; CHECK-LABEL: @blend_shufflevector_8xfloat
define <8 x float> @blend_shufflevector_8xfloat(<8 x float> %a, <8 x float> %b) {
; Elements 0 and 6 come from %a; elements 1, 2, 3, 4, 5 and 7 come from %b
; (shuffle indices 8-15). A set immediate bit selects %b, so the expected
; immediate is 10111110 = 190.
; CHECK: vblendps $190, %ymm1, %ymm0, %ymm0
; CHECK: ret
  %1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 6, i32 15>
  ret <8 x float> %1
}

; CHECK-LABEL: @blend_shufflevector_4xdouble
define <4 x double> @blend_shufflevector_4xdouble(<4 x double> %a, <4 x double> %b) {
; Only element 1 comes from %b (shuffle index 5); the rest come from %a.
; A set immediate bit selects %b, so the expected immediate is 0010 = 2.
; CHECK: vblendpd $2, %ymm1, %ymm0, %ymm0
; CHECK: ret
  %1 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
  ret <4 x double> %1
}

; CHECK-LABEL: @blend_shufflevector_4xi64
define <4 x i64> @blend_shufflevector_4xi64(<4 x i64> %a, <4 x i64> %b) {
; Elements 0, 2 and 3 come from %b (shuffle indices 4, 6, 7); element 1 comes
; from %a. A set immediate bit selects %b, so the expected immediate is
; 1101 = 13. The checks expect vblendpd for this integer vector.
; CHECK: vblendpd $13, %ymm1, %ymm0, %ymm0
; CHECK: ret
  %1 = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 7>
  ret <4 x i64> %1
}
