; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx                       | FileCheck %s --check-prefixes=ALL,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2                      | FileCheck %s --check-prefixes=ALL,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw,avx512vl,avx512f | FileCheck %s --check-prefixes=ALL,AVX512

; PR37427 - https://bugs.llvm.org/show_bug.cgi?id=37427

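; <8 x i8> load, icmp eq with zero, select between <8 x i32> operands.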
define <8 x i32> @eq_zero(<8 x i8>* %p, <8 x i32> %x, <8 x i32> %y) {
; AVX1-LABEL: eq_zero:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqw %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
; AVX1-NEXT:    vpslld $24, %xmm3, %xmm3
; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm2 = xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; AVX1-NEXT:    vpslld $24, %xmm2, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT:    vblendvps %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: eq_zero:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm2 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
; AVX2-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX2-NEXT:    vpcmpeqd %ymm3, %ymm2, %ymm2
; AVX2-NEXT:    vblendvps %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: eq_zero:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovzxbw {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; AVX512-NEXT:    vptestnmw %xmm2, %xmm2, %k1
; AVX512-NEXT:    vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
; AVX512-NEXT:    retq
  %load = load <8 x i8>, <8 x i8>* %p
  %cmp = icmp eq <8 x i8> %load, zeroinitializer
  %sel = select <8 x i1> %cmp, <8 x i32> %x, <8 x i32> %y
  ret <8 x i32> %sel
}

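; <4 x i16> load, icmp ne with zero, select between <4 x i64> operands.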
define <4 x i64> @ne_zero(<4 x i16>* %p, <4 x i64> %x, <4 x i64> %y) {
; AVX1-LABEL: ne_zero:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpmovsxdq %xmm2, %xmm3
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
; AVX1-NEXT:    vpmovsxdq %xmm2, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ne_zero:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxwq {{.*#+}} ymm2 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX2-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX2-NEXT:    vpcmpeqq %ymm3, %ymm2, %ymm2
; AVX2-NEXT:    vpcmpeqd %ymm3, %ymm3, %ymm3
; AVX2-NEXT:    vpxor %ymm3, %ymm2, %ymm2
; AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ne_zero:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovzxwd {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; AVX512-NEXT:    vptestmd %xmm2, %xmm2, %k1
; AVX512-NEXT:    vpblendmq %ymm0, %ymm1, %ymm0 {%k1}
; AVX512-NEXT:    retq
  %load = load <4 x i16>, <4 x i16>* %p
  %cmp = icmp ne <4 x i16> %load, zeroinitializer
  %sel = select <4 x i1> %cmp, <4 x i64> %x, <4 x i64> %y
  ret <4 x i64> %sel
}

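; <16 x i8> load, icmp sgt zero, select between <16 x i16> operands.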
define <16 x i16> @sgt_zero(<16 x i8>* %p, <16 x i16> %x, <16 x i16> %y) {
; AVX1-LABEL: sgt_zero:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa (%rdi), %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpgtb %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpmovsxbw %xmm2, %xmm3
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
; AVX1-NEXT:    vpmovsxbw %xmm2, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT:    vandnps %ymm1, %ymm2, %ymm1
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: sgt_zero:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovsxbw (%rdi), %ymm2
; AVX2-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX2-NEXT:    vpcmpgtw %ymm3, %ymm2, %ymm2
; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: sgt_zero:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX512-NEXT:    vpcmpltb (%rdi), %xmm2, %k1
; AVX512-NEXT:    vpblendmw %ymm0, %ymm1, %ymm0 {%k1}
; AVX512-NEXT:    retq
  %load = load <16 x i8>, <16 x i8>* %p
  %cmp = icmp sgt <16 x i8> %load, zeroinitializer
  %sel = select <16 x i1> %cmp, <16 x i16> %x, <16 x i16> %y
  ret <16 x i16> %sel
}

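; <8 x i8> load, icmp slt zero, select between <8 x i32> operands.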
define <8 x i32> @slt_zero(<8 x i8>* %p, <8 x i32> %x, <8 x i32> %y) {
; AVX1-LABEL: slt_zero:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmovsxbw (%rdi), %xmm2
; AVX1-NEXT:    vpmovsxwd %xmm2, %xmm3
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
; AVX1-NEXT:    vpmovsxwd %xmm2, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT:    vblendvps %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: slt_zero:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovsxbd (%rdi), %ymm2
; AVX2-NEXT:    vblendvps %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: slt_zero:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovsxbw (%rdi), %xmm2
; AVX512-NEXT:    vpmovw2m %xmm2, %k1
; AVX512-NEXT:    vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
; AVX512-NEXT:    retq
  %load = load <8 x i8>, <8 x i8>* %p
  %cmp = icmp slt <8 x i8> %load, zeroinitializer
  %sel = select <8 x i1> %cmp, <8 x i32> %x, <8 x i32> %y
  ret <8 x i32> %sel
}

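; <4 x i8> load, icmp eq with zero, select between <4 x double> operands.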
define <4 x double> @eq_zero_fp_select(<4 x i8>* %p, <4 x double> %x, <4 x double> %y) {
; AVX1-LABEL: eq_zero_fp_select:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm2 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpmovsxdq %xmm2, %xmm3
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
; AVX1-NEXT:    vpmovsxdq %xmm2, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: eq_zero_fp_select:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxbq {{.*#+}} ymm2 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX2-NEXT:    vpcmpeqq %ymm3, %ymm2, %ymm2
; AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: eq_zero_fp_select:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovzxbd {{.*#+}} xmm2 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX512-NEXT:    vptestnmd %xmm2, %xmm2, %k1
; AVX512-NEXT:    vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
; AVX512-NEXT:    retq
  %load = load <4 x i8>, <4 x i8>* %p
  %cmp = icmp eq <4 x i8> %load, zeroinitializer
  %sel = select <4 x i1> %cmp, <4 x double> %x, <4 x double> %y
  ret <4 x double> %sel
}

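; <8 x i8> load, icmp ne with zero, select between <8 x float> operands.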
define <8 x float> @ne_zero_fp_select(<8 x i8>* %p, <8 x float> %x, <8 x float> %y) {
; AVX1-LABEL: ne_zero_fp_select:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqw %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
; AVX1-NEXT:    vpslld $24, %xmm3, %xmm3
; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm2 = xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; AVX1-NEXT:    vpslld $24, %xmm2, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT:    vblendvps %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ne_zero_fp_select:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm2 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
; AVX2-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX2-NEXT:    vpcmpeqd %ymm3, %ymm2, %ymm2
; AVX2-NEXT:    vpcmpeqd %ymm3, %ymm3, %ymm3
; AVX2-NEXT:    vpxor %ymm3, %ymm2, %ymm2
; AVX2-NEXT:    vblendvps %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ne_zero_fp_select:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovzxbw {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; AVX512-NEXT:    vptestmw %xmm2, %xmm2, %k1
; AVX512-NEXT:    vblendmps %ymm0, %ymm1, %ymm0 {%k1}
; AVX512-NEXT:    retq
  %load = load <8 x i8>, <8 x i8>* %p
  %cmp = icmp ne <8 x i8> %load, zeroinitializer
  %sel = select <8 x i1> %cmp, <8 x float> %x, <8 x float> %y
  ret <8 x float> %sel
}

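; <4 x i8> load, icmp sgt zero, select between <4 x double> operands.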
define <4 x double> @sgt_zero_fp_select(<4 x i8>* %p, <4 x double> %x, <4 x double> %y) {
; AVX1-LABEL: sgt_zero_fp_select:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmovsxbd (%rdi), %xmm2
; AVX1-NEXT:    vpmovsxdq %xmm2, %xmm3
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
; AVX1-NEXT:    vpmovsxdq %xmm2, %xmm2
; AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: sgt_zero_fp_select:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovsxbq (%rdi), %ymm2
; AVX2-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX2-NEXT:    vpcmpgtq %ymm3, %ymm2, %ymm2
; AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: sgt_zero_fp_select:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovsxbd (%rdi), %xmm2
; AVX512-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512-NEXT:    vpcmpgtd %xmm3, %xmm2, %k1
; AVX512-NEXT:    vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
; AVX512-NEXT:    retq
  %load = load <4 x i8>, <4 x i8>* %p
  %cmp = icmp sgt <4 x i8> %load, zeroinitializer
  %sel = select <4 x i1> %cmp, <4 x double> %x, <4 x double> %y
  ret <4 x double> %sel
}

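; <8 x i16> load, icmp slt zero, select between <8 x float> operands.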
define <8 x float> @slt_zero_fp_select(<8 x i16>* %p, <8 x float> %x, <8 x float> %y) {
; AVX1-LABEL: slt_zero_fp_select:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmovsxwd 8(%rdi), %xmm2
; AVX1-NEXT:    vpmovsxwd (%rdi), %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT:    vblendvps %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: slt_zero_fp_select:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovsxwd (%rdi), %ymm2
; AVX2-NEXT:    vblendvps %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: slt_zero_fp_select:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX512-NEXT:    vpcmpgtw (%rdi), %xmm2, %k1
; AVX512-NEXT:    vblendmps %ymm0, %ymm1, %ymm0 {%k1}
; AVX512-NEXT:    retq
  %load = load <8 x i16>, <8 x i16>* %p
  %cmp = icmp slt <8 x i16> %load, zeroinitializer
  %sel = select <8 x i1> %cmp, <8 x float> %x, <8 x float> %y
  ret <8 x float> %sel
}