• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -slp-vectorizer -instcombine -S -mtriple=x86_64--- -mattr=+sse2 | FileCheck %s
3; RUN: opt < %s -slp-vectorizer -instcombine -S -mtriple=x86_64--- -mattr=+avx  | FileCheck %s
4
5;
6; Check that we can commute operands when the predicate itself is commutative (eq, ne, oeq, uno).
7;
8
; Builds a <4 x i1> mask lane by lane with scalar `icmp eq`, then sign-extends it.
; Lanes 0 and 3 compare (%a, %b); lanes 1 and 2 compare (%b, %a).
; The CHECK lines expect a single <4 x i32> load through %b and one vector
; `icmp eq` of that load against %a.
9define <4 x i32> @icmp_eq_v4i32(<4 x i32> %a, i32* %b) {
10; CHECK-LABEL: @icmp_eq_v4i32(
11; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
12; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
13; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq <4 x i32> [[TMP2]], [[A:%.*]]
14; CHECK-NEXT:    [[R:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
15; CHECK-NEXT:    ret <4 x i32> [[R]]
16;
17  %a0 = extractelement <4 x i32> %a, i32 0
18  %a1 = extractelement <4 x i32> %a, i32 1
19  %a2 = extractelement <4 x i32> %a, i32 2
20  %a3 = extractelement <4 x i32> %a, i32 3
21  %p0 = getelementptr inbounds i32, i32* %b, i32 0
22  %p1 = getelementptr inbounds i32, i32* %b, i32 1
23  %p2 = getelementptr inbounds i32, i32* %b, i32 2
24  %p3 = getelementptr inbounds i32, i32* %b, i32 3
25  %b0 = load i32, i32* %p0, align 4
26  %b1 = load i32, i32* %p1, align 4
27  %b2 = load i32, i32* %p2, align 4
28  %b3 = load i32, i32* %p3, align 4
; Same predicate on every lane; only the operand order differs (lanes 1 and 2 are swapped).
29  %c0 = icmp eq i32 %a0, %b0
30  %c1 = icmp eq i32 %b1, %a1
31  %c2 = icmp eq i32 %b2, %a2
32  %c3 = icmp eq i32 %a3, %b3
33  %d0 = insertelement <4 x i1> undef, i1 %c0, i32 0
34  %d1 = insertelement <4 x i1>   %d0, i1 %c1, i32 1
35  %d2 = insertelement <4 x i1>   %d1, i1 %c2, i32 2
36  %d3 = insertelement <4 x i1>   %d2, i1 %c3, i32 3
37  %r = sext <4 x i1> %d3 to <4 x i32>
38  ret <4 x i32> %r
39}
40
; Same shape as the `icmp eq` case, using `icmp ne`.
; Lanes 0 and 3 compare (%a, %b); lanes 1 and 2 compare (%b, %a).
; The CHECK lines expect a single <4 x i32> load through %b and one vector
; `icmp ne` of that load against %a.
41define <4 x i32> @icmp_ne_v4i32(<4 x i32> %a, i32* %b) {
42; CHECK-LABEL: @icmp_ne_v4i32(
43; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
44; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
45; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP2]], [[A:%.*]]
46; CHECK-NEXT:    [[R:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
47; CHECK-NEXT:    ret <4 x i32> [[R]]
48;
49  %a0 = extractelement <4 x i32> %a, i32 0
50  %a1 = extractelement <4 x i32> %a, i32 1
51  %a2 = extractelement <4 x i32> %a, i32 2
52  %a3 = extractelement <4 x i32> %a, i32 3
53  %p0 = getelementptr inbounds i32, i32* %b, i32 0
54  %p1 = getelementptr inbounds i32, i32* %b, i32 1
55  %p2 = getelementptr inbounds i32, i32* %b, i32 2
56  %p3 = getelementptr inbounds i32, i32* %b, i32 3
57  %b0 = load i32, i32* %p0, align 4
58  %b1 = load i32, i32* %p1, align 4
59  %b2 = load i32, i32* %p2, align 4
60  %b3 = load i32, i32* %p3, align 4
; Same predicate on every lane; only the operand order differs (lanes 1 and 2 are swapped).
61  %c0 = icmp ne i32 %a0, %b0
62  %c1 = icmp ne i32 %b1, %a1
63  %c2 = icmp ne i32 %b2, %a2
64  %c3 = icmp ne i32 %a3, %b3
65  %d0 = insertelement <4 x i1> undef, i1 %c0, i32 0
66  %d1 = insertelement <4 x i1>   %d0, i1 %c1, i32 1
67  %d2 = insertelement <4 x i1>   %d1, i1 %c2, i32 2
68  %d3 = insertelement <4 x i1>   %d2, i1 %c3, i32 3
69  %r = sext <4 x i1> %d3 to <4 x i32>
70  ret <4 x i32> %r
71}
72
; Floating-point variant: four scalar `fcmp oeq` compares feed a <4 x i1> mask
; that is sign-extended to <4 x i32>.
; Lanes 0 and 3 compare (%a, %b); lanes 1 and 2 compare (%b, %a).
; The CHECK lines expect a single <4 x float> load through %b and one vector
; `fcmp oeq` of that load against %a.
73define <4 x i32> @fcmp_oeq_v4i32(<4 x float> %a, float* %b) {
74; CHECK-LABEL: @fcmp_oeq_v4i32(
75; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[B:%.*]] to <4 x float>*
76; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
77; CHECK-NEXT:    [[TMP3:%.*]] = fcmp oeq <4 x float> [[TMP2]], [[A:%.*]]
78; CHECK-NEXT:    [[R:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
79; CHECK-NEXT:    ret <4 x i32> [[R]]
80;
81  %a0 = extractelement <4 x float> %a, i32 0
82  %a1 = extractelement <4 x float> %a, i32 1
83  %a2 = extractelement <4 x float> %a, i32 2
84  %a3 = extractelement <4 x float> %a, i32 3
85  %p0 = getelementptr inbounds float, float* %b, i32 0
86  %p1 = getelementptr inbounds float, float* %b, i32 1
87  %p2 = getelementptr inbounds float, float* %b, i32 2
88  %p3 = getelementptr inbounds float, float* %b, i32 3
89  %b0 = load float, float* %p0, align 4
90  %b1 = load float, float* %p1, align 4
91  %b2 = load float, float* %p2, align 4
92  %b3 = load float, float* %p3, align 4
; Same predicate on every lane; only the operand order differs (lanes 1 and 2 are swapped).
93  %c0 = fcmp oeq float %a0, %b0
94  %c1 = fcmp oeq float %b1, %a1
95  %c2 = fcmp oeq float %b2, %a2
96  %c3 = fcmp oeq float %a3, %b3
97  %d0 = insertelement <4 x i1> undef, i1 %c0, i32 0
98  %d1 = insertelement <4 x i1>   %d0, i1 %c1, i32 1
99  %d2 = insertelement <4 x i1>   %d1, i1 %c2, i32 2
100  %d3 = insertelement <4 x i1>   %d2, i1 %c3, i32 3
101  %r = sext <4 x i1> %d3 to <4 x i32>
102  ret <4 x i32> %r
103}
104
; Floating-point variant using `fcmp uno` on all four lanes.
; Lanes 0 and 3 compare (%a, %b); lanes 1 and 2 compare (%b, %a).
; The CHECK lines expect a single <4 x float> load through %b and one vector
; `fcmp uno` of that load against %a.
105define <4 x i32> @fcmp_uno_v4i32(<4 x float> %a, float* %b) {
106; CHECK-LABEL: @fcmp_uno_v4i32(
107; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[B:%.*]] to <4 x float>*
108; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
109; CHECK-NEXT:    [[TMP3:%.*]] = fcmp uno <4 x float> [[TMP2]], [[A:%.*]]
110; CHECK-NEXT:    [[R:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
111; CHECK-NEXT:    ret <4 x i32> [[R]]
112;
113  %a0 = extractelement <4 x float> %a, i32 0
114  %a1 = extractelement <4 x float> %a, i32 1
115  %a2 = extractelement <4 x float> %a, i32 2
116  %a3 = extractelement <4 x float> %a, i32 3
117  %p0 = getelementptr inbounds float, float* %b, i32 0
118  %p1 = getelementptr inbounds float, float* %b, i32 1
119  %p2 = getelementptr inbounds float, float* %b, i32 2
120  %p3 = getelementptr inbounds float, float* %b, i32 3
121  %b0 = load float, float* %p0, align 4
122  %b1 = load float, float* %p1, align 4
123  %b2 = load float, float* %p2, align 4
124  %b3 = load float, float* %p3, align 4
; Same predicate on every lane; only the operand order differs (lanes 1 and 2 are swapped).
125  %c0 = fcmp uno float %a0, %b0
126  %c1 = fcmp uno float %b1, %a1
127  %c2 = fcmp uno float %b2, %a2
128  %c3 = fcmp uno float %a3, %b3
129  %d0 = insertelement <4 x i1> undef, i1 %c0, i32 0
130  %d1 = insertelement <4 x i1>   %d0, i1 %c1, i32 1
131  %d2 = insertelement <4 x i1>   %d1, i1 %c2, i32 2
132  %d3 = insertelement <4 x i1>   %d2, i1 %c3, i32 3
133  %r = sext <4 x i1> %d3 to <4 x i32>
134  ret <4 x i32> %r
135}
136
137;
138; Check that we can commute operands by swapping the predicate.
139;
140
; Mixed-predicate case: lanes 0 and 3 use `icmp sgt (%a, %b)`, lanes 1 and 2
; use `icmp slt (%b, %a)`. The two forms are equivalent once operands are swapped.
; The CHECK lines expect a single <4 x i32> load through %b and one vector
; `icmp slt` with the loaded vector on the left and %a on the right.
141define <4 x i32> @icmp_sgt_slt_v4i32(<4 x i32> %a, i32* %b) {
142; CHECK-LABEL: @icmp_sgt_slt_v4i32(
143; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
144; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
145; CHECK-NEXT:    [[TMP3:%.*]] = icmp slt <4 x i32> [[TMP2]], [[A:%.*]]
146; CHECK-NEXT:    [[R:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
147; CHECK-NEXT:    ret <4 x i32> [[R]]
148;
149  %a0 = extractelement <4 x i32> %a, i32 0
150  %a1 = extractelement <4 x i32> %a, i32 1
151  %a2 = extractelement <4 x i32> %a, i32 2
152  %a3 = extractelement <4 x i32> %a, i32 3
153  %p0 = getelementptr inbounds i32, i32* %b, i32 0
154  %p1 = getelementptr inbounds i32, i32* %b, i32 1
155  %p2 = getelementptr inbounds i32, i32* %b, i32 2
156  %p3 = getelementptr inbounds i32, i32* %b, i32 3
157  %b0 = load i32, i32* %p0, align 4
158  %b1 = load i32, i32* %p1, align 4
159  %b2 = load i32, i32* %p2, align 4
160  %b3 = load i32, i32* %p3, align 4
; sgt(a, b) on lanes 0/3 and slt(b, a) on lanes 1/2: predicate and operand order both flip.
161  %c0 = icmp sgt i32 %a0, %b0
162  %c1 = icmp slt i32 %b1, %a1
163  %c2 = icmp slt i32 %b2, %a2
164  %c3 = icmp sgt i32 %a3, %b3
165  %d0 = insertelement <4 x i1> undef, i1 %c0, i32 0
166  %d1 = insertelement <4 x i1>   %d0, i1 %c1, i32 1
167  %d2 = insertelement <4 x i1>   %d1, i1 %c2, i32 2
168  %d3 = insertelement <4 x i1>   %d2, i1 %c3, i32 3
169  %r = sext <4 x i1> %d3 to <4 x i32>
170  ret <4 x i32> %r
171}
172
; Mixed-predicate case: lanes 0 and 3 use `icmp uge (%a, %b)`, lanes 1 and 2
; use `icmp ule (%b, %a)`. The two forms are equivalent once operands are swapped.
; The CHECK lines expect a single <4 x i32> load through %b and one vector
; `icmp ule` with the loaded vector on the left and %a on the right.
173define <4 x i32> @icmp_uge_ule_v4i32(<4 x i32> %a, i32* %b) {
174; CHECK-LABEL: @icmp_uge_ule_v4i32(
175; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
176; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
177; CHECK-NEXT:    [[TMP3:%.*]] = icmp ule <4 x i32> [[TMP2]], [[A:%.*]]
178; CHECK-NEXT:    [[R:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
179; CHECK-NEXT:    ret <4 x i32> [[R]]
180;
181  %a0 = extractelement <4 x i32> %a, i32 0
182  %a1 = extractelement <4 x i32> %a, i32 1
183  %a2 = extractelement <4 x i32> %a, i32 2
184  %a3 = extractelement <4 x i32> %a, i32 3
185  %p0 = getelementptr inbounds i32, i32* %b, i32 0
186  %p1 = getelementptr inbounds i32, i32* %b, i32 1
187  %p2 = getelementptr inbounds i32, i32* %b, i32 2
188  %p3 = getelementptr inbounds i32, i32* %b, i32 3
189  %b0 = load i32, i32* %p0, align 4
190  %b1 = load i32, i32* %p1, align 4
191  %b2 = load i32, i32* %p2, align 4
192  %b3 = load i32, i32* %p3, align 4
; uge(a, b) on lanes 0/3 and ule(b, a) on lanes 1/2: predicate and operand order both flip.
193  %c0 = icmp uge i32 %a0, %b0
194  %c1 = icmp ule i32 %b1, %a1
195  %c2 = icmp ule i32 %b2, %a2
196  %c3 = icmp uge i32 %a3, %b3
197  %d0 = insertelement <4 x i1> undef, i1 %c0, i32 0
198  %d1 = insertelement <4 x i1>   %d0, i1 %c1, i32 1
199  %d2 = insertelement <4 x i1>   %d1, i1 %c2, i32 2
200  %d3 = insertelement <4 x i1>   %d2, i1 %c3, i32 3
201  %r = sext <4 x i1> %d3 to <4 x i32>
202  ret <4 x i32> %r
203}
204
; Floating-point mixed-predicate case: lanes 0 and 3 use `fcmp ogt (%a, %b)`,
; lanes 1 and 2 use `fcmp olt (%b, %a)`. The two forms are equivalent once
; operands are swapped.
; The CHECK lines expect a single <4 x float> load through %b and one vector
; `fcmp olt` with the loaded vector on the left and %a on the right.
205define <4 x i32> @fcmp_ogt_olt_v4i32(<4 x float> %a, float* %b) {
206; CHECK-LABEL: @fcmp_ogt_olt_v4i32(
207; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[B:%.*]] to <4 x float>*
208; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
209; CHECK-NEXT:    [[TMP3:%.*]] = fcmp olt <4 x float> [[TMP2]], [[A:%.*]]
210; CHECK-NEXT:    [[R:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
211; CHECK-NEXT:    ret <4 x i32> [[R]]
212;
213  %a0 = extractelement <4 x float> %a, i32 0
214  %a1 = extractelement <4 x float> %a, i32 1
215  %a2 = extractelement <4 x float> %a, i32 2
216  %a3 = extractelement <4 x float> %a, i32 3
217  %p0 = getelementptr inbounds float, float* %b, i32 0
218  %p1 = getelementptr inbounds float, float* %b, i32 1
219  %p2 = getelementptr inbounds float, float* %b, i32 2
220  %p3 = getelementptr inbounds float, float* %b, i32 3
221  %b0 = load float, float* %p0, align 4
222  %b1 = load float, float* %p1, align 4
223  %b2 = load float, float* %p2, align 4
224  %b3 = load float, float* %p3, align 4
; ogt(a, b) on lanes 0/3 and olt(b, a) on lanes 1/2: predicate and operand order both flip.
225  %c0 = fcmp ogt float %a0, %b0
226  %c1 = fcmp olt float %b1, %a1
227  %c2 = fcmp olt float %b2, %a2
228  %c3 = fcmp ogt float %a3, %b3
229  %d0 = insertelement <4 x i1> undef, i1 %c0, i32 0
230  %d1 = insertelement <4 x i1>   %d0, i1 %c1, i32 1
231  %d2 = insertelement <4 x i1>   %d1, i1 %c2, i32 2
232  %d3 = insertelement <4 x i1>   %d2, i1 %c3, i32 3
233  %r = sext <4 x i1> %d3 to <4 x i32>
234  ret <4 x i32> %r
235}
236
; Negative case: lanes 0 and 3 use `fcmp ord (%a, %b)`, lanes 1 and 2 use
; `fcmp uno (%b, %a)`. Per the LLVM LangRef, `ord` and `uno` are logical
; opposites rather than operand-swapped forms of each other, so swapping
; operands cannot unify the lanes under one predicate.
; The CHECK lines accordingly expect the scalar extracts, loads, compares and
; inserts to remain; the only visible differences from the input are that %p0
; is gone (the first load reads %b directly) and the GEP indices are i64.
237define <4 x i32> @fcmp_ord_uno_v4i32(<4 x float> %a, float* %b) {
238; CHECK-LABEL: @fcmp_ord_uno_v4i32(
239; CHECK-NEXT:    [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
240; CHECK-NEXT:    [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
241; CHECK-NEXT:    [[A2:%.*]] = extractelement <4 x float> [[A]], i32 2
242; CHECK-NEXT:    [[A3:%.*]] = extractelement <4 x float> [[A]], i32 3
243; CHECK-NEXT:    [[P1:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 1
244; CHECK-NEXT:    [[P2:%.*]] = getelementptr inbounds float, float* [[B]], i64 2
245; CHECK-NEXT:    [[P3:%.*]] = getelementptr inbounds float, float* [[B]], i64 3
246; CHECK-NEXT:    [[B0:%.*]] = load float, float* [[B]], align 4
247; CHECK-NEXT:    [[B1:%.*]] = load float, float* [[P1]], align 4
248; CHECK-NEXT:    [[B2:%.*]] = load float, float* [[P2]], align 4
249; CHECK-NEXT:    [[B3:%.*]] = load float, float* [[P3]], align 4
250; CHECK-NEXT:    [[C0:%.*]] = fcmp ord float [[A0]], [[B0]]
251; CHECK-NEXT:    [[C1:%.*]] = fcmp uno float [[B1]], [[A1]]
252; CHECK-NEXT:    [[C2:%.*]] = fcmp uno float [[B2]], [[A2]]
253; CHECK-NEXT:    [[C3:%.*]] = fcmp ord float [[A3]], [[B3]]
254; CHECK-NEXT:    [[D0:%.*]] = insertelement <4 x i1> undef, i1 [[C0]], i32 0
255; CHECK-NEXT:    [[D1:%.*]] = insertelement <4 x i1> [[D0]], i1 [[C1]], i32 1
256; CHECK-NEXT:    [[D2:%.*]] = insertelement <4 x i1> [[D1]], i1 [[C2]], i32 2
257; CHECK-NEXT:    [[D3:%.*]] = insertelement <4 x i1> [[D2]], i1 [[C3]], i32 3
258; CHECK-NEXT:    [[R:%.*]] = sext <4 x i1> [[D3]] to <4 x i32>
259; CHECK-NEXT:    ret <4 x i32> [[R]]
260;
261  %a0 = extractelement <4 x float> %a, i32 0
262  %a1 = extractelement <4 x float> %a, i32 1
263  %a2 = extractelement <4 x float> %a, i32 2
264  %a3 = extractelement <4 x float> %a, i32 3
265  %p0 = getelementptr inbounds float, float* %b, i32 0
266  %p1 = getelementptr inbounds float, float* %b, i32 1
267  %p2 = getelementptr inbounds float, float* %b, i32 2
268  %p3 = getelementptr inbounds float, float* %b, i32 3
269  %b0 = load float, float* %p0, align 4
270  %b1 = load float, float* %p1, align 4
271  %b2 = load float, float* %p2, align 4
272  %b3 = load float, float* %p3, align 4
; ord on lanes 0/3, uno on lanes 1/2, with operand order also swapped on lanes 1/2.
273  %c0 = fcmp ord float %a0, %b0
274  %c1 = fcmp uno float %b1, %a1
275  %c2 = fcmp uno float %b2, %a2
276  %c3 = fcmp ord float %a3, %b3
277  %d0 = insertelement <4 x i1> undef, i1 %c0, i32 0
278  %d1 = insertelement <4 x i1>   %d0, i1 %c1, i32 1
279  %d2 = insertelement <4 x i1>   %d1, i1 %c2, i32 2
280  %d3 = insertelement <4 x i1>   %d2, i1 %c3, i32 3
281  %r = sext <4 x i1> %d3 to <4 x i32>
282  ret <4 x i32> %r
283}
284