• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -instcombine -S | FileCheck %s
; Data layout string of the module that the tests below are run against.
3target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
4
; Builds <%a, 1.0, 2.0, 3.0>, calls llvm.x86.sse.rcp.ss on it and returns only
; element 0 of the result. The expected instcombine output keeps the insert
; into element 0, the call and the extract; the constant inserts into
; elements 1-3 are no longer present.
5define float @test_rcp_ss_0(float %a) {
6; CHECK-LABEL: @test_rcp_ss_0(
7; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
8; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> [[TMP1]])
9; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
10; CHECK-NEXT:    ret float [[TMP3]]
11;
12  %1 = insertelement <4 x float> undef, float %a, i32 0
13  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
14  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
15  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
16  %5 = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %4)
17  %6 = extractelement <4 x float> %5, i32 0
18  ret float %6
19}
20
; Builds <%a, 1.0, 2.0, 3.0>, calls llvm.x86.sse.rcp.ss on it and returns
; element 1 of the result. The expected output is a bare
; `ret float 1.000000e+00` (the constant that was inserted into element 1),
; with no intrinsic call left.
21define float @test_rcp_ss_1(float %a) {
22; CHECK-LABEL: @test_rcp_ss_1(
23; CHECK-NEXT:    ret float 1.000000e+00
24;
25  %1 = insertelement <4 x float> undef, float %a, i32 0
26  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
27  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
28  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
29  %5 = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %4)
30  %6 = extractelement <4 x float> %5, i32 1
31  ret float %6
32}
33
; Builds <%a, 1.0, 2.0, 3.0>, calls llvm.x86.sse.sqrt.ss on it and returns only
; element 0 of the result. The expected instcombine output keeps the insert
; into element 0, the call and the extract; the constant inserts into
; elements 1-3 are no longer present.
34define float @test_sqrt_ss_0(float %a) {
35; CHECK-LABEL: @test_sqrt_ss_0(
36; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
37; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> [[TMP1]])
38; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
39; CHECK-NEXT:    ret float [[TMP3]]
40;
41  %1 = insertelement <4 x float> undef, float %a, i32 0
42  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
43  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
44  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
45  %5 = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %4)
46  %6 = extractelement <4 x float> %5, i32 0
47  ret float %6
48}
49
; Builds <%a, 1.0, 2.0, 3.0>, calls llvm.x86.sse.sqrt.ss on it and returns
; element 2 of the result. The expected output is a bare
; `ret float 2.000000e+00` (the constant that was inserted into element 2),
; with no intrinsic call left.
50define float @test_sqrt_ss_2(float %a) {
51; CHECK-LABEL: @test_sqrt_ss_2(
52; CHECK-NEXT:    ret float 2.000000e+00
53;
54  %1 = insertelement <4 x float> undef, float %a, i32 0
55  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
56  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
57  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
58  %5 = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %4)
59  %6 = extractelement <4 x float> %5, i32 2
60  ret float %6
61}
62
; Builds <%a, 1.0, 2.0, 3.0>, calls llvm.x86.sse.rsqrt.ss on it and returns
; only element 0 of the result. The expected instcombine output keeps the
; insert into element 0, the call and the extract; the constant inserts into
; elements 1-3 are no longer present.
63define float @test_rsqrt_ss_0(float %a) {
64; CHECK-LABEL: @test_rsqrt_ss_0(
65; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
66; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> [[TMP1]])
67; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
68; CHECK-NEXT:    ret float [[TMP3]]
69;
70  %1 = insertelement <4 x float> undef, float %a, i32 0
71  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
72  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
73  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
74  %5 = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %4)
75  %6 = extractelement <4 x float> %5, i32 0
76  ret float %6
77}
78
; Builds <%a, 1.0, 2.0, 3.0>, calls llvm.x86.sse.rsqrt.ss on it and returns
; element 3 of the result. The expected output is a bare
; `ret float 3.000000e+00` (the constant that was inserted into element 3),
; with no intrinsic call left.
79define float @test_rsqrt_ss_3(float %a) {
80; CHECK-LABEL: @test_rsqrt_ss_3(
81; CHECK-NEXT:    ret float 3.000000e+00
82;
83  %1 = insertelement <4 x float> undef, float %a, i32 0
84  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
85  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
86  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
87  %5 = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %4)
88  %6 = extractelement <4 x float> %5, i32 3
89  ret float %6
90}
91
; Overwrites elements 1-3 of %b with constants before passing it as the second
; operand of llvm.x86.sse.add.ss, and returns the full result vector. The
; expected output passes %b to the call unchanged, i.e. the three inserts are
; dropped.
92define <4 x float> @test_add_ss(<4 x float> %a, <4 x float> %b) {
93; CHECK-LABEL: @test_add_ss(
94; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a, <4 x float> %b)
95; CHECK-NEXT:    ret <4 x float> [[TMP1]]
96;
97  %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
98  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
99  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
100  %4 = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a, <4 x float> %3)
101  ret <4 x float> %4
102}
103
; Both operands are built from a scalar in element 0 plus constants in
; elements 1-3, and only element 0 of the llvm.x86.sse.add.ss result is
; returned. The expected output is a plain scalar `fadd float %a, %b` with no
; vector operations left.
104define float @test_add_ss_0(float %a, float %b) {
105; CHECK-LABEL: @test_add_ss_0(
106; CHECK-NEXT:    [[TMP1:%.*]] = fadd float %a, %b
107; CHECK-NEXT:    ret float [[TMP1]]
108;
109  %1 = insertelement <4 x float> undef, float %a, i32 0
110  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
111  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
112  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
113  %5 = insertelement <4 x float> undef, float %b, i32 0
114  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
115  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
116  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
117  %9 = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %4, <4 x float> %8)
118  %r = extractelement <4 x float> %9, i32 0
119  ret float %r
120}
121
; The first operand is <%a, 1.0, 2.0, 3.0>, the second has only %b in element
; 0, and element 1 of the llvm.x86.sse.add.ss result is returned. The expected
; output is `ret float 1.000000e+00`, the constant from element 1 of the first
; operand, with no intrinsic call left.
122define float @test_add_ss_1(float %a, float %b) {
123; CHECK-LABEL: @test_add_ss_1(
124; CHECK-NEXT:    ret float 1.000000e+00
125;
126  %1 = insertelement <4 x float> undef, float %a, i32 0
127  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
128  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
129  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
130  %5 = insertelement <4 x float> undef, float %b, i32 0
131  %6 = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %4, <4 x float> %5)
132  %7 = extractelement <4 x float> %6, i32 1
133  ret float %7
134}
135
; Overwrites elements 1-3 of %b with constants before passing it as the second
; operand of llvm.x86.sse.sub.ss, and returns the full result vector. The
; expected output passes %b to the call unchanged, i.e. the three inserts are
; dropped.
136define <4 x float> @test_sub_ss(<4 x float> %a, <4 x float> %b) {
137; CHECK-LABEL: @test_sub_ss(
138; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a, <4 x float> %b)
139; CHECK-NEXT:    ret <4 x float> [[TMP1]]
140;
141  %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
142  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
143  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
144  %4 = tail call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a, <4 x float> %3)
145  ret <4 x float> %4
146}
147
; Both operands are built from a scalar in element 0 plus constants in
; elements 1-3, and only element 0 of the llvm.x86.sse.sub.ss result is
; returned. The expected output is a plain scalar `fsub float %a, %b` with no
; vector operations left.
148define float @test_sub_ss_0(float %a, float %b) {
149; CHECK-LABEL: @test_sub_ss_0(
150; CHECK-NEXT:    [[TMP1:%.*]] = fsub float %a, %b
151; CHECK-NEXT:    ret float [[TMP1]]
152;
153  %1 = insertelement <4 x float> undef, float %a, i32 0
154  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
155  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
156  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
157  %5 = insertelement <4 x float> undef, float %b, i32 0
158  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
159  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
160  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
161  %9 = tail call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %4, <4 x float> %8)
162  %r = extractelement <4 x float> %9, i32 0
163  ret float %r
164}
165
; The first operand is <%a, 1.0, 2.0, 3.0>, the second has only %b in element
; 0, and element 2 of the llvm.x86.sse.sub.ss result is returned. The expected
; output is `ret float 2.000000e+00`, the constant from element 2 of the first
; operand, with no intrinsic call left.
166define float @test_sub_ss_2(float %a, float %b) {
167; CHECK-LABEL: @test_sub_ss_2(
168; CHECK-NEXT:    ret float 2.000000e+00
169;
170  %1 = insertelement <4 x float> undef, float %a, i32 0
171  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
172  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
173  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
174  %5 = insertelement <4 x float> undef, float %b, i32 0
175  %6 = tail call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %4, <4 x float> %5)
176  %7 = extractelement <4 x float> %6, i32 2
177  ret float %7
178}
179
; Overwrites elements 1-3 of %b with constants before passing it as the second
; operand of llvm.x86.sse.mul.ss, and returns the full result vector. The
; expected output passes %b to the call unchanged, i.e. the three inserts are
; dropped.
180define <4 x float> @test_mul_ss(<4 x float> %a, <4 x float> %b) {
181; CHECK-LABEL: @test_mul_ss(
182; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a, <4 x float> %b)
183; CHECK-NEXT:    ret <4 x float> [[TMP1]]
184;
185  %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
186  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
187  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
188  %4 = tail call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a, <4 x float> %3)
189  ret <4 x float> %4
190}
191
; Both operands are built from a scalar in element 0 plus constants in
; elements 1-3, and only element 0 of the llvm.x86.sse.mul.ss result is
; returned. The expected output is a plain scalar `fmul float %a, %b` with no
; vector operations left.
192define float @test_mul_ss_0(float %a, float %b) {
193; CHECK-LABEL: @test_mul_ss_0(
194; CHECK-NEXT:    [[TMP1:%.*]] = fmul float %a, %b
195; CHECK-NEXT:    ret float [[TMP1]]
196;
197  %1 = insertelement <4 x float> undef, float %a, i32 0
198  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
199  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
200  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
201  %5 = insertelement <4 x float> undef, float %b, i32 0
202  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
203  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
204  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
205  %9 = tail call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %4, <4 x float> %8)
206  %r = extractelement <4 x float> %9, i32 0
207  ret float %r
208}
209
; The first operand is <%a, 1.0, 2.0, 3.0>, the second has only %b in element
; 0, and element 3 of the llvm.x86.sse.mul.ss result is returned. The expected
; output is `ret float 3.000000e+00`, the constant from element 3 of the first
; operand, with no intrinsic call left.
210define float @test_mul_ss_3(float %a, float %b) {
211; CHECK-LABEL: @test_mul_ss_3(
212; CHECK-NEXT:    ret float 3.000000e+00
213;
214  %1 = insertelement <4 x float> undef, float %a, i32 0
215  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
216  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
217  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
218  %5 = insertelement <4 x float> undef, float %b, i32 0
219  %6 = tail call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %4, <4 x float> %5)
220  %7 = extractelement <4 x float> %6, i32 3
221  ret float %7
222}
223
; Overwrites elements 1-3 of %b with constants before passing it as the second
; operand of llvm.x86.sse.div.ss, and returns the full result vector. The
; expected output passes %b to the call unchanged, i.e. the three inserts are
; dropped.
224define <4 x float> @test_div_ss(<4 x float> %a, <4 x float> %b) {
225; CHECK-LABEL: @test_div_ss(
226; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a, <4 x float> %b)
227; CHECK-NEXT:    ret <4 x float> [[TMP1]]
228;
229  %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
230  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
231  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
232  %4 = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a, <4 x float> %3)
233  ret <4 x float> %4
234}
235
; Both operands are built from a scalar in element 0 plus constants in
; elements 1-3, and only element 0 of the llvm.x86.sse.div.ss result is
; returned. The expected output is a plain scalar `fdiv float %a, %b` with no
; vector operations left.
236define float @test_div_ss_0(float %a, float %b) {
237; CHECK-LABEL: @test_div_ss_0(
238; CHECK-NEXT:    [[TMP1:%.*]] = fdiv float %a, %b
239; CHECK-NEXT:    ret float [[TMP1]]
240;
241  %1 = insertelement <4 x float> undef, float %a, i32 0
242  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
243  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
244  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
245  %5 = insertelement <4 x float> undef, float %b, i32 0
246  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
247  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
248  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
249  %9 = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %4, <4 x float> %8)
250  %r = extractelement <4 x float> %9, i32 0
251  ret float %r
252}
253
; The first operand is <%a, 1.0, 2.0, 3.0>, the second has only %b in element
; 0, and element 1 of the llvm.x86.sse.div.ss result is returned. The expected
; output is `ret float 1.000000e+00`, the constant from element 1 of the first
; operand, with no intrinsic call left.
254define float @test_div_ss_1(float %a, float %b) {
255; CHECK-LABEL: @test_div_ss_1(
256; CHECK-NEXT:    ret float 1.000000e+00
257;
258  %1 = insertelement <4 x float> undef, float %a, i32 0
259  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
260  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
261  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
262  %5 = insertelement <4 x float> undef, float %b, i32 0
263  %6 = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %4, <4 x float> %5)
264  %7 = extractelement <4 x float> %6, i32 1
265  ret float %7
266}
267
; Overwrites elements 1-3 of %b with constants before passing it as the second
; operand of llvm.x86.sse.min.ss, and returns the full result vector. The
; expected output passes %b to the call unchanged, i.e. the three inserts are
; dropped.
268define <4 x float> @test_min_ss(<4 x float> %a, <4 x float> %b) {
269; CHECK-LABEL: @test_min_ss(
270; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a, <4 x float> %b)
271; CHECK-NEXT:    ret <4 x float> [[TMP1]]
272;
273  %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
274  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
275  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
276  %4 = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a, <4 x float> %3)
277  ret <4 x float> %4
278}
279
; Both operands are built from a scalar in element 0 plus constants in
; elements 1-3, and only element 0 of the llvm.x86.sse.min.ss result is
; returned. The expected output still contains the intrinsic call and the
; extract, but each operand is reduced to a single insert of the scalar into
; element 0 of undef.
280define float @test_min_ss_0(float %a, float %b) {
281; CHECK-LABEL: @test_min_ss_0(
282; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
283; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
284; CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
285; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP3]], i32 0
286; CHECK-NEXT:    ret float [[TMP4]]
287;
288  %1 = insertelement <4 x float> undef, float %a, i32 0
289  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
290  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
291  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
292  %5 = insertelement <4 x float> undef, float %b, i32 0
293  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
294  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
295  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
296  %9 = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %4, <4 x float> %8)
297  %10 = extractelement <4 x float> %9, i32 0
298  ret float %10
299}
300
; The first operand is <%a, 1.0, 2.0, 3.0>, the second has only %b in element
; 0, and element 2 of the llvm.x86.sse.min.ss result is returned. The expected
; output is `ret float 2.000000e+00`, the constant from element 2 of the first
; operand, with no intrinsic call left.
301define float @test_min_ss_2(float %a, float %b) {
302; CHECK-LABEL: @test_min_ss_2(
303; CHECK-NEXT:    ret float 2.000000e+00
304;
305  %1 = insertelement <4 x float> undef, float %a, i32 0
306  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
307  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
308  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
309  %5 = insertelement <4 x float> undef, float %b, i32 0
310  %6 = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %4, <4 x float> %5)
311  %7 = extractelement <4 x float> %6, i32 2
312  ret float %7
313}
314
; Overwrites elements 1-3 of %b with constants before passing it as the second
; operand of llvm.x86.sse.max.ss, and returns the full result vector. The
; expected output passes %b to the call unchanged, i.e. the three inserts are
; dropped.
315define <4 x float> @test_max_ss(<4 x float> %a, <4 x float> %b) {
316; CHECK-LABEL: @test_max_ss(
317; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a, <4 x float> %b)
318; CHECK-NEXT:    ret <4 x float> [[TMP1]]
319;
320  %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
321  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
322  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
323  %4 = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a, <4 x float> %3)
324  ret <4 x float> %4
325}
326
; Both operands are built from a scalar in element 0 plus constants in
; elements 1-3, and only element 0 of the llvm.x86.sse.max.ss result is
; returned. The expected output still contains the intrinsic call and the
; extract, but each operand is reduced to a single insert of the scalar into
; element 0 of undef.
327define float @test_max_ss_0(float %a, float %b) {
328; CHECK-LABEL: @test_max_ss_0(
329; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
330; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
331; CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
332; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP3]], i32 0
333; CHECK-NEXT:    ret float [[TMP4]]
334;
335  %1 = insertelement <4 x float> undef, float %a, i32 0
336  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
337  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
338  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
339  %5 = insertelement <4 x float> undef, float %b, i32 0
340  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
341  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
342  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
343  %9 = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %4, <4 x float> %8)
344  %10 = extractelement <4 x float> %9, i32 0
345  ret float %10
346}
347
; The first operand is <%a, 1.0, 2.0, 3.0>, the second has only %b in element
; 0, and element 3 of the llvm.x86.sse.max.ss result is returned. The expected
; output is `ret float 3.000000e+00`, the constant from element 3 of the first
; operand, with no intrinsic call left.
348define float @test_max_ss_3(float %a, float %b) {
349; CHECK-LABEL: @test_max_ss_3(
350; CHECK-NEXT:    ret float 3.000000e+00
351;
352  %1 = insertelement <4 x float> undef, float %a, i32 0
353  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
354  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
355  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
356  %5 = insertelement <4 x float> undef, float %b, i32 0
357  %6 = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %4, <4 x float> %5)
358  %7 = extractelement <4 x float> %6, i32 3
359  ret float %7
360}
361
; Overwrites elements 1-3 of %b with constants before passing it as the second
; operand of llvm.x86.sse.cmp.ss (third operand i8 0), and returns the full
; result vector. The expected output passes %b to the call unchanged, i.e. the
; three inserts are dropped.
362define <4 x float> @test_cmp_ss(<4 x float> %a, <4 x float> %b) {
363; CHECK-LABEL: @test_cmp_ss(
364; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a, <4 x float> %b, i8 0)
365; CHECK-NEXT:    ret <4 x float> [[TMP1]]
366;
367  %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
368  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
369  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
370  %4 = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a, <4 x float> %3, i8 0)
371  ret <4 x float> %4
372}
373
; Both operands are built from a scalar in element 0 plus constants in
; elements 1-3, and only element 0 of the llvm.x86.sse.cmp.ss result (third
; operand i8 0) is returned. The expected output still contains the intrinsic
; call and the extract, but each operand is reduced to a single insert of the
; scalar into element 0 of undef.
374define float @test_cmp_ss_0(float %a, float %b) {
375; CHECK-LABEL: @test_cmp_ss_0(
376; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
377; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
378; CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]], i8 0)
379; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x float> [[TMP3]], i32 0
380; CHECK-NEXT:    ret float [[R]]
381;
382  %1 = insertelement <4 x float> undef, float %a, i32 0
383  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
384  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
385  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
386  %5 = insertelement <4 x float> undef, float %b, i32 0
387  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
388  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
389  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
390  %9 = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %4, <4 x float> %8, i8 0)
391  %r = extractelement <4 x float> %9, i32 0
392  ret float %r
393}
394
; The first operand is <%a, 1.0, 2.0, 3.0>, the second has only %b in element
; 0, and element 1 of the llvm.x86.sse.cmp.ss result is returned. The expected
; output is `ret float 1.000000e+00`, the constant from element 1 of the first
; operand, with no intrinsic call left.
395define float @test_cmp_ss_1(float %a, float %b) {
396; CHECK-LABEL: @test_cmp_ss_1(
397; CHECK-NEXT:    ret float 1.000000e+00
398;
399  %1 = insertelement <4 x float> undef, float %a, i32 0
400  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
401  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
402  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
403  %5 = insertelement <4 x float> undef, float %b, i32 0
404  %6 = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %4, <4 x float> %5, i8 0)
405  %7 = extractelement <4 x float> %6, i32 1
406  ret float %7
407}
408
; Both operands are built from a scalar in element 0 plus constants in
; elements 1-3 and passed to llvm.x86.sse.comieq.ss, whose i32 result is
; returned directly. The expected output keeps the call, but each operand is
; reduced to a single insert of the scalar into element 0 of undef.
409define i32 @test_comieq_ss_0(float %a, float %b) {
410; CHECK-LABEL: @test_comieq_ss_0(
411; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
412; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
413; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comieq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
414; CHECK-NEXT:    ret i32 [[TMP3]]
415;
416  %1 = insertelement <4 x float> undef, float %a, i32 0
417  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
418  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
419  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
420  %5 = insertelement <4 x float> undef, float %b, i32 0
421  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
422  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
423  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
424  %9 = tail call i32 @llvm.x86.sse.comieq.ss(<4 x float> %4, <4 x float> %8)
425  ret i32 %9
426}
427
; Both operands are built from a scalar in element 0 plus constants in
; elements 1-3 and passed to llvm.x86.sse.comige.ss, whose i32 result is
; returned directly. The expected output keeps the call, but each operand is
; reduced to a single insert of the scalar into element 0 of undef.
428define i32 @test_comige_ss_0(float %a, float %b) {
429; CHECK-LABEL: @test_comige_ss_0(
430; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
431; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
432; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comige.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
433; CHECK-NEXT:    ret i32 [[TMP3]]
434;
435  %1 = insertelement <4 x float> undef, float %a, i32 0
436  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
437  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
438  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
439  %5 = insertelement <4 x float> undef, float %b, i32 0
440  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
441  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
442  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
443  %9 = tail call i32 @llvm.x86.sse.comige.ss(<4 x float> %4, <4 x float> %8)
444  ret i32 %9
445}
446
; Both operands are built from a scalar in element 0 plus constants in
; elements 1-3 and passed to llvm.x86.sse.comigt.ss, whose i32 result is
; returned directly. The expected output keeps the call, but each operand is
; reduced to a single insert of the scalar into element 0 of undef.
447define i32 @test_comigt_ss_0(float %a, float %b) {
448; CHECK-LABEL: @test_comigt_ss_0(
449; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
450; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
451; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comigt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
452; CHECK-NEXT:    ret i32 [[TMP3]]
453;
454  %1 = insertelement <4 x float> undef, float %a, i32 0
455  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
456  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
457  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
458  %5 = insertelement <4 x float> undef, float %b, i32 0
459  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
460  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
461  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
462  %9 = tail call i32 @llvm.x86.sse.comigt.ss(<4 x float> %4, <4 x float> %8)
463  ret i32 %9
464}
465
; Both operands are built from a scalar in element 0 plus constants in
; elements 1-3 and passed to llvm.x86.sse.comile.ss, whose i32 result is
; returned directly. The expected output keeps the call, but each operand is
; reduced to a single insert of the scalar into element 0 of undef.
466define i32 @test_comile_ss_0(float %a, float %b) {
467; CHECK-LABEL: @test_comile_ss_0(
468; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
469; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
470; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comile.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
471; CHECK-NEXT:    ret i32 [[TMP3]]
472;
473  %1 = insertelement <4 x float> undef, float %a, i32 0
474  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
475  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
476  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
477  %5 = insertelement <4 x float> undef, float %b, i32 0
478  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
479  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
480  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
481  %9 = tail call i32 @llvm.x86.sse.comile.ss(<4 x float> %4, <4 x float> %8)
482  ret i32 %9
483}
484
; Both operands are built from a scalar in element 0 plus constants in
; elements 1-3 and passed to llvm.x86.sse.comilt.ss, whose i32 result is
; returned directly. The expected output keeps the call, but each operand is
; reduced to a single insert of the scalar into element 0 of undef.
485define i32 @test_comilt_ss_0(float %a, float %b) {
486; CHECK-LABEL: @test_comilt_ss_0(
487; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
488; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
489; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comilt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
490; CHECK-NEXT:    ret i32 [[TMP3]]
491;
492  %1 = insertelement <4 x float> undef, float %a, i32 0
493  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
494  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
495  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
496  %5 = insertelement <4 x float> undef, float %b, i32 0
497  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
498  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
499  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
500  %9 = tail call i32 @llvm.x86.sse.comilt.ss(<4 x float> %4, <4 x float> %8)
501  ret i32 %9
502}
503
; Both operands are built from a scalar in element 0 plus constants in
; elements 1-3 and passed to llvm.x86.sse.comineq.ss, whose i32 result is
; returned directly. The expected output keeps the call, but each operand is
; reduced to a single insert of the scalar into element 0 of undef.
504define i32 @test_comineq_ss_0(float %a, float %b) {
505; CHECK-LABEL: @test_comineq_ss_0(
506; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
507; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
508; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comineq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
509; CHECK-NEXT:    ret i32 [[TMP3]]
510;
511  %1 = insertelement <4 x float> undef, float %a, i32 0
512  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
513  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
514  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
515  %5 = insertelement <4 x float> undef, float %b, i32 0
516  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
517  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
518  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
519  %9 = tail call i32 @llvm.x86.sse.comineq.ss(<4 x float> %4, <4 x float> %8)
520  ret i32 %9
521}
522
; Both operands are built from a scalar in element 0 plus constants in
; elements 1-3 and passed to llvm.x86.sse.ucomieq.ss, whose i32 result is
; returned directly. The expected output keeps the call, but each operand is
; reduced to a single insert of the scalar into element 0 of undef.
523define i32 @test_ucomieq_ss_0(float %a, float %b) {
524; CHECK-LABEL: @test_ucomieq_ss_0(
525; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
526; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
527; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
528; CHECK-NEXT:    ret i32 [[TMP3]]
529;
530  %1 = insertelement <4 x float> undef, float %a, i32 0
531  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
532  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
533  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
534  %5 = insertelement <4 x float> undef, float %b, i32 0
535  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
536  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
537  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
538  %9 = tail call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %4, <4 x float> %8)
539  ret i32 %9
540}
541
; Both operands are built from a scalar in element 0 plus constants in
; elements 1-3 and passed to llvm.x86.sse.ucomige.ss, whose i32 result is
; returned directly. The expected output keeps the call, but each operand is
; reduced to a single insert of the scalar into element 0 of undef.
542define i32 @test_ucomige_ss_0(float %a, float %b) {
543; CHECK-LABEL: @test_ucomige_ss_0(
544; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
545; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
546; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomige.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
547; CHECK-NEXT:    ret i32 [[TMP3]]
548;
549  %1 = insertelement <4 x float> undef, float %a, i32 0
550  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
551  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
552  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
553  %5 = insertelement <4 x float> undef, float %b, i32 0
554  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
555  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
556  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
557  %9 = tail call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %4, <4 x float> %8)
558  ret i32 %9
559}
560
; Both operands are built from a scalar in element 0 plus constants in
; elements 1-3 and passed to llvm.x86.sse.ucomigt.ss, whose i32 result is
; returned directly. The expected output keeps the call, but each operand is
; reduced to a single insert of the scalar into element 0 of undef.
561define i32 @test_ucomigt_ss_0(float %a, float %b) {
562; CHECK-LABEL: @test_ucomigt_ss_0(
563; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
564; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
565; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
566; CHECK-NEXT:    ret i32 [[TMP3]]
567;
568  %1 = insertelement <4 x float> undef, float %a, i32 0
569  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
570  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
571  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
572  %5 = insertelement <4 x float> undef, float %b, i32 0
573  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
574  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
575  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
576  %9 = tail call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %4, <4 x float> %8)
577  ret i32 %9
578}
579
; Both operands are built from a scalar in element 0 plus constants in
; elements 1-3 and passed to llvm.x86.sse.ucomile.ss, whose i32 result is
; returned directly. The expected output keeps the call, but each operand is
; reduced to a single insert of the scalar into element 0 of undef.
580define i32 @test_ucomile_ss_0(float %a, float %b) {
581; CHECK-LABEL: @test_ucomile_ss_0(
582; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
583; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
584; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomile.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
585; CHECK-NEXT:    ret i32 [[TMP3]]
586;
587  %1 = insertelement <4 x float> undef, float %a, i32 0
588  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
589  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
590  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
591  %5 = insertelement <4 x float> undef, float %b, i32 0
592  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
593  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
594  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
595  %9 = tail call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %4, <4 x float> %8)
596  ret i32 %9
597}
598
; Both operands are built from a scalar in element 0 plus constants in
; elements 1-3 and passed to llvm.x86.sse.ucomilt.ss, whose i32 result is
; returned directly. The expected output keeps the call, but each operand is
; reduced to a single insert of the scalar into element 0 of undef.
599define i32 @test_ucomilt_ss_0(float %a, float %b) {
600; CHECK-LABEL: @test_ucomilt_ss_0(
601; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
602; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
603; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
604; CHECK-NEXT:    ret i32 [[TMP3]]
605;
606  %1 = insertelement <4 x float> undef, float %a, i32 0
607  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
608  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
609  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
610  %5 = insertelement <4 x float> undef, float %b, i32 0
611  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
612  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
613  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
614  %9 = tail call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %4, <4 x float> %8)
615  ret i32 %9
616}
617
; Both operands are built from a scalar in element 0 plus constants in
; elements 1-3 and passed to llvm.x86.sse.ucomineq.ss, whose i32 result is
; returned directly. The expected output keeps the call, but each operand is
; reduced to a single insert of the scalar into element 0 of undef.
618define i32 @test_ucomineq_ss_0(float %a, float %b) {
619; CHECK-LABEL: @test_ucomineq_ss_0(
620; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
621; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
622; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
623; CHECK-NEXT:    ret i32 [[TMP3]]
624;
625  %1 = insertelement <4 x float> undef, float %a, i32 0
626  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
627  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
628  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
629  %5 = insertelement <4 x float> undef, float %b, i32 0
630  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
631  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
632  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
633  %9 = tail call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %4, <4 x float> %8)
634  ret i32 %9
635}
636
; One-operand intrinsics called by the rcp/sqrt/rsqrt tests in this file:
; each takes a <4 x float> and returns a <4 x float>.
637declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>)
638declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>)
639declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>)
640
; Two-operand intrinsics called by the add/sub/mul/div/min/max/cmp tests in
; this file: each takes two <4 x float> operands and returns a <4 x float>.
; cmp.ss additionally takes an i8 third operand.
641declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>)
642declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>)
643declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>)
644declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>)
645declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>)
646declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>)
647declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8)
648
; comi* intrinsics called by the test_comi*_ss_0 tests in this file: each
; takes two <4 x float> operands and returns an i32.
649declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>)
650declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>)
651declare i32 @llvm.x86.sse.comigt.ss(<4 x float>, <4 x float>)
652declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>)
653declare i32 @llvm.x86.sse.comilt.ss(<4 x float>, <4 x float>)
654declare i32 @llvm.x86.sse.comineq.ss(<4 x float>, <4 x float>)
655
; ucomi* intrinsics called by the test_ucomi*_ss_0 tests in this file: each
; takes two <4 x float> operands and returns an i32.
656declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>)
657declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>)
658declare i32 @llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>)
659declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>)
660declare i32 @llvm.x86.sse.ucomilt.ss(<4 x float>, <4 x float>)
661declare i32 @llvm.x86.sse.ucomineq.ss(<4 x float>, <4 x float>)
662