; RUN: llc < %s -march=x86 -mattr=+mmx,+ssse3,-avx | FileCheck %s
; RUN: llc < %s -march=x86 -mattr=+avx | FileCheck %s
; RUN: llc < %s -march=x86-64 -mattr=+mmx,+ssse3,-avx | FileCheck %s
; RUN: llc < %s -march=x86-64 -mattr=+avx | FileCheck %s
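; Each test below builds x86_mmx values from <1 x i64> (or <2 x double>) arguments
; via bitcasts, calls one MMX/SSSE3 intrinsic, and checks with FileCheck that llc
; selects the instruction named in the CHECK line.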

declare x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test1(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: phaddw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %2 = bitcast <4 x i16> %1 to x86_mmx
  %3 = bitcast <4 x i16> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <4 x i16>
  %6 = bitcast <4 x i16> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test88(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pcmpgtd
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test87(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pcmpgtw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test86(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pcmpgtb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test85(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pcmpeqd
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test84(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pcmpeqw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test83(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pcmpeqb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx, x86_mmx) nounwind readnone

define i64 @test82(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: punpckldq
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx, x86_mmx) nounwind readnone

define i64 @test81(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: punpcklwd
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test80(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: punpcklbw
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx, x86_mmx) nounwind readnone

define i64 @test79(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: punpckhdq
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx, x86_mmx) nounwind readnone

define i64 @test78(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: punpckhwd
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test77(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: punpckhbw
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.packuswb(x86_mmx, x86_mmx) nounwind readnone

define i64 @test76(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: packuswb
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.packuswb(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.packssdw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test75(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: packssdw
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.packssdw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.packsswb(x86_mmx, x86_mmx) nounwind readnone

define i64 @test74(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: packsswb
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.packsswb(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx, i32) nounwind readnone

define i64 @test73(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: psrad
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to <2 x i32>
  %3 = bitcast <2 x i32> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

declare x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx, i32) nounwind readnone

define i64 @test72(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: psraw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to <4 x i16>
  %3 = bitcast <4 x i16> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

declare x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx, i32) nounwind readnone

define i64 @test71(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: psrlq
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var.i = bitcast i64 %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to i64
  ret i64 %2
}

declare x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx, i32) nounwind readnone

define i64 @test70(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: psrld
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to <2 x i32>
  %3 = bitcast <2 x i32> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

declare x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx, i32) nounwind readnone

define i64 @test69(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: psrlw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to <4 x i16>
  %3 = bitcast <4 x i16> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

declare x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx, i32) nounwind readnone

define i64 @test68(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: psllq
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var.i = bitcast i64 %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to i64
  ret i64 %2
}

declare x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx, i32) nounwind readnone

define i64 @test67(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: pslld
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to <2 x i32>
  %3 = bitcast <2 x i32> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

declare x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx, i32) nounwind readnone

define i64 @test66(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: psllw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to <4 x i16>
  %3 = bitcast <4 x i16> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

declare x86_mmx @llvm.x86.mmx.psra.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test65(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psrad
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psra.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psra.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test64(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psraw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psra.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx, x86_mmx) nounwind readnone

define i64 @test63(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psrlq
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var.i = bitcast i64 %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to i64
  ret i64 %3
}

declare x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test62(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psrld
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test61(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psrlw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psll.q(x86_mmx, x86_mmx) nounwind readnone

define i64 @test60(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psllq
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var.i = bitcast i64 %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psll.q(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to i64
  ret i64 %3
}

declare x86_mmx @llvm.x86.mmx.psll.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test59(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pslld
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psll.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psll.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test58(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psllw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pxor(x86_mmx, x86_mmx) nounwind readnone

define i64 @test56(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pxor
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pxor(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.por(x86_mmx, x86_mmx) nounwind readnone

define i64 @test55(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: por
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.por(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pandn(x86_mmx, x86_mmx) nounwind readnone

define i64 @test54(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pandn
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pandn(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pand(x86_mmx, x86_mmx) nounwind readnone

define i64 @test53(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pand
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pand(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test52(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pmullw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

define i64 @test51(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pmullw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test50(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pmulhw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx, x86_mmx) nounwind readnone

define i64 @test49(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pmaddwd
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test48(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psubusw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test47(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psubusb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test46(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psubsw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test45(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psubsb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

define i64 @test44(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psubq
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var = bitcast i64 %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1 = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psub.q(x86_mmx %mmx_var, x86_mmx %mmx_var1)
  %3 = bitcast x86_mmx %2 to i64
  ret i64 %3
}

declare x86_mmx @llvm.x86.mmx.psub.q(x86_mmx, x86_mmx) nounwind readnone

declare x86_mmx @llvm.x86.mmx.psub.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test43(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psubd
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psub.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psub.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test42(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psubw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psub.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psub.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test41(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psubb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psub.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test40(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: paddusw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test39(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: paddusb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.padds.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test38(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: paddsw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.padds.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.padds.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test37(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: paddsb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.padds.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx) nounwind readnone

define i64 @test36(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: paddq
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var = bitcast i64 %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1 = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %mmx_var, x86_mmx %mmx_var1)
  %3 = bitcast x86_mmx %2 to i64
  ret i64 %3
}

declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test35(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: paddd
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test34(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: paddw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test33(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: paddb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test32(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psadbw
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to i64
  ret i64 %3
}

declare x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test31(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pminsw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test30(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pminub
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test29(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pmaxsw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test28(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pmaxub
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test27(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pavgw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test26(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pavgb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare void @llvm.x86.mmx.movnt.dq(x86_mmx*, x86_mmx) nounwind

define void @test25(<1 x i64>* %p, <1 x i64> %a) nounwind optsize ssp {
; CHECK: movntq
entry:
  %mmx_ptr_var.i = bitcast <1 x i64>* %p to x86_mmx*
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var.i = bitcast i64 %0 to x86_mmx
  tail call void @llvm.x86.mmx.movnt.dq(x86_mmx* %mmx_ptr_var.i, x86_mmx %mmx_var.i) nounwind
  ret void
}

declare i32 @llvm.x86.mmx.pmovmskb(x86_mmx) nounwind readnone

define i32 @test24(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: pmovmskb
entry:
  %0 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %0 to x86_mmx
  %1 = tail call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %mmx_var.i) nounwind
  ret i32 %1
}

declare void @llvm.x86.mmx.maskmovq(x86_mmx, x86_mmx, i8*) nounwind

define void @test23(<1 x i64> %d, <1 x i64> %n, i8* %p) nounwind optsize ssp {
; CHECK: maskmovq
entry:
  %0 = bitcast <1 x i64> %n to <8 x i8>
  %1 = bitcast <1 x i64> %d to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  tail call void @llvm.x86.mmx.maskmovq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i, i8* %p) nounwind
  ret void
}

declare x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test22(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pmulhuw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx, i8) nounwind readnone

define i64 @test21(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: pshufw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %1 = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %1, i8 3) nounwind readnone
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

define i32 @test21_2(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: test21_2
; CHECK: pshufw
; CHECK: movd
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %1 = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %1, i8 3) nounwind readnone
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <2 x i32>
  %5 = extractelement <2 x i32> %4, i32 0
  ret i32 %5
}

declare x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx, x86_mmx) nounwind readnone

define i64 @test20(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pmuludq
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to i64
  ret i64 %3
}

declare <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx) nounwind readnone

define <2 x double> @test19(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: cvtpi2pd
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %1 = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx %1) nounwind readnone
  ret <2 x double> %2
}

declare x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double>) nounwind readnone

define i64 @test18(<2 x double> %a) nounwind readnone optsize ssp {
; CHECK: cvttpd2pi
entry:
  %0 = tail call x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double> %a) nounwind readnone
  %1 = bitcast x86_mmx %0 to <2 x i32>
  %2 = bitcast <2 x i32> %1 to <1 x i64>
  %3 = extractelement <1 x i64> %2, i32 0
  ret i64 %3
}

declare x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double>) nounwind readnone

define i64 @test17(<2 x double> %a) nounwind readnone optsize ssp {
; CHECK: cvtpd2pi
entry:
  %0 = tail call x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double> %a) nounwind readnone
  %1 = bitcast x86_mmx %0 to <2 x i32>
  %2 = bitcast <2 x i32> %1 to <1 x i64>
  %3 = extractelement <1 x i64> %2, i32 0
  ret i64 %3
}

declare x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx, x86_mmx, i8) nounwind readnone

define i64 @test16(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: palignr
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var = bitcast i64 %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1 = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx %mmx_var, x86_mmx %mmx_var1, i8 16)
  %3 = bitcast x86_mmx %2 to i64
  ret i64 %3
}

declare x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx) nounwind readnone

define i64 @test15(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: pabsd
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %1 = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx %1) nounwind readnone
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx) nounwind readnone

define i64 @test14(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: pabsw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %1 = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx %1) nounwind readnone
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx) nounwind readnone

define i64 @test13(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: pabsb
entry:
  %0 = bitcast <1 x i64> %a to <8 x i8>
  %1 = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx %1) nounwind readnone
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test12(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psignd
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %2 = bitcast <2 x i32> %1 to x86_mmx
  %3 = bitcast <2 x i32> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <2 x i32>
  %6 = bitcast <2 x i32> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test11(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psignw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %2 = bitcast <4 x i16> %1 to x86_mmx
  %3 = bitcast <4 x i16> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <4 x i16>
  %6 = bitcast <4 x i16> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test10(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psignb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %2 = bitcast <8 x i8> %1 to x86_mmx
  %3 = bitcast <8 x i8> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <8 x i8>
  %6 = bitcast <8 x i8> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test9(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pshufb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %2 = bitcast <8 x i8> %1 to x86_mmx
  %3 = bitcast <8 x i8> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <8 x i8>
  %6 = bitcast <8 x i8> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test8(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pmulhrsw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %2 = bitcast <4 x i16> %1 to x86_mmx
  %3 = bitcast <4 x i16> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <4 x i16>
  %6 = bitcast <4 x i16> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test7(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pmaddubsw
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %2 = bitcast <8 x i8> %1 to x86_mmx
  %3 = bitcast <8 x i8> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <8 x i8>
  %6 = bitcast <8 x i8> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test6(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: phsubsw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %2 = bitcast <4 x i16> %1 to x86_mmx
  %3 = bitcast <4 x i16> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <4 x i16>
  %6 = bitcast <4 x i16> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test5(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: phsubd
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %2 = bitcast <2 x i32> %1 to x86_mmx
  %3 = bitcast <2 x i32> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <2 x i32>
  %6 = bitcast <2 x i32> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test4(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: phsubw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %2 = bitcast <4 x i16> %1 to x86_mmx
  %3 = bitcast <4 x i16> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <4 x i16>
  %6 = bitcast <4 x i16> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test3(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: phaddsw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %2 = bitcast <4 x i16> %1 to x86_mmx
  %3 = bitcast <4 x i16> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <4 x i16>
  %6 = bitcast <4 x i16> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test2(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: phaddd
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %2 = bitcast <2 x i32> %1 to x86_mmx
  %3 = bitcast <2 x i32> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <2 x i32>
  %6 = bitcast <2 x i32> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

define <4 x float> @test89(<4 x float> %a, x86_mmx %b) nounwind {
; CHECK: cvtpi2ps
  %c = tail call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %a, x86_mmx %b)
  ret <4 x float> %c
}

declare <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, x86_mmx) nounwind readnone