; RUN: llc < %s -march=x86 -mattr=+mmx,+ssse3 | FileCheck %s
; RUN: llc < %s -march=x86 -mattr=+avx | FileCheck %s
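; Each test below takes <1 x i64> arguments, bitcasts them to the vector type
; the intrinsic expects, converts to x86_mmx, calls the MMX/SSSE3 intrinsic,
; and bitcasts the result back to i64; the CHECK lines verify that the
; corresponding MMX instruction is selected.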

declare x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test1(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: phaddw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %2 = bitcast <4 x i16> %1 to x86_mmx
  %3 = bitcast <4 x i16> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <4 x i16>
  %6 = bitcast <4 x i16> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test88(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pcmpgtd
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test87(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pcmpgtw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test86(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pcmpgtb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test85(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pcmpeqd
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test84(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pcmpeqw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test83(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pcmpeqb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx, x86_mmx) nounwind readnone

define i64 @test82(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: punpckldq
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx, x86_mmx) nounwind readnone

define i64 @test81(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: punpcklwd
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test80(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: punpcklbw
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx, x86_mmx) nounwind readnone

define i64 @test79(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: punpckhdq
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx, x86_mmx) nounwind readnone

define i64 @test78(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: punpckhwd
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test77(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: punpckhbw
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.packuswb(x86_mmx, x86_mmx) nounwind readnone

define i64 @test76(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: packuswb
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.packuswb(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.packssdw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test75(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: packssdw
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.packssdw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.packsswb(x86_mmx, x86_mmx) nounwind readnone

define i64 @test74(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: packsswb
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.packsswb(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx, i32) nounwind readnone

define i64 @test73(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: psrad
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to <2 x i32>
  %3 = bitcast <2 x i32> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

declare x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx, i32) nounwind readnone

define i64 @test72(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: psraw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to <4 x i16>
  %3 = bitcast <4 x i16> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

declare x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx, i32) nounwind readnone

define i64 @test71(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: psrlq
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var.i = bitcast i64 %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to i64
  ret i64 %2
}

declare x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx, i32) nounwind readnone

define i64 @test70(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: psrld
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to <2 x i32>
  %3 = bitcast <2 x i32> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

declare x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx, i32) nounwind readnone

define i64 @test69(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: psrlw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to <4 x i16>
  %3 = bitcast <4 x i16> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

declare x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx, i32) nounwind readnone

define i64 @test68(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: psllq
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var.i = bitcast i64 %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to i64
  ret i64 %2
}

declare x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx, i32) nounwind readnone

define i64 @test67(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: pslld
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to <2 x i32>
  %3 = bitcast <2 x i32> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

declare x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx, i32) nounwind readnone

define i64 @test66(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: psllw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to <4 x i16>
  %3 = bitcast <4 x i16> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

declare x86_mmx @llvm.x86.mmx.psra.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test65(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psrad
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psra.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psra.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test64(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psraw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psra.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx, x86_mmx) nounwind readnone

define i64 @test63(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psrlq
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var.i = bitcast i64 %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to i64
  ret i64 %3
}

declare x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test62(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psrld
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test61(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psrlw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psll.q(x86_mmx, x86_mmx) nounwind readnone

define i64 @test60(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psllq
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var.i = bitcast i64 %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psll.q(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to i64
  ret i64 %3
}

declare x86_mmx @llvm.x86.mmx.psll.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test59(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pslld
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psll.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psll.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test58(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psllw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pxor(x86_mmx, x86_mmx) nounwind readnone

define i64 @test56(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pxor
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pxor(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.por(x86_mmx, x86_mmx) nounwind readnone

define i64 @test55(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: por
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.por(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pandn(x86_mmx, x86_mmx) nounwind readnone

define i64 @test54(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pandn
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pandn(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pand(x86_mmx, x86_mmx) nounwind readnone

define i64 @test53(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pand
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pand(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test52(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pmullw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

define i64 @test51(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pmullw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test50(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pmulhw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx, x86_mmx) nounwind readnone

define i64 @test49(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pmaddwd
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test48(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psubusw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test47(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psubusb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test46(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psubsw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test45(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psubsb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

define i64 @test44(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psubq
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var = bitcast i64 %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1 = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psub.q(x86_mmx %mmx_var, x86_mmx %mmx_var1)
  %3 = bitcast x86_mmx %2 to i64
  ret i64 %3
}

declare x86_mmx @llvm.x86.mmx.psub.q(x86_mmx, x86_mmx) nounwind readnone

declare x86_mmx @llvm.x86.mmx.psub.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test43(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psubd
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psub.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psub.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test42(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psubw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psub.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psub.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test41(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psubb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psub.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test40(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: paddusw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test39(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: paddusb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.padds.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test38(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: paddsw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.padds.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.padds.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test37(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: paddsb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.padds.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx) nounwind readnone

define i64 @test36(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: paddq
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var = bitcast i64 %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1 = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %mmx_var, x86_mmx %mmx_var1)
  %3 = bitcast x86_mmx %2 to i64
  ret i64 %3
}

declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test35(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: paddd
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test34(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: paddw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test33(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: paddb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test32(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psadbw
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to i64
  ret i64 %3
}

declare x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test31(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pminsw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test30(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pminub
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test29(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pmaxsw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test28(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pmaxub
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test27(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pavgw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test26(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pavgb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare void @llvm.x86.mmx.movnt.dq(x86_mmx*, x86_mmx) nounwind

define void @test25(<1 x i64>* %p, <1 x i64> %a) nounwind optsize ssp {
; CHECK: movntq
entry:
  %mmx_ptr_var.i = bitcast <1 x i64>* %p to x86_mmx*
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var.i = bitcast i64 %0 to x86_mmx
  tail call void @llvm.x86.mmx.movnt.dq(x86_mmx* %mmx_ptr_var.i, x86_mmx %mmx_var.i) nounwind
  ret void
}

declare i32 @llvm.x86.mmx.pmovmskb(x86_mmx) nounwind readnone

define i32 @test24(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: pmovmskb
entry:
  %0 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %0 to x86_mmx
  %1 = tail call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %mmx_var.i) nounwind
  ret i32 %1
}

declare void @llvm.x86.mmx.maskmovq(x86_mmx, x86_mmx, i8*) nounwind

define void @test23(<1 x i64> %d, <1 x i64> %n, i8* %p) nounwind optsize ssp {
; CHECK: maskmovq
entry:
  %0 = bitcast <1 x i64> %n to <8 x i8>
  %1 = bitcast <1 x i64> %d to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  tail call void @llvm.x86.mmx.maskmovq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i, i8* %p) nounwind
  ret void
}

declare x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test22(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pmulhuw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx, i8) nounwind readnone

define i64 @test21(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: pshufw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %1 = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %1, i8 3) nounwind readnone
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

define i32 @test21_2(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: test21_2
; CHECK: pshufw
; CHECK: movd
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %1 = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %1, i8 3) nounwind readnone
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <2 x i32>
  %5 = extractelement <2 x i32> %4, i32 0
  ret i32 %5
}

declare x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx, x86_mmx) nounwind readnone

define i64 @test20(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pmuludq
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to i64
  ret i64 %3
}

declare <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx) nounwind readnone

define <2 x double> @test19(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: cvtpi2pd
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %1 = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx %1) nounwind readnone
  ret <2 x double> %2
}

declare x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double>) nounwind readnone

define i64 @test18(<2 x double> %a) nounwind readnone optsize ssp {
; CHECK: cvttpd2pi
entry:
  %0 = tail call x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double> %a) nounwind readnone
  %1 = bitcast x86_mmx %0 to <2 x i32>
  %2 = bitcast <2 x i32> %1 to <1 x i64>
  %3 = extractelement <1 x i64> %2, i32 0
  ret i64 %3
}

declare x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double>) nounwind readnone

define i64 @test17(<2 x double> %a) nounwind readnone optsize ssp {
; CHECK: cvtpd2pi
entry:
  %0 = tail call x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double> %a) nounwind readnone
  %1 = bitcast x86_mmx %0 to <2 x i32>
  %2 = bitcast <2 x i32> %1 to <1 x i64>
  %3 = extractelement <1 x i64> %2, i32 0
  ret i64 %3
}

declare x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx, x86_mmx, i8) nounwind readnone

define i64 @test16(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: palignr
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var = bitcast i64 %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1 = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx %mmx_var, x86_mmx %mmx_var1, i8 16)
  %3 = bitcast x86_mmx %2 to i64
  ret i64 %3
}

declare x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx) nounwind readnone

define i64 @test15(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: pabsd
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %1 = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx %1) nounwind readnone
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx) nounwind readnone

define i64 @test14(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: pabsw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %1 = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx %1) nounwind readnone
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx) nounwind readnone

define i64 @test13(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: pabsb
entry:
  %0 = bitcast <1 x i64> %a to <8 x i8>
  %1 = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx %1) nounwind readnone
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test12(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psignd
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %2 = bitcast <2 x i32> %1 to x86_mmx
  %3 = bitcast <2 x i32> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <2 x i32>
  %6 = bitcast <2 x i32> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test11(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psignw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %2 = bitcast <4 x i16> %1 to x86_mmx
  %3 = bitcast <4 x i16> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <4 x i16>
  %6 = bitcast <4 x i16> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test10(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psignb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %2 = bitcast <8 x i8> %1 to x86_mmx
  %3 = bitcast <8 x i8> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <8 x i8>
  %6 = bitcast <8 x i8> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test9(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pshufb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %2 = bitcast <8 x i8> %1 to x86_mmx
  %3 = bitcast <8 x i8> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <8 x i8>
  %6 = bitcast <8 x i8> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test8(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pmulhrsw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %2 = bitcast <4 x i16> %1 to x86_mmx
  %3 = bitcast <4 x i16> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <4 x i16>
  %6 = bitcast <4 x i16> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test7(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pmaddubsw
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %2 = bitcast <8 x i8> %1 to x86_mmx
  %3 = bitcast <8 x i8> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <8 x i8>
  %6 = bitcast <8 x i8> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test6(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: phsubsw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %2 = bitcast <4 x i16> %1 to x86_mmx
  %3 = bitcast <4 x i16> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <4 x i16>
  %6 = bitcast <4 x i16> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test5(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: phsubd
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %2 = bitcast <2 x i32> %1 to x86_mmx
  %3 = bitcast <2 x i32> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <2 x i32>
  %6 = bitcast <2 x i32> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test4(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: phsubw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %2 = bitcast <4 x i16> %1 to x86_mmx
  %3 = bitcast <4 x i16> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <4 x i16>
  %6 = bitcast <4 x i16> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test3(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: phaddsw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %2 = bitcast <4 x i16> %1 to x86_mmx
  %3 = bitcast <4 x i16> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <4 x i16>
  %6 = bitcast <4 x i16> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test2(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: phaddd
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %2 = bitcast <2 x i32> %1 to x86_mmx
  %3 = bitcast <2 x i32> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <2 x i32>
  %6 = bitcast <2 x i32> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}