; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+3dnowa | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+3dnowa | FileCheck %s --check-prefixes=CHECK,X64
;
; Both runs enable +3dnowa rather than plain +3dnow: this file exercises the
; llvm.x86.3dnowa.* intrinsics (pf2iw, pfnacc, pfpnacc, pi2fw, pswapd) as well
; as the base llvm.x86.3dnow.* ones, and the extended feature covers both sets.

; pavgusb: both x86_mmx arguments are round-tripped through <8 x i8> and fed to
; llvm.x86.3dnow.pavgusb. The checks expect one register-register pavgusb; the
; i686 run stores the result through a pointer loaded from the stack.
define <8 x i8> @test_pavgusb(x86_mmx %a.coerce, x86_mmx %b.coerce) nounwind readnone {
; X86-LABEL: test_pavgusb:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    pavgusb %mm1, %mm0
; X86-NEXT:    movq %mm0, (%eax)
; X86-NEXT:    retl $4
;
; X64-LABEL: test_pavgusb:
; X64:       # %bb.0: # %entry
; X64-NEXT:    pavgusb %mm1, %mm0
; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; X64-NEXT:    retq
entry:
  %0 = bitcast x86_mmx %a.coerce to <8 x i8>
  %1 = bitcast x86_mmx %b.coerce to <8 x i8>
  %2 = bitcast <8 x i8> %0 to x86_mmx
  %3 = bitcast <8 x i8> %1 to x86_mmx
  %4 = call x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx %2, x86_mmx %3)
  %5 = bitcast x86_mmx %4 to <8 x i8>
  ret <8 x i8> %5
}

declare x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx, x86_mmx) nounwind readnone

; pf2id: the <2 x float> argument is bitcast to x86_mmx and passed to
; llvm.x86.3dnow.pf2id; the result is returned as <2 x i32>. The checks expect
; a single pf2id on both the i686 and x86_64 runs.
define <2 x i32> @test_pf2id(<2 x float> %a) nounwind readnone {
; X86-LABEL: test_pf2id:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    pf2id %mm1, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pf2id:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm0, %mm0
; X64-NEXT:    pf2id %mm0, %mm0
; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.3dnow.pf2id(x86_mmx %0)
  %2 = bitcast x86_mmx %1 to <2 x i32>
  ret <2 x i32> %2
}

declare x86_mmx @llvm.x86.3dnow.pf2id(x86_mmx) nounwind readnone

; pfacc: both <2 x float> arguments are bitcast to x86_mmx and passed to
; llvm.x86.3dnow.pfacc. The checks expect a single register-register pfacc on
; both the i686 and x86_64 runs.
define <2 x float> @test_pfacc(<2 x float> %a, <2 x float> %b) nounwind readnone {
; X86-LABEL: test_pfacc:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 20(%ebp), %mm0
; X86-NEXT:    movd 16(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm2
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    pfacc %mm1, %mm2
; X86-NEXT:    movq %mm2, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfacc:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    movdq2q %xmm0, %mm1
; X64-NEXT:    pfacc %mm0, %mm1
; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = bitcast <2 x float> %b to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.3dnow.pfacc(x86_mmx %0, x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x float>
  ret <2 x float> %3
}

declare x86_mmx @llvm.x86.3dnow.pfacc(x86_mmx, x86_mmx) nounwind readnone

; pfadd: both <2 x float> arguments are bitcast to x86_mmx and passed to
; llvm.x86.3dnow.pfadd. The checks expect a single register-register pfadd on
; both the i686 and x86_64 runs.
define <2 x float> @test_pfadd(<2 x float> %a, <2 x float> %b) nounwind readnone {
; X86-LABEL: test_pfadd:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 20(%ebp), %mm0
; X86-NEXT:    movd 16(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm2
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    pfadd %mm1, %mm2
; X86-NEXT:    movq %mm2, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfadd:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    movdq2q %xmm0, %mm1
; X64-NEXT:    pfadd %mm0, %mm1
; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = bitcast <2 x float> %b to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx %0, x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x float>
  ret <2 x float> %3
}

declare x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx, x86_mmx) nounwind readnone

; pfcmpeq: both <2 x float> arguments are bitcast to x86_mmx and passed to
; llvm.x86.3dnow.pfcmpeq; the result is returned as <2 x i32>. The checks
; expect a single register-register pfcmpeq on both runs.
define <2 x i32> @test_pfcmpeq(<2 x float> %a, <2 x float> %b) nounwind readnone {
; X86-LABEL: test_pfcmpeq:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 20(%ebp), %mm0
; X86-NEXT:    movd 16(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm2
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    pfcmpeq %mm1, %mm2
; X86-NEXT:    movq %mm2, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfcmpeq:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    movdq2q %xmm0, %mm1
; X64-NEXT:    pfcmpeq %mm0, %mm1
; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = bitcast <2 x float> %b to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx %0, x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x i32>
  ret <2 x i32> %3
}

declare x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx, x86_mmx) nounwind readnone

; pfcmpge: both <2 x float> arguments are bitcast to x86_mmx and passed to
; llvm.x86.3dnow.pfcmpge; the result is returned as <2 x i32>. The checks
; expect a single register-register pfcmpge on both runs.
define <2 x i32> @test_pfcmpge(<2 x float> %a, <2 x float> %b) nounwind readnone {
; X86-LABEL: test_pfcmpge:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 20(%ebp), %mm0
; X86-NEXT:    movd 16(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm2
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    pfcmpge %mm1, %mm2
; X86-NEXT:    movq %mm2, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfcmpge:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    movdq2q %xmm0, %mm1
; X64-NEXT:    pfcmpge %mm0, %mm1
; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = bitcast <2 x float> %b to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.3dnow.pfcmpge(x86_mmx %0, x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x i32>
  ret <2 x i32> %3
}

declare x86_mmx @llvm.x86.3dnow.pfcmpge(x86_mmx, x86_mmx) nounwind readnone

; pfcmpgt: both <2 x float> arguments are bitcast to x86_mmx and passed to
; llvm.x86.3dnow.pfcmpgt; the result is returned as <2 x i32>. The checks
; expect a single register-register pfcmpgt on both runs.
define <2 x i32> @test_pfcmpgt(<2 x float> %a, <2 x float> %b) nounwind readnone {
; X86-LABEL: test_pfcmpgt:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 20(%ebp), %mm0
; X86-NEXT:    movd 16(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm2
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    pfcmpgt %mm1, %mm2
; X86-NEXT:    movq %mm2, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfcmpgt:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    movdq2q %xmm0, %mm1
; X64-NEXT:    pfcmpgt %mm0, %mm1
; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = bitcast <2 x float> %b to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.3dnow.pfcmpgt(x86_mmx %0, x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x i32>
  ret <2 x i32> %3
}

declare x86_mmx @llvm.x86.3dnow.pfcmpgt(x86_mmx, x86_mmx) nounwind readnone

; pfmax: both <2 x float> arguments are bitcast to x86_mmx and passed to
; llvm.x86.3dnow.pfmax. The checks expect a single register-register pfmax on
; both the i686 and x86_64 runs.
define <2 x float> @test_pfmax(<2 x float> %a, <2 x float> %b) nounwind readnone {
; X86-LABEL: test_pfmax:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 20(%ebp), %mm0
; X86-NEXT:    movd 16(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm2
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    pfmax %mm1, %mm2
; X86-NEXT:    movq %mm2, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfmax:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    movdq2q %xmm0, %mm1
; X64-NEXT:    pfmax %mm0, %mm1
; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = bitcast <2 x float> %b to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx %0, x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x float>
  ret <2 x float> %3
}

declare x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx, x86_mmx) nounwind readnone

; pfmin: both <2 x float> arguments are bitcast to x86_mmx and passed to
; llvm.x86.3dnow.pfmin. The checks expect a single register-register pfmin on
; both the i686 and x86_64 runs.
define <2 x float> @test_pfmin(<2 x float> %a, <2 x float> %b) nounwind readnone {
; X86-LABEL: test_pfmin:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 20(%ebp), %mm0
; X86-NEXT:    movd 16(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm2
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    pfmin %mm1, %mm2
; X86-NEXT:    movq %mm2, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfmin:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    movdq2q %xmm0, %mm1
; X64-NEXT:    pfmin %mm0, %mm1
; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = bitcast <2 x float> %b to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx %0, x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x float>
  ret <2 x float> %3
}

declare x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx, x86_mmx) nounwind readnone

; pfmul: both <2 x float> arguments are bitcast to x86_mmx and passed to
; llvm.x86.3dnow.pfmul. The checks expect a single register-register pfmul on
; both the i686 and x86_64 runs.
define <2 x float> @test_pfmul(<2 x float> %a, <2 x float> %b) nounwind readnone {
; X86-LABEL: test_pfmul:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 20(%ebp), %mm0
; X86-NEXT:    movd 16(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm2
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    pfmul %mm1, %mm2
; X86-NEXT:    movq %mm2, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfmul:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    movdq2q %xmm0, %mm1
; X64-NEXT:    pfmul %mm0, %mm1
; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = bitcast <2 x float> %b to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx %0, x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x float>
  ret <2 x float> %3
}

declare x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx, x86_mmx) nounwind readnone

; pfrcp: the <2 x float> argument is bitcast to x86_mmx and passed to
; llvm.x86.3dnow.pfrcp. The checks expect a single pfrcp on both the i686 and
; x86_64 runs.
define <2 x float> @test_pfrcp(<2 x float> %a) nounwind readnone {
; X86-LABEL: test_pfrcp:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    pfrcp %mm1, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfrcp:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm0, %mm0
; X64-NEXT:    pfrcp %mm0, %mm0
; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.3dnow.pfrcp(x86_mmx %0)
  %2 = bitcast x86_mmx %1 to <2 x float>
  ret <2 x float> %2
}

declare x86_mmx @llvm.x86.3dnow.pfrcp(x86_mmx) nounwind readnone

; pfrcpit1: both <2 x float> arguments are bitcast to x86_mmx and passed to
; llvm.x86.3dnow.pfrcpit1. The checks expect a single register-register
; pfrcpit1 on both the i686 and x86_64 runs.
define <2 x float> @test_pfrcpit1(<2 x float> %a, <2 x float> %b) nounwind readnone {
; X86-LABEL: test_pfrcpit1:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 20(%ebp), %mm0
; X86-NEXT:    movd 16(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm2
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    pfrcpit1 %mm1, %mm2
; X86-NEXT:    movq %mm2, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfrcpit1:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    movdq2q %xmm0, %mm1
; X64-NEXT:    pfrcpit1 %mm0, %mm1
; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = bitcast <2 x float> %b to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.3dnow.pfrcpit1(x86_mmx %0, x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x float>
  ret <2 x float> %3
}

declare x86_mmx @llvm.x86.3dnow.pfrcpit1(x86_mmx, x86_mmx) nounwind readnone

; pfrcpit2: both <2 x float> arguments are bitcast to x86_mmx and passed to
; llvm.x86.3dnow.pfrcpit2. The checks expect a single register-register
; pfrcpit2 on both the i686 and x86_64 runs.
define <2 x float> @test_pfrcpit2(<2 x float> %a, <2 x float> %b) nounwind readnone {
; X86-LABEL: test_pfrcpit2:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 20(%ebp), %mm0
; X86-NEXT:    movd 16(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm2
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    pfrcpit2 %mm1, %mm2
; X86-NEXT:    movq %mm2, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfrcpit2:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    movdq2q %xmm0, %mm1
; X64-NEXT:    pfrcpit2 %mm0, %mm1
; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = bitcast <2 x float> %b to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.3dnow.pfrcpit2(x86_mmx %0, x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x float>
  ret <2 x float> %3
}

declare x86_mmx @llvm.x86.3dnow.pfrcpit2(x86_mmx, x86_mmx) nounwind readnone

; pfrsqrt: the <2 x float> argument is bitcast to x86_mmx and passed to
; llvm.x86.3dnow.pfrsqrt. The checks expect a single pfrsqrt on both the i686
; and x86_64 runs.
define <2 x float> @test_pfrsqrt(<2 x float> %a) nounwind readnone {
; X86-LABEL: test_pfrsqrt:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    pfrsqrt %mm1, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfrsqrt:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm0, %mm0
; X64-NEXT:    pfrsqrt %mm0, %mm0
; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.3dnow.pfrsqrt(x86_mmx %0)
  %2 = bitcast x86_mmx %1 to <2 x float>
  ret <2 x float> %2
}

declare x86_mmx @llvm.x86.3dnow.pfrsqrt(x86_mmx) nounwind readnone

; pfrsqit1: both <2 x float> arguments are bitcast to x86_mmx and passed to
; llvm.x86.3dnow.pfrsqit1. The checks expect a single register-register
; pfrsqit1 on both the i686 and x86_64 runs.
define <2 x float> @test_pfrsqit1(<2 x float> %a, <2 x float> %b) nounwind readnone {
; X86-LABEL: test_pfrsqit1:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 20(%ebp), %mm0
; X86-NEXT:    movd 16(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm2
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    pfrsqit1 %mm1, %mm2
; X86-NEXT:    movq %mm2, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfrsqit1:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    movdq2q %xmm0, %mm1
; X64-NEXT:    pfrsqit1 %mm0, %mm1
; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = bitcast <2 x float> %b to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.3dnow.pfrsqit1(x86_mmx %0, x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x float>
  ret <2 x float> %3
}

declare x86_mmx @llvm.x86.3dnow.pfrsqit1(x86_mmx, x86_mmx) nounwind readnone

; pfsub: both <2 x float> arguments are bitcast to x86_mmx and passed to
; llvm.x86.3dnow.pfsub. The checks expect a single register-register pfsub on
; both the i686 and x86_64 runs.
define <2 x float> @test_pfsub(<2 x float> %a, <2 x float> %b) nounwind readnone {
; X86-LABEL: test_pfsub:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 20(%ebp), %mm0
; X86-NEXT:    movd 16(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm2
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    pfsub %mm1, %mm2
; X86-NEXT:    movq %mm2, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfsub:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    movdq2q %xmm0, %mm1
; X64-NEXT:    pfsub %mm0, %mm1
; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = bitcast <2 x float> %b to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx %0, x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x float>
  ret <2 x float> %3
}

declare x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx, x86_mmx) nounwind readnone

; pfsubr: both <2 x float> arguments are bitcast to x86_mmx and passed to
; llvm.x86.3dnow.pfsubr. The checks expect a single register-register pfsubr
; on both the i686 and x86_64 runs.
define <2 x float> @test_pfsubr(<2 x float> %a, <2 x float> %b) nounwind readnone {
; X86-LABEL: test_pfsubr:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 20(%ebp), %mm0
; X86-NEXT:    movd 16(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm2
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    pfsubr %mm1, %mm2
; X86-NEXT:    movq %mm2, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfsubr:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    movdq2q %xmm0, %mm1
; X64-NEXT:    pfsubr %mm0, %mm1
; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = bitcast <2 x float> %b to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx %0, x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x float>
  ret <2 x float> %3
}

declare x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx, x86_mmx) nounwind readnone

; pi2fd: the x86_mmx argument is round-tripped through <2 x i32> and passed to
; llvm.x86.3dnow.pi2fd; the result is returned as <2 x float>. The checks
; expect a single pi2fd operating directly on mm0 on both runs.
define <2 x float> @test_pi2fd(x86_mmx %a.coerce) nounwind readnone {
; X86-LABEL: test_pi2fd:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    pi2fd %mm0, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pi2fd:
; X64:       # %bb.0: # %entry
; X64-NEXT:    pi2fd %mm0, %mm0
; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast x86_mmx %a.coerce to <2 x i32>
  %1 = bitcast <2 x i32> %0 to x86_mmx
  %2 = call x86_mmx @llvm.x86.3dnow.pi2fd(x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x float>
  ret <2 x float> %3
}

declare x86_mmx @llvm.x86.3dnow.pi2fd(x86_mmx) nounwind readnone

; pmulhrw: both x86_mmx arguments are round-tripped through <4 x i16> and fed
; to llvm.x86.3dnow.pmulhrw. The checks expect one register-register pmulhrw;
; the i686 run stores the result through a pointer loaded from the stack.
define <4 x i16> @test_pmulhrw(x86_mmx %a.coerce, x86_mmx %b.coerce) nounwind readnone {
; X86-LABEL: test_pmulhrw:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    pmulhrw %mm1, %mm0
; X86-NEXT:    movq %mm0, (%eax)
; X86-NEXT:    retl $4
;
; X64-LABEL: test_pmulhrw:
; X64:       # %bb.0: # %entry
; X64-NEXT:    pmulhrw %mm1, %mm0
; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; X64-NEXT:    retq
entry:
  %0 = bitcast x86_mmx %a.coerce to <4 x i16>
  %1 = bitcast x86_mmx %b.coerce to <4 x i16>
  %2 = bitcast <4 x i16> %0 to x86_mmx
  %3 = bitcast <4 x i16> %1 to x86_mmx
  %4 = call x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx %2, x86_mmx %3)
  %5 = bitcast x86_mmx %4 to <4 x i16>
  ret <4 x i16> %5
}

declare x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx, x86_mmx) nounwind readnone

; pf2iw: the <2 x float> argument is bitcast to x86_mmx and passed to the
; extended-3DNow! intrinsic llvm.x86.3dnowa.pf2iw; the result is returned as
; <2 x i32>. The checks expect a single pf2iw on both runs.
define <2 x i32> @test_pf2iw(<2 x float> %a) nounwind readnone {
; X86-LABEL: test_pf2iw:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    pf2iw %mm1, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pf2iw:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm0, %mm0
; X64-NEXT:    pf2iw %mm0, %mm0
; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.3dnowa.pf2iw(x86_mmx %0)
  %2 = bitcast x86_mmx %1 to <2 x i32>
  ret <2 x i32> %2
}

declare x86_mmx @llvm.x86.3dnowa.pf2iw(x86_mmx) nounwind readnone

; pfnacc: both <2 x float> arguments are bitcast to x86_mmx and passed to the
; extended-3DNow! intrinsic llvm.x86.3dnowa.pfnacc. The checks expect a single
; register-register pfnacc on both the i686 and x86_64 runs.
define <2 x float> @test_pfnacc(<2 x float> %a, <2 x float> %b) nounwind readnone {
; X86-LABEL: test_pfnacc:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 20(%ebp), %mm0
; X86-NEXT:    movd 16(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm2
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    pfnacc %mm1, %mm2
; X86-NEXT:    movq %mm2, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfnacc:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    movdq2q %xmm0, %mm1
; X64-NEXT:    pfnacc %mm0, %mm1
; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = bitcast <2 x float> %b to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.3dnowa.pfnacc(x86_mmx %0, x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x float>
  ret <2 x float> %3
}

declare x86_mmx @llvm.x86.3dnowa.pfnacc(x86_mmx, x86_mmx) nounwind readnone

; pfpnacc: both <2 x float> arguments are bitcast to x86_mmx and passed to the
; extended-3DNow! intrinsic llvm.x86.3dnowa.pfpnacc. The checks expect a single
; register-register pfpnacc on both the i686 and x86_64 runs.
define <2 x float> @test_pfpnacc(<2 x float> %a, <2 x float> %b) nounwind readnone {
; X86-LABEL: test_pfpnacc:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 20(%ebp), %mm0
; X86-NEXT:    movd 16(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm2
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    pfpnacc %mm1, %mm2
; X86-NEXT:    movq %mm2, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfpnacc:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    movdq2q %xmm0, %mm1
; X64-NEXT:    pfpnacc %mm0, %mm1
; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = bitcast <2 x float> %b to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.3dnowa.pfpnacc(x86_mmx %0, x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x float>
  ret <2 x float> %3
}

declare x86_mmx @llvm.x86.3dnowa.pfpnacc(x86_mmx, x86_mmx) nounwind readnone

; pi2fw: the x86_mmx argument is round-tripped through <2 x i32> and passed to
; the extended-3DNow! intrinsic llvm.x86.3dnowa.pi2fw; the result is returned
; as <2 x float>. The checks expect a single pi2fw on mm0 on both runs.
define <2 x float> @test_pi2fw(x86_mmx %a.coerce) nounwind readnone {
; X86-LABEL: test_pi2fw:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    pi2fw %mm0, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pi2fw:
; X64:       # %bb.0: # %entry
; X64-NEXT:    pi2fw %mm0, %mm0
; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast x86_mmx %a.coerce to <2 x i32>
  %1 = bitcast <2 x i32> %0 to x86_mmx
  %2 = call x86_mmx @llvm.x86.3dnowa.pi2fw(x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x float>
  ret <2 x float> %3
}

declare x86_mmx @llvm.x86.3dnowa.pi2fw(x86_mmx) nounwind readnone

; pswapd on a float vector: the <2 x float> argument is bitcast to x86_mmx and
; passed to the extended-3DNow! intrinsic llvm.x86.3dnowa.pswapd. The checks
; expect a single register-register pswapd on both runs.
define <2 x float> @test_pswapdsf(<2 x float> %a) nounwind readnone {
; X86-LABEL: test_pswapdsf:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    pswapd %mm1, %mm0 # mm0 = mm1[1,0]
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pswapdsf:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm0, %mm0
; X64-NEXT:    pswapd %mm0, %mm0 # mm0 = mm0[1,0]
; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx %0)
  %2 = bitcast x86_mmx %1 to <2 x float>
  ret <2 x float> %2
}

; pswapd on an integer vector: the <2 x i32> argument is bitcast to x86_mmx and
; passed to llvm.x86.3dnowa.pswapd. The i686 run expects a register-register
; pswapd; the x86_64 run expects pswapd to take its source from a stack slot.
define <2 x i32> @test_pswapdsi(<2 x i32> %a) nounwind readnone {
; X86-LABEL: test_pswapdsi:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    pswapd %mm1, %mm0 # mm0 = mm1[1,0]
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pswapdsi:
; X64:       # %bb.0: # %entry
; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-NEXT:    movq %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    pswapd -{{[0-9]+}}(%rsp), %mm0 # mm0 = mem[1,0]
; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i32> %a to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx %0)
  %2 = bitcast x86_mmx %1 to <2 x i32>
  ret <2 x i32> %2
}

; Declaration shared by test_pswapdsf and test_pswapdsi.
declare x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx) nounwind readnone
