; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+3dnow | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+3dnow | FileCheck %s --check-prefix=X64
;
; Codegen tests for the @llvm.x86.3dnow.* and @llvm.x86.3dnowa.* intrinsics.
; Each function passes its arguments to exactly one intrinsic through x86_mmx
; bitcasts and returns the bitcast result. Both llc invocations above enable
; only -mattr=+3dnow.
;
; The X86 prefix carries the i686 expectations and the X64 prefix carries the
; x86_64 ones. No function has identical output on both targets, so no shared
; prefix is passed to FileCheck (an unused prefix is reported as an error when
; unused prefixes are disallowed).

; pavgusb: x86_mmx operands, <8 x i8> result.
define <8 x i8> @test_pavgusb(x86_mmx %a.coerce, x86_mmx %b.coerce) nounwind readnone {
; X86-LABEL: test_pavgusb:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    pavgusb %mm1, %mm0
; X86-NEXT:    movq %mm0, (%eax)
; X86-NEXT:    retl $4
;
; X64-LABEL: test_pavgusb:
; X64:       # %bb.0: # %entry
; X64-NEXT:    pavgusb %mm1, %mm0
; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; X64-NEXT:    retq
entry:
  %0 = bitcast x86_mmx %a.coerce to <8 x i8>
  %1 = bitcast x86_mmx %b.coerce to <8 x i8>
  %2 = bitcast <8 x i8> %0 to x86_mmx
  %3 = bitcast <8 x i8> %1 to x86_mmx
  %4 = call x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx %2, x86_mmx %3)
  %5 = bitcast x86_mmx %4 to <8 x i8>
  ret <8 x i8> %5
}

declare x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx, x86_mmx) nounwind readnone

; pf2id: <2 x float> operand, <2 x i32> result.
define <2 x i32> @test_pf2id(<2 x float> %a) nounwind readnone {
; X86-LABEL: test_pf2id:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    pf2id %mm1, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pf2id:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm0, %mm0
; X64-NEXT:    pf2id %mm0, %mm0
; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.3dnow.pf2id(x86_mmx %0)
  %2 = bitcast x86_mmx %1 to <2 x i32>
  ret <2 x i32> %2
}

declare x86_mmx @llvm.x86.3dnow.pf2id(x86_mmx) nounwind readnone

; pfacc: two <2 x float> operands, <2 x float> result.
define <2 x float> @test_pfacc(<2 x float> %a, <2 x float> %b) nounwind readnone {
; X86-LABEL: test_pfacc:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 20(%ebp), %mm0
; X86-NEXT:    movd 16(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm2
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    pfacc %mm1, %mm2
; X86-NEXT:    movq %mm2, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfacc:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    movdq2q %xmm0, %mm1
; X64-NEXT:    pfacc %mm0, %mm1
; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = bitcast <2 x float> %b to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.3dnow.pfacc(x86_mmx %0, x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x float>
  ret <2 x float> %3
}

declare x86_mmx @llvm.x86.3dnow.pfacc(x86_mmx, x86_mmx) nounwind readnone

; pfadd: two <2 x float> operands, <2 x float> result.
define <2 x float> @test_pfadd(<2 x float> %a, <2 x float> %b) nounwind readnone {
; X86-LABEL: test_pfadd:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 20(%ebp), %mm0
; X86-NEXT:    movd 16(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm2
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    pfadd %mm1, %mm2
; X86-NEXT:    movq %mm2, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfadd:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    movdq2q %xmm0, %mm1
; X64-NEXT:    pfadd %mm0, %mm1
; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = bitcast <2 x float> %b to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx %0, x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x float>
  ret <2 x float> %3
}

declare x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx, x86_mmx) nounwind readnone

; pfcmpeq: two <2 x float> operands, <2 x i32> result.
define <2 x i32> @test_pfcmpeq(<2 x float> %a, <2 x float> %b) nounwind readnone {
; X86-LABEL: test_pfcmpeq:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 20(%ebp), %mm0
; X86-NEXT:    movd 16(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm2
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    pfcmpeq %mm1, %mm2
; X86-NEXT:    movq %mm2, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfcmpeq:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    movdq2q %xmm0, %mm1
; X64-NEXT:    pfcmpeq %mm0, %mm1
; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = bitcast <2 x float> %b to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx %0, x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x i32>
  ret <2 x i32> %3
}

declare x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx, x86_mmx) nounwind readnone

; pfcmpge: two <2 x float> operands, <2 x i32> result.
define <2 x i32> @test_pfcmpge(<2 x float> %a, <2 x float> %b) nounwind readnone {
; X86-LABEL: test_pfcmpge:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 20(%ebp), %mm0
; X86-NEXT:    movd 16(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm2
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    pfcmpge %mm1, %mm2
; X86-NEXT:    movq %mm2, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfcmpge:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    movdq2q %xmm0, %mm1
; X64-NEXT:    pfcmpge %mm0, %mm1
; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = bitcast <2 x float> %b to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.3dnow.pfcmpge(x86_mmx %0, x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x i32>
  ret <2 x i32> %3
}

declare x86_mmx @llvm.x86.3dnow.pfcmpge(x86_mmx, x86_mmx) nounwind readnone

; pfcmpgt: two <2 x float> operands, <2 x i32> result.
define <2 x i32> @test_pfcmpgt(<2 x float> %a, <2 x float> %b) nounwind readnone {
; X86-LABEL: test_pfcmpgt:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 20(%ebp), %mm0
; X86-NEXT:    movd 16(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm2
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    pfcmpgt %mm1, %mm2
; X86-NEXT:    movq %mm2, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfcmpgt:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    movdq2q %xmm0, %mm1
; X64-NEXT:    pfcmpgt %mm0, %mm1
; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = bitcast <2 x float> %b to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.3dnow.pfcmpgt(x86_mmx %0, x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x i32>
  ret <2 x i32> %3
}

declare x86_mmx @llvm.x86.3dnow.pfcmpgt(x86_mmx, x86_mmx) nounwind readnone

; pfmax: two <2 x float> operands, <2 x float> result.
define <2 x float> @test_pfmax(<2 x float> %a, <2 x float> %b) nounwind readnone {
; X86-LABEL: test_pfmax:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 20(%ebp), %mm0
; X86-NEXT:    movd 16(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm2
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    pfmax %mm1, %mm2
; X86-NEXT:    movq %mm2, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfmax:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    movdq2q %xmm0, %mm1
; X64-NEXT:    pfmax %mm0, %mm1
; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = bitcast <2 x float> %b to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx %0, x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x float>
  ret <2 x float> %3
}

declare x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx, x86_mmx) nounwind readnone

; pfmin: two <2 x float> operands, <2 x float> result.
define <2 x float> @test_pfmin(<2 x float> %a, <2 x float> %b) nounwind readnone {
; X86-LABEL: test_pfmin:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 20(%ebp), %mm0
; X86-NEXT:    movd 16(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm2
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    pfmin %mm1, %mm2
; X86-NEXT:    movq %mm2, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfmin:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    movdq2q %xmm0, %mm1
; X64-NEXT:    pfmin %mm0, %mm1
; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = bitcast <2 x float> %b to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx %0, x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x float>
  ret <2 x float> %3
}

declare x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx, x86_mmx) nounwind readnone

; pfmul: two <2 x float> operands, <2 x float> result.
define <2 x float> @test_pfmul(<2 x float> %a, <2 x float> %b) nounwind readnone {
; X86-LABEL: test_pfmul:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 20(%ebp), %mm0
; X86-NEXT:    movd 16(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm2
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    pfmul %mm1, %mm2
; X86-NEXT:    movq %mm2, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfmul:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    movdq2q %xmm0, %mm1
; X64-NEXT:    pfmul %mm0, %mm1
; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = bitcast <2 x float> %b to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx %0, x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x float>
  ret <2 x float> %3
}

declare x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx, x86_mmx) nounwind readnone

; pfrcp: <2 x float> operand, <2 x float> result.
define <2 x float> @test_pfrcp(<2 x float> %a) nounwind readnone {
; X86-LABEL: test_pfrcp:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    pfrcp %mm1, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfrcp:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm0, %mm0
; X64-NEXT:    pfrcp %mm0, %mm0
; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.3dnow.pfrcp(x86_mmx %0)
  %2 = bitcast x86_mmx %1 to <2 x float>
  ret <2 x float> %2
}

declare x86_mmx @llvm.x86.3dnow.pfrcp(x86_mmx) nounwind readnone

; pfrcpit1: two <2 x float> operands, <2 x float> result.
define <2 x float> @test_pfrcpit1(<2 x float> %a, <2 x float> %b) nounwind readnone {
; X86-LABEL: test_pfrcpit1:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 20(%ebp), %mm0
; X86-NEXT:    movd 16(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm2
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    pfrcpit1 %mm1, %mm2
; X86-NEXT:    movq %mm2, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfrcpit1:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    movdq2q %xmm0, %mm1
; X64-NEXT:    pfrcpit1 %mm0, %mm1
; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = bitcast <2 x float> %b to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.3dnow.pfrcpit1(x86_mmx %0, x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x float>
  ret <2 x float> %3
}

declare x86_mmx @llvm.x86.3dnow.pfrcpit1(x86_mmx, x86_mmx) nounwind readnone

; pfrcpit2: two <2 x float> operands, <2 x float> result.
define <2 x float> @test_pfrcpit2(<2 x float> %a, <2 x float> %b) nounwind readnone {
; X86-LABEL: test_pfrcpit2:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 20(%ebp), %mm0
; X86-NEXT:    movd 16(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm2
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    pfrcpit2 %mm1, %mm2
; X86-NEXT:    movq %mm2, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfrcpit2:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    movdq2q %xmm0, %mm1
; X64-NEXT:    pfrcpit2 %mm0, %mm1
; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = bitcast <2 x float> %b to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.3dnow.pfrcpit2(x86_mmx %0, x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x float>
  ret <2 x float> %3
}

declare x86_mmx @llvm.x86.3dnow.pfrcpit2(x86_mmx, x86_mmx) nounwind readnone

; pfrsqrt: <2 x float> operand, <2 x float> result.
define <2 x float> @test_pfrsqrt(<2 x float> %a) nounwind readnone {
; X86-LABEL: test_pfrsqrt:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    pfrsqrt %mm1, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfrsqrt:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm0, %mm0
; X64-NEXT:    pfrsqrt %mm0, %mm0
; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.3dnow.pfrsqrt(x86_mmx %0)
  %2 = bitcast x86_mmx %1 to <2 x float>
  ret <2 x float> %2
}

declare x86_mmx @llvm.x86.3dnow.pfrsqrt(x86_mmx) nounwind readnone

; pfrsqit1: two <2 x float> operands, <2 x float> result.
define <2 x float> @test_pfrsqit1(<2 x float> %a, <2 x float> %b) nounwind readnone {
; X86-LABEL: test_pfrsqit1:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 20(%ebp), %mm0
; X86-NEXT:    movd 16(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm2
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    pfrsqit1 %mm1, %mm2
; X86-NEXT:    movq %mm2, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfrsqit1:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    movdq2q %xmm0, %mm1
; X64-NEXT:    pfrsqit1 %mm0, %mm1
; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = bitcast <2 x float> %b to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.3dnow.pfrsqit1(x86_mmx %0, x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x float>
  ret <2 x float> %3
}

declare x86_mmx @llvm.x86.3dnow.pfrsqit1(x86_mmx, x86_mmx) nounwind readnone

; pfsub: two <2 x float> operands, <2 x float> result.
define <2 x float> @test_pfsub(<2 x float> %a, <2 x float> %b) nounwind readnone {
; X86-LABEL: test_pfsub:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 20(%ebp), %mm0
; X86-NEXT:    movd 16(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm2
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    pfsub %mm1, %mm2
; X86-NEXT:    movq %mm2, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfsub:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    movdq2q %xmm0, %mm1
; X64-NEXT:    pfsub %mm0, %mm1
; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = bitcast <2 x float> %b to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx %0, x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x float>
  ret <2 x float> %3
}

declare x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx, x86_mmx) nounwind readnone

; pfsubr: two <2 x float> operands, <2 x float> result.
define <2 x float> @test_pfsubr(<2 x float> %a, <2 x float> %b) nounwind readnone {
; X86-LABEL: test_pfsubr:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 20(%ebp), %mm0
; X86-NEXT:    movd 16(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm2
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    pfsubr %mm1, %mm2
; X86-NEXT:    movq %mm2, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfsubr:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    movdq2q %xmm0, %mm1
; X64-NEXT:    pfsubr %mm0, %mm1
; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = bitcast <2 x float> %b to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx %0, x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x float>
  ret <2 x float> %3
}

declare x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx, x86_mmx) nounwind readnone

; pi2fd: x86_mmx operand, <2 x float> result.
define <2 x float> @test_pi2fd(x86_mmx %a.coerce) nounwind readnone {
; X86-LABEL: test_pi2fd:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    pi2fd %mm0, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pi2fd:
; X64:       # %bb.0: # %entry
; X64-NEXT:    pi2fd %mm0, %mm0
; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast x86_mmx %a.coerce to <2 x i32>
  %1 = bitcast <2 x i32> %0 to x86_mmx
  %2 = call x86_mmx @llvm.x86.3dnow.pi2fd(x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x float>
  ret <2 x float> %3
}

declare x86_mmx @llvm.x86.3dnow.pi2fd(x86_mmx) nounwind readnone

; pmulhrw: x86_mmx operands, <4 x i16> result.
define <4 x i16> @test_pmulhrw(x86_mmx %a.coerce, x86_mmx %b.coerce) nounwind readnone {
; X86-LABEL: test_pmulhrw:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    pmulhrw %mm1, %mm0
; X86-NEXT:    movq %mm0, (%eax)
; X86-NEXT:    retl $4
;
; X64-LABEL: test_pmulhrw:
; X64:       # %bb.0: # %entry
; X64-NEXT:    pmulhrw %mm1, %mm0
; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; X64-NEXT:    retq
entry:
  %0 = bitcast x86_mmx %a.coerce to <4 x i16>
  %1 = bitcast x86_mmx %b.coerce to <4 x i16>
  %2 = bitcast <4 x i16> %0 to x86_mmx
  %3 = bitcast <4 x i16> %1 to x86_mmx
  %4 = call x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx %2, x86_mmx %3)
  %5 = bitcast x86_mmx %4 to <4 x i16>
  ret <4 x i16> %5
}

declare x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx, x86_mmx) nounwind readnone

; The remaining tests use the @llvm.x86.3dnowa.* intrinsic namespace.

; pf2iw: <2 x float> operand, <2 x i32> result.
define <2 x i32> @test_pf2iw(<2 x float> %a) nounwind readnone {
; X86-LABEL: test_pf2iw:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    pf2iw %mm1, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pf2iw:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm0, %mm0
; X64-NEXT:    pf2iw %mm0, %mm0
; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.3dnowa.pf2iw(x86_mmx %0)
  %2 = bitcast x86_mmx %1 to <2 x i32>
  ret <2 x i32> %2
}

declare x86_mmx @llvm.x86.3dnowa.pf2iw(x86_mmx) nounwind readnone

; pfnacc: two <2 x float> operands, <2 x float> result.
define <2 x float> @test_pfnacc(<2 x float> %a, <2 x float> %b) nounwind readnone {
; X86-LABEL: test_pfnacc:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 20(%ebp), %mm0
; X86-NEXT:    movd 16(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm2
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    pfnacc %mm1, %mm2
; X86-NEXT:    movq %mm2, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfnacc:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    movdq2q %xmm0, %mm1
; X64-NEXT:    pfnacc %mm0, %mm1
; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = bitcast <2 x float> %b to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.3dnowa.pfnacc(x86_mmx %0, x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x float>
  ret <2 x float> %3
}

declare x86_mmx @llvm.x86.3dnowa.pfnacc(x86_mmx, x86_mmx) nounwind readnone

; pfpnacc: two <2 x float> operands, <2 x float> result.
define <2 x float> @test_pfpnacc(<2 x float> %a, <2 x float> %b) nounwind readnone {
; X86-LABEL: test_pfpnacc:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 20(%ebp), %mm0
; X86-NEXT:    movd 16(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm2
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    pfpnacc %mm1, %mm2
; X86-NEXT:    movq %mm2, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfpnacc:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    movdq2q %xmm0, %mm1
; X64-NEXT:    pfpnacc %mm0, %mm1
; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = bitcast <2 x float> %b to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.3dnowa.pfpnacc(x86_mmx %0, x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x float>
  ret <2 x float> %3
}

declare x86_mmx @llvm.x86.3dnowa.pfpnacc(x86_mmx, x86_mmx) nounwind readnone

; pi2fw: x86_mmx operand, <2 x float> result.
define <2 x float> @test_pi2fw(x86_mmx %a.coerce) nounwind readnone {
; X86-LABEL: test_pi2fw:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    pi2fw %mm0, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pi2fw:
; X64:       # %bb.0: # %entry
; X64-NEXT:    pi2fw %mm0, %mm0
; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast x86_mmx %a.coerce to <2 x i32>
  %1 = bitcast <2 x i32> %0 to x86_mmx
  %2 = call x86_mmx @llvm.x86.3dnowa.pi2fw(x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x float>
  ret <2 x float> %3
}

declare x86_mmx @llvm.x86.3dnowa.pi2fw(x86_mmx) nounwind readnone

; pswapd on a <2 x float> operand, <2 x float> result.
define <2 x float> @test_pswapdsf(<2 x float> %a) nounwind readnone {
; X86-LABEL: test_pswapdsf:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    pswapd %mm1, %mm0 # mm0 = mm1[1,0]
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pswapdsf:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm0, %mm0
; X64-NEXT:    pswapd %mm0, %mm0 # mm0 = mm0[1,0]
; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx %0)
  %2 = bitcast x86_mmx %1 to <2 x float>
  ret <2 x float> %2
}

; pswapd on a <2 x i32> operand, <2 x i32> result.
define <2 x i32> @test_pswapdsi(<2 x i32> %a) nounwind readnone {
; X86-LABEL: test_pswapdsi:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    pswapd %mm1, %mm0 # mm0 = mm1[1,0]
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pswapdsi:
; X64:       # %bb.0: # %entry
; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-NEXT:    movq %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    pswapd -{{[0-9]+}}(%rsp), %mm0 # mm0 = mem[1,0]
; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i32> %a to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx %0)
  %2 = bitcast x86_mmx %1 to <2 x i32>
  ret <2 x i32> %2
}

declare x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx) nounwind readnone