; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=i686-unknown-unknown -mattr=+avx,+fma4,+xop | FileCheck %s --check-prefix=ALL --check-prefix=X32
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+xop | FileCheck %s --check-prefix=ALL --check-prefix=X64

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/xop-builtins.c

; Multiply-accumulate intrinsics (vpmacs*, vpmadcs*). Each test bitcasts its
; <2 x i64> arguments to the element types the intrinsic takes, calls the
; intrinsic once, and bitcasts the result back where the types differ. The
; checks expect a single four-operand instruction for both triples.

define <2 x i64> @test_mm_maccs_epi16(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
; X32-LABEL: test_mm_maccs_epi16:
; X32:       # %bb.0:
; X32-NEXT:    vpmacssww %xmm2, %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_maccs_epi16:
; X64:       # %bb.0:
; X64-NEXT:    vpmacssww %xmm2, %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %arg2 = bitcast <2 x i64> %a2 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.xop.vpmacssww(<8 x i16> %arg0, <8 x i16> %arg1, <8 x i16> %arg2)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.xop.vpmacssww(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone

define <2 x i64> @test_mm_macc_epi16(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
; X32-LABEL: test_mm_macc_epi16:
; X32:       # %bb.0:
; X32-NEXT:    vpmacsww %xmm2, %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_macc_epi16:
; X64:       # %bb.0:
; X64-NEXT:    vpmacsww %xmm2, %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %arg2 = bitcast <2 x i64> %a2 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.xop.vpmacsww(<8 x i16> %arg0, <8 x i16> %arg1, <8 x i16> %arg2)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.xop.vpmacsww(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone

define <2 x i64> @test_mm_maccsd_epi16(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
; X32-LABEL: test_mm_maccsd_epi16:
; X32:       # %bb.0:
; X32-NEXT:    vpmacsswd %xmm2, %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_maccsd_epi16:
; X64:       # %bb.0:
; X64-NEXT:    vpmacsswd %xmm2, %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
  %res = call <4 x i32> @llvm.x86.xop.vpmacsswd(<8 x i16> %arg0, <8 x i16> %arg1, <4 x i32> %arg2)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.xop.vpmacsswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone

define <2 x i64> @test_mm_maccd_epi16(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
; X32-LABEL: test_mm_maccd_epi16:
; X32:       # %bb.0:
; X32-NEXT:    vpmacswd %xmm2, %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_maccd_epi16:
; X64:       # %bb.0:
; X64-NEXT:    vpmacswd %xmm2, %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
  %res = call <4 x i32> @llvm.x86.xop.vpmacswd(<8 x i16> %arg0, <8 x i16> %arg1, <4 x i32> %arg2)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.xop.vpmacswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone

define <2 x i64> @test_mm_maccs_epi32(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
; X32-LABEL: test_mm_maccs_epi32:
; X32:       # %bb.0:
; X32-NEXT:    vpmacssdd %xmm2, %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_maccs_epi32:
; X64:       # %bb.0:
; X64-NEXT:    vpmacssdd %xmm2, %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
  %res = call <4 x i32> @llvm.x86.xop.vpmacssdd(<4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.xop.vpmacssdd(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone

define <2 x i64> @test_mm_macc_epi32(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
; X32-LABEL: test_mm_macc_epi32:
; X32:       # %bb.0:
; X32-NEXT:    vpmacsdd %xmm2, %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_macc_epi32:
; X64:       # %bb.0:
; X64-NEXT:    vpmacsdd %xmm2, %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
  %res = call <4 x i32> @llvm.x86.xop.vpmacsdd(<4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.xop.vpmacsdd(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone

define <2 x i64> @test_mm_maccslo_epi32(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
; X32-LABEL: test_mm_maccslo_epi32:
; X32:       # %bb.0:
; X32-NEXT:    vpmacssdql %xmm2, %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_maccslo_epi32:
; X64:       # %bb.0:
; X64-NEXT:    vpmacssdql %xmm2, %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = call <2 x i64> @llvm.x86.xop.vpmacssdql(<4 x i32> %arg0, <4 x i32> %arg1, <2 x i64> %a2)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpmacssdql(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone

define <2 x i64> @test_mm_macclo_epi32(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
; X32-LABEL: test_mm_macclo_epi32:
; X32:       # %bb.0:
; X32-NEXT:    vpmacsdql %xmm2, %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_macclo_epi32:
; X64:       # %bb.0:
; X64-NEXT:    vpmacsdql %xmm2, %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = call <2 x i64> @llvm.x86.xop.vpmacsdql(<4 x i32> %arg0, <4 x i32> %arg1, <2 x i64> %a2)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpmacsdql(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone

define <2 x i64> @test_mm_maccshi_epi32(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
; X32-LABEL: test_mm_maccshi_epi32:
; X32:       # %bb.0:
; X32-NEXT:    vpmacssdqh %xmm2, %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_maccshi_epi32:
; X64:       # %bb.0:
; X64-NEXT:    vpmacssdqh %xmm2, %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = call <2 x i64> @llvm.x86.xop.vpmacssdqh(<4 x i32> %arg0, <4 x i32> %arg1, <2 x i64> %a2)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpmacssdqh(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone

define <2 x i64> @test_mm_macchi_epi32(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
; X32-LABEL: test_mm_macchi_epi32:
; X32:       # %bb.0:
; X32-NEXT:    vpmacsdqh %xmm2, %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_macchi_epi32:
; X64:       # %bb.0:
; X64-NEXT:    vpmacsdqh %xmm2, %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = call <2 x i64> @llvm.x86.xop.vpmacsdqh(<4 x i32> %arg0, <4 x i32> %arg1, <2 x i64> %a2)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpmacsdqh(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone

define <2 x i64> @test_mm_maddsd_epi16(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
; X32-LABEL: test_mm_maddsd_epi16:
; X32:       # %bb.0:
; X32-NEXT:    vpmadcsswd %xmm2, %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_maddsd_epi16:
; X64:       # %bb.0:
; X64-NEXT:    vpmadcsswd %xmm2, %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
  %res = call <4 x i32> @llvm.x86.xop.vpmadcsswd(<8 x i16> %arg0, <8 x i16> %arg1, <4 x i32> %arg2)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.xop.vpmadcsswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone

define <2 x i64> @test_mm_maddd_epi16(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
; X32-LABEL: test_mm_maddd_epi16:
; X32:       # %bb.0:
; X32-NEXT:    vpmadcswd %xmm2, %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_maddd_epi16:
; X64:       # %bb.0:
; X64-NEXT:    vpmadcswd %xmm2, %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
  %res = call <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16> %arg0, <8 x i16> %arg1, <4 x i32> %arg2)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone

; Horizontal add/subtract intrinsics (vphadd*, vphsub*). Each takes a single
; vector operand and returns a vector with wider elements; the checks expect
; one two-operand instruction operating on %xmm0.

define <2 x i64> @test_mm_haddw_epi8(<2 x i64> %a0) {
; X32-LABEL: test_mm_haddw_epi8:
; X32:       # %bb.0:
; X32-NEXT:    vphaddbw %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_haddw_epi8:
; X64:       # %bb.0:
; X64-NEXT:    vphaddbw %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %res = call <8 x i16> @llvm.x86.xop.vphaddbw(<16 x i8> %arg0)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.xop.vphaddbw(<16 x i8>) nounwind readnone

define <2 x i64> @test_mm_haddd_epi8(<2 x i64> %a0) {
; X32-LABEL: test_mm_haddd_epi8:
; X32:       # %bb.0:
; X32-NEXT:    vphaddbd %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_haddd_epi8:
; X64:       # %bb.0:
; X64-NEXT:    vphaddbd %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %res = call <4 x i32> @llvm.x86.xop.vphaddbd(<16 x i8> %arg0)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.xop.vphaddbd(<16 x i8>) nounwind readnone

define <2 x i64> @test_mm_haddq_epi8(<2 x i64> %a0) {
; X32-LABEL: test_mm_haddq_epi8:
; X32:       # %bb.0:
; X32-NEXT:    vphaddbq %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_haddq_epi8:
; X64:       # %bb.0:
; X64-NEXT:    vphaddbq %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %res = call <2 x i64> @llvm.x86.xop.vphaddbq(<16 x i8> %arg0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vphaddbq(<16 x i8>) nounwind readnone

define <2 x i64> @test_mm_haddd_epi16(<2 x i64> %a0) {
; X32-LABEL: test_mm_haddd_epi16:
; X32:       # %bb.0:
; X32-NEXT:    vphaddwd %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_haddd_epi16:
; X64:       # %bb.0:
; X64-NEXT:    vphaddwd %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %res = call <4 x i32> @llvm.x86.xop.vphaddwd(<8 x i16> %arg0)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.xop.vphaddwd(<8 x i16>) nounwind readnone

define <2 x i64> @test_mm_haddq_epi16(<2 x i64> %a0) {
; X32-LABEL: test_mm_haddq_epi16:
; X32:       # %bb.0:
; X32-NEXT:    vphaddwq %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_haddq_epi16:
; X64:       # %bb.0:
; X64-NEXT:    vphaddwq %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %res = call <2 x i64> @llvm.x86.xop.vphaddwq(<8 x i16> %arg0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vphaddwq(<8 x i16>) nounwind readnone

define <2 x i64> @test_mm_haddq_epi32(<2 x i64> %a0) {
; X32-LABEL: test_mm_haddq_epi32:
; X32:       # %bb.0:
; X32-NEXT:    vphadddq %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_haddq_epi32:
; X64:       # %bb.0:
; X64-NEXT:    vphadddq %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %res = call <2 x i64> @llvm.x86.xop.vphadddq(<4 x i32> %arg0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vphadddq(<4 x i32>) nounwind readnone

define <2 x i64> @test_mm_haddw_epu8(<2 x i64> %a0) {
; X32-LABEL: test_mm_haddw_epu8:
; X32:       # %bb.0:
; X32-NEXT:    vphaddubw %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_haddw_epu8:
; X64:       # %bb.0:
; X64-NEXT:    vphaddubw %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %res = call <8 x i16> @llvm.x86.xop.vphaddubw(<16 x i8> %arg0)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.xop.vphaddubw(<16 x i8>) nounwind readnone

define <2 x i64> @test_mm_haddd_epu8(<2 x i64> %a0) {
; X32-LABEL: test_mm_haddd_epu8:
; X32:       # %bb.0:
; X32-NEXT:    vphaddubd %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_haddd_epu8:
; X64:       # %bb.0:
; X64-NEXT:    vphaddubd %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %res = call <4 x i32> @llvm.x86.xop.vphaddubd(<16 x i8> %arg0)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.xop.vphaddubd(<16 x i8>) nounwind readnone

define <2 x i64> @test_mm_haddq_epu8(<2 x i64> %a0) {
; X32-LABEL: test_mm_haddq_epu8:
; X32:       # %bb.0:
; X32-NEXT:    vphaddubq %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_haddq_epu8:
; X64:       # %bb.0:
; X64-NEXT:    vphaddubq %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %res = call <2 x i64> @llvm.x86.xop.vphaddubq(<16 x i8> %arg0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vphaddubq(<16 x i8>) nounwind readnone

define <2 x i64> @test_mm_haddd_epu16(<2 x i64> %a0) {
; X32-LABEL: test_mm_haddd_epu16:
; X32:       # %bb.0:
; X32-NEXT:    vphadduwd %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_haddd_epu16:
; X64:       # %bb.0:
; X64-NEXT:    vphadduwd %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %res = call <4 x i32> @llvm.x86.xop.vphadduwd(<8 x i16> %arg0)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.xop.vphadduwd(<8 x i16>) nounwind readnone

define <2 x i64> @test_mm_haddq_epu16(<2 x i64> %a0) {
; X32-LABEL: test_mm_haddq_epu16:
; X32:       # %bb.0:
; X32-NEXT:    vphadduwq %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_haddq_epu16:
; X64:       # %bb.0:
; X64-NEXT:    vphadduwq %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %res = call <2 x i64> @llvm.x86.xop.vphadduwq(<8 x i16> %arg0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vphadduwq(<8 x i16>) nounwind readnone

define <2 x i64> @test_mm_haddq_epu32(<2 x i64> %a0) {
; X32-LABEL: test_mm_haddq_epu32:
; X32:       # %bb.0:
; X32-NEXT:    vphaddudq %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_haddq_epu32:
; X64:       # %bb.0:
; X64-NEXT:    vphaddudq %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %res = call <2 x i64> @llvm.x86.xop.vphaddudq(<4 x i32> %arg0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vphaddudq(<4 x i32>) nounwind readnone

define <2 x i64> @test_mm_hsubw_epi8(<2 x i64> %a0) {
; X32-LABEL: test_mm_hsubw_epi8:
; X32:       # %bb.0:
; X32-NEXT:    vphsubbw %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_hsubw_epi8:
; X64:       # %bb.0:
; X64-NEXT:    vphsubbw %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %res = call <8 x i16> @llvm.x86.xop.vphsubbw(<16 x i8> %arg0)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.xop.vphsubbw(<16 x i8>) nounwind readnone

define <2 x i64> @test_mm_hsubd_epi16(<2 x i64> %a0) {
; X32-LABEL: test_mm_hsubd_epi16:
; X32:       # %bb.0:
; X32-NEXT:    vphsubwd %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_hsubd_epi16:
; X64:       # %bb.0:
; X64-NEXT:    vphsubwd %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %res = call <4 x i32> @llvm.x86.xop.vphsubwd(<8 x i16> %arg0)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.xop.vphsubwd(<8 x i16>) nounwind readnone

define <2 x i64> @test_mm_hsubq_epi32(<2 x i64> %a0) {
; X32-LABEL: test_mm_hsubq_epi32:
; X32:       # %bb.0:
; X32-NEXT:    vphsubdq %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_hsubq_epi32:
; X64:       # %bb.0:
; X64-NEXT:    vphsubdq %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %res = call <2 x i64> @llvm.x86.xop.vphsubdq(<4 x i32> %arg0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vphsubdq(<4 x i32>) nounwind readnone

; Conditional move. For both the 128-bit and 256-bit vpcmov intrinsics the
; checks below expect a multi-instruction all-ones/xor/and/and/or sequence
; rather than a single vpcmov instruction.

define <2 x i64> @test_mm_cmov_si128(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) {
; X32-LABEL: test_mm_cmov_si128:
; X32:       # %bb.0:
; X32-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; X32-NEXT:    vpxor %xmm3, %xmm2, %xmm3
; X32-NEXT:    vpand %xmm2, %xmm0, %xmm0
; X32-NEXT:    vpand %xmm3, %xmm1, %xmm1
; X32-NEXT:    vpor %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmov_si128:
; X64:       # %bb.0:
; X64-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; X64-NEXT:    vpxor %xmm3, %xmm2, %xmm3
; X64-NEXT:    vpand %xmm2, %xmm0, %xmm0
; X64-NEXT:    vpand %xmm3, %xmm1, %xmm1
; X64-NEXT:    vpor %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.xop.vpcmov(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpcmov(<2 x i64>, <2 x i64>, <2 x i64>) nounwind readnone

define <4 x i64> @test_mm256_cmov_si256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2) {
; X32-LABEL: test_mm256_cmov_si256:
; X32:       # %bb.0:
; X32-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; X32-NEXT:    vcmptrueps %ymm3, %ymm3, %ymm3
; X32-NEXT:    vxorps %ymm3, %ymm2, %ymm3
; X32-NEXT:    vandps %ymm2, %ymm0, %ymm0
; X32-NEXT:    vandps %ymm3, %ymm1, %ymm1
; X32-NEXT:    vorps %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_cmov_si256:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; X64-NEXT:    vcmptrueps %ymm3, %ymm3, %ymm3
; X64-NEXT:    vxorps %ymm3, %ymm2, %ymm3
; X64-NEXT:    vandps %ymm2, %ymm0, %ymm0
; X64-NEXT:    vandps %ymm3, %ymm1, %ymm1
; X64-NEXT:    vorps %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2)
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64>, <4 x i64>, <4 x i64>) nounwind readnone

; Byte permute (vpperm) with three <16 x i8> operands.

define <2 x i64> @test_mm_perm_epi8(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) {
; X32-LABEL: test_mm_perm_epi8:
; X32:       # %bb.0:
; X32-NEXT:    vpperm %xmm2, %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_perm_epi8:
; X64:       # %bb.0:
; X64-NEXT:    vpperm %xmm2, %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %arg2 = bitcast <2 x i64> %a2 to <16 x i8>
  %res = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %arg0, <16 x i8> %arg1, <16 x i8> %arg2)
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.xop.vpperm(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone

; Rotates whose second operand is a vector (vprotb/w/d/q).

define <2 x i64> @test_mm_rot_epi8(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_rot_epi8:
; X32:       # %bb.0:
; X32-NEXT:    vprotb %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_rot_epi8:
; X64:       # %bb.0:
; X64-NEXT:    vprotb %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call <16 x i8> @llvm.x86.xop.vprotb(<16 x i8> %arg0, <16 x i8> %arg1)
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.xop.vprotb(<16 x i8>, <16 x i8>) nounwind readnone

define <2 x i64> @test_mm_rot_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_rot_epi16:
; X32:       # %bb.0:
; X32-NEXT:    vprotw %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_rot_epi16:
; X64:       # %bb.0:
; X64-NEXT:    vprotw %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.xop.vprotw(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.xop.vprotw(<8 x i16>, <8 x i16>) nounwind readnone

define <2 x i64> @test_mm_rot_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_rot_epi32:
; X32:       # %bb.0:
; X32-NEXT:    vprotd %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_rot_epi32:
; X64:       # %bb.0:
; X64-NEXT:    vprotd %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = call <4 x i32> @llvm.x86.xop.vprotd(<4 x i32> %arg0, <4 x i32> %arg1)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.xop.vprotd(<4 x i32>, <4 x i32>) nounwind readnone

define <2 x i64> @test_mm_rot_epi64(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_rot_epi64:
; X32:       # %bb.0:
; X32-NEXT:    vprotq %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_rot_epi64:
; X64:       # %bb.0:
; X64-NEXT:    vprotq %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.xop.vprotq(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vprotq(<2 x i64>, <2 x i64>) nounwind readnone

; Rotates with an immediate i8 operand (vprot*i intrinsics). The checks expect
; the immediate form of the instruction; note that the i8 -30 passed in the
; epi32 case is expected to be printed as $226.

define <2 x i64> @test_mm_roti_epi8(<2 x i64> %a0) {
; X32-LABEL: test_mm_roti_epi8:
; X32:       # %bb.0:
; X32-NEXT:    vprotb $1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_roti_epi8:
; X64:       # %bb.0:
; X64-NEXT:    vprotb $1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %res = call <16 x i8> @llvm.x86.xop.vprotbi(<16 x i8> %arg0, i8 1)
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.xop.vprotbi(<16 x i8>, i8) nounwind readnone

define <2 x i64> @test_mm_roti_epi16(<2 x i64> %a0) {
; X32-LABEL: test_mm_roti_epi16:
; X32:       # %bb.0:
; X32-NEXT:    vprotw $50, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_roti_epi16:
; X64:       # %bb.0:
; X64-NEXT:    vprotw $50, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.xop.vprotwi(<8 x i16> %arg0, i8 50)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.xop.vprotwi(<8 x i16>, i8) nounwind readnone

define <2 x i64> @test_mm_roti_epi32(<2 x i64> %a0) {
; X32-LABEL: test_mm_roti_epi32:
; X32:       # %bb.0:
; X32-NEXT:    vprotd $226, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_roti_epi32:
; X64:       # %bb.0:
; X64-NEXT:    vprotd $226, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %res = call <4 x i32> @llvm.x86.xop.vprotdi(<4 x i32> %arg0, i8 -30)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.xop.vprotdi(<4 x i32>, i8) nounwind readnone

define <2 x i64> @test_mm_roti_epi64(<2 x i64> %a0) {
; X32-LABEL: test_mm_roti_epi64:
; X32:       # %bb.0:
; X32-NEXT:    vprotq $100, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_roti_epi64:
; X64:       # %bb.0:
; X64-NEXT:    vprotq $100, %xmm0, %xmm0
; X64-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.xop.vprotqi(<2 x i64> %a0, i8 100)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vprotqi(<2 x i64>, i8) nounwind readnone

; Vector shifts: the vpshl* intrinsics (test_mm_shl_*) followed by the vpsha*
; intrinsics (test_mm_sha_*), each with two vector operands.

define <2 x i64> @test_mm_shl_epi8(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_shl_epi8:
; X32:       # %bb.0:
; X32-NEXT:    vpshlb %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_shl_epi8:
; X64:       # %bb.0:
; X64-NEXT:    vpshlb %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call <16 x i8> @llvm.x86.xop.vpshlb(<16 x i8> %arg0, <16 x i8> %arg1)
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.xop.vpshlb(<16 x i8>, <16 x i8>) nounwind readnone

define <2 x i64> @test_mm_shl_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_shl_epi16:
; X32:       # %bb.0:
; X32-NEXT:    vpshlw %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_shl_epi16:
; X64:       # %bb.0:
; X64-NEXT:    vpshlw %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16>, <8 x i16>) nounwind readnone

define <2 x i64> @test_mm_shl_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_shl_epi32:
; X32:       # %bb.0:
; X32-NEXT:    vpshld %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_shl_epi32:
; X64:       # %bb.0:
; X64-NEXT:    vpshld %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = call <4 x i32> @llvm.x86.xop.vpshld(<4 x i32> %arg0, <4 x i32> %arg1)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.xop.vpshld(<4 x i32>, <4 x i32>) nounwind readnone

define <2 x i64> @test_mm_shl_epi64(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_shl_epi64:
; X32:       # %bb.0:
; X32-NEXT:    vpshlq %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_shl_epi64:
; X64:       # %bb.0:
; X64-NEXT:    vpshlq %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.xop.vpshlq(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpshlq(<2 x i64>, <2 x i64>) nounwind readnone

define <2 x i64> @test_mm_sha_epi8(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_sha_epi8:
; X32:       # %bb.0:
; X32-NEXT:    vpshab %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_sha_epi8:
; X64:       # %bb.0:
; X64-NEXT:    vpshab %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call <16 x i8> @llvm.x86.xop.vpshab(<16 x i8> %arg0, <16 x i8> %arg1)
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.xop.vpshab(<16 x i8>, <16 x i8>) nounwind readnone

define <2 x i64> @test_mm_sha_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_sha_epi16:
; X32:       # %bb.0:
; X32-NEXT:    vpshaw %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_sha_epi16:
; X64:       # %bb.0:
; X64-NEXT:    vpshaw %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.xop.vpshaw(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.xop.vpshaw(<8 x i16>, <8 x i16>) nounwind readnone

define <2 x i64> @test_mm_sha_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_sha_epi32:
; X32:       # %bb.0:
; X32-NEXT:    vpshad %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_sha_epi32:
; X64:       # %bb.0:
; X64-NEXT:    vpshad %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = call <4 x i32> @llvm.x86.xop.vpshad(<4 x i32> %arg0, <4 x i32> %arg1)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.xop.vpshad(<4 x i32>, <4 x i32>) nounwind readnone

define <2 x i64> @test_mm_sha_epi64(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_sha_epi64:
; X32:       # %bb.0:
; X32-NEXT:    vpshaq %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_sha_epi64:
; X64:       # %bb.0:
; X64-NEXT:    vpshaq %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.xop.vpshaq(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpshaq(<2 x i64>, <2 x i64>) nounwind readnone

; Vector compares. The vpcomu* intrinsics are called with an immediate of
; i8 0, and the checks expect that to be printed with the vpcomltu* mnemonic.

define <2 x i64> @test_mm_com_epu8(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_com_epu8:
; X32:       # %bb.0:
; X32-NEXT:    vpcomltub %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_com_epu8:
; X64:       # %bb.0:
; X64-NEXT:    vpcomltub %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8> %arg0, <16 x i8> %arg1, i8 0)
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8>, <16 x i8>, i8) nounwind readnone

define <2 x i64> @test_mm_com_epu16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_com_epu16:
; X32:       # %bb.0:
; X32-NEXT:    vpcomltuw %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_com_epu16:
; X64:       # %bb.0:
; X64-NEXT:    vpcomltuw %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16> %arg0, <8 x i16> %arg1, i8 0)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16>, <8 x i16>, i8) nounwind readnone

define <2 x i64> @test_mm_com_epu32(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_com_epu32:
; X32:       # %bb.0:
; X32-NEXT:    vpcomltud %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_com_epu32:
; X64:       # %bb.0:
; X64-NEXT:    vpcomltud %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = call <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32> %arg0, <4 x i32> %arg1, i8 0)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32>, <4 x i32>, i8) nounwind readnone

define <2 x i64> @test_mm_com_epu64(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_com_epu64:
; X32:       # %bb.0:
; X32-NEXT:    vpcomltuq %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_com_epu64:
; X64:       # %bb.0:
; X64-NEXT:    vpcomltuq %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64> %a0, <2 x i64> %a1, i8 0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64>, <2 x i64>, i8) nounwind readnone

define <2 x i64> @test_mm_com_epi8(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_com_epi8:
; X32:       # %bb.0:
; X32-NEXT:    vpcomltb %xmm1, %xmm0, %xmm0
; X32-NEXT:
retl 890; 891; X64-LABEL: test_mm_com_epi8: 892; X64: # %bb.0: 893; X64-NEXT: vpcomltb %xmm1, %xmm0, %xmm0 894; X64-NEXT: retq 895 %arg0 = bitcast <2 x i64> %a0 to <16 x i8> 896 %arg1 = bitcast <2 x i64> %a1 to <16 x i8> 897 %res = call <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8> %arg0, <16 x i8> %arg1, i8 0) 898 %bc = bitcast <16 x i8> %res to <2 x i64> 899 ret <2 x i64> %bc 900} 901declare <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8>, <16 x i8>, i8) nounwind readnone 902 903define <2 x i64> @test_mm_com_epi16(<2 x i64> %a0, <2 x i64> %a1) { 904; X32-LABEL: test_mm_com_epi16: 905; X32: # %bb.0: 906; X32-NEXT: vpcomltw %xmm1, %xmm0, %xmm0 907; X32-NEXT: retl 908; 909; X64-LABEL: test_mm_com_epi16: 910; X64: # %bb.0: 911; X64-NEXT: vpcomltw %xmm1, %xmm0, %xmm0 912; X64-NEXT: retq 913 %arg0 = bitcast <2 x i64> %a0 to <8 x i16> 914 %arg1 = bitcast <2 x i64> %a1 to <8 x i16> 915 %res = call <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16> %arg0, <8 x i16> %arg1, i8 0) 916 %bc = bitcast <8 x i16> %res to <2 x i64> 917 ret <2 x i64> %bc 918} 919declare <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16>, <8 x i16>, i8) nounwind readnone 920 921define <2 x i64> @test_mm_com_epi32(<2 x i64> %a0, <2 x i64> %a1) { 922; X32-LABEL: test_mm_com_epi32: 923; X32: # %bb.0: 924; X32-NEXT: vpcomltd %xmm1, %xmm0, %xmm0 925; X32-NEXT: retl 926; 927; X64-LABEL: test_mm_com_epi32: 928; X64: # %bb.0: 929; X64-NEXT: vpcomltd %xmm1, %xmm0, %xmm0 930; X64-NEXT: retq 931 %arg0 = bitcast <2 x i64> %a0 to <4 x i32> 932 %arg1 = bitcast <2 x i64> %a1 to <4 x i32> 933 %res = call <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32> %arg0, <4 x i32> %arg1, i8 0) 934 %bc = bitcast <4 x i32> %res to <2 x i64> 935 ret <2 x i64> %bc 936} 937declare <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32>, <4 x i32>, i8) nounwind readnone 938 939define <2 x i64> @test_mm_com_epi64(<2 x i64> %a0, <2 x i64> %a1) { 940; X32-LABEL: test_mm_com_epi64: 941; X32: # %bb.0: 942; X32-NEXT: vpcomltq %xmm1, %xmm0, %xmm0 943; X32-NEXT: retl 944; 945; X64-LABEL: 
test_mm_com_epi64: 946; X64: # %bb.0: 947; X64-NEXT: vpcomltq %xmm1, %xmm0, %xmm0 948; X64-NEXT: retq 949 %res = call <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64> %a0, <2 x i64> %a1, i8 0) 950 ret <2 x i64> %res 951} 952declare <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64>, <2 x i64>, i8) nounwind readnone 953 954define <2 x double> @test_mm_permute2_pd(<2 x double> %a0, <2 x double> %a1, <2 x i64> %a2) { 955; X32-LABEL: test_mm_permute2_pd: 956; X32: # %bb.0: 957; X32-NEXT: vpermil2pd $0, %xmm2, %xmm1, %xmm0, %xmm0 958; X32-NEXT: retl 959; 960; X64-LABEL: test_mm_permute2_pd: 961; X64: # %bb.0: 962; X64-NEXT: vpermil2pd $0, %xmm2, %xmm1, %xmm0, %xmm0 963; X64-NEXT: retq 964 %res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x i64> %a2, i8 0) 965 ret <2 x double> %res 966} 967declare <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double>, <2 x double>, <2 x i64>, i8) nounwind readnone 968 969define <4 x double> @test_mm256_permute2_pd(<4 x double> %a0, <4 x double> %a1, <4 x i64> %a2) { 970; X32-LABEL: test_mm256_permute2_pd: 971; X32: # %bb.0: 972; X32-NEXT: vpermil2pd $0, %ymm2, %ymm1, %ymm0, %ymm0 973; X32-NEXT: retl 974; 975; X64-LABEL: test_mm256_permute2_pd: 976; X64: # %bb.0: 977; X64-NEXT: vpermil2pd $0, %ymm2, %ymm1, %ymm0, %ymm0 978; X64-NEXT: retq 979 %res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x i64> %a2, i8 0) 980 ret <4 x double> %res 981} 982declare <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double>, <4 x double>, <4 x i64>, i8) nounwind readnone 983 984define <4 x float> @test_mm_permute2_ps(<4 x float> %a0, <4 x float> %a1, <2 x i64> %a2) { 985; X32-LABEL: test_mm_permute2_ps: 986; X32: # %bb.0: 987; X32-NEXT: vpermil2ps $0, %xmm2, %xmm1, %xmm0, %xmm0 988; X32-NEXT: retl 989; 990; X64-LABEL: test_mm_permute2_ps: 991; X64: # %bb.0: 992; X64-NEXT: vpermil2ps $0, %xmm2, %xmm1, %xmm0, %xmm0 993; X64-NEXT: retq 994 %arg2 = bitcast <2 x i64> %a2 to <4 x i32> 995 %res = call <4 
x float> @llvm.x86.xop.vpermil2ps(<4 x float> %a0, <4 x float> %a1, <4 x i32> %arg2, i8 0) 996 ret <4 x float> %res 997} 998declare <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float>, <4 x float>, <4 x i32>, i8) nounwind readnone 999 1000define <8 x float> @test_mm256_permute2_ps(<8 x float> %a0, <8 x float> %a1, <4 x i64> %a2) { 1001; X32-LABEL: test_mm256_permute2_ps: 1002; X32: # %bb.0: 1003; X32-NEXT: vpermil2ps $0, %ymm2, %ymm1, %ymm0, %ymm0 1004; X32-NEXT: retl 1005; 1006; X64-LABEL: test_mm256_permute2_ps: 1007; X64: # %bb.0: 1008; X64-NEXT: vpermil2ps $0, %ymm2, %ymm1, %ymm0, %ymm0 1009; X64-NEXT: retq 1010 %arg2 = bitcast <4 x i64> %a2 to <8 x i32> 1011 %res = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %a0, <8 x float> %a1, <8 x i32> %arg2, i8 0) 1012 ret <8 x float> %res 1013} 1014declare <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float>, <8 x float>, <8 x i32>, i8) nounwind readnone 1015 1016define <4 x float> @test_mm_frcz_ss(<4 x float> %a0) { 1017; X32-LABEL: test_mm_frcz_ss: 1018; X32: # %bb.0: 1019; X32-NEXT: vfrczss %xmm0, %xmm0 1020; X32-NEXT: retl 1021; 1022; X64-LABEL: test_mm_frcz_ss: 1023; X64: # %bb.0: 1024; X64-NEXT: vfrczss %xmm0, %xmm0 1025; X64-NEXT: retq 1026 %res = call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %a0) 1027 ret <4 x float> %res 1028} 1029declare <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float>) nounwind readnone 1030 1031define <2 x double> @test_mm_frcz_sd(<2 x double> %a0) { 1032; X32-LABEL: test_mm_frcz_sd: 1033; X32: # %bb.0: 1034; X32-NEXT: vfrczsd %xmm0, %xmm0 1035; X32-NEXT: retl 1036; 1037; X64-LABEL: test_mm_frcz_sd: 1038; X64: # %bb.0: 1039; X64-NEXT: vfrczsd %xmm0, %xmm0 1040; X64-NEXT: retq 1041 %res = call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %a0) 1042 ret <2 x double> %res 1043} 1044declare <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double>) nounwind readnone 1045 1046define <4 x float> @test_mm_frcz_ps(<4 x float> %a0) { 1047; X32-LABEL: test_mm_frcz_ps: 1048; X32: # %bb.0: 1049; 
X32-NEXT: vfrczps %xmm0, %xmm0 1050; X32-NEXT: retl 1051; 1052; X64-LABEL: test_mm_frcz_ps: 1053; X64: # %bb.0: 1054; X64-NEXT: vfrczps %xmm0, %xmm0 1055; X64-NEXT: retq 1056 %res = call <4 x float> @llvm.x86.xop.vfrcz.ps(<4 x float> %a0) 1057 ret <4 x float> %res 1058} 1059declare <4 x float> @llvm.x86.xop.vfrcz.ps(<4 x float>) nounwind readnone 1060 1061define <2 x double> @test_mm_frcz_pd(<2 x double> %a0) { 1062; X32-LABEL: test_mm_frcz_pd: 1063; X32: # %bb.0: 1064; X32-NEXT: vfrczpd %xmm0, %xmm0 1065; X32-NEXT: retl 1066; 1067; X64-LABEL: test_mm_frcz_pd: 1068; X64: # %bb.0: 1069; X64-NEXT: vfrczpd %xmm0, %xmm0 1070; X64-NEXT: retq 1071 %res = call <2 x double> @llvm.x86.xop.vfrcz.pd(<2 x double> %a0) 1072 ret <2 x double> %res 1073} 1074declare <2 x double> @llvm.x86.xop.vfrcz.pd(<2 x double>) nounwind readnone 1075 1076define <8 x float> @test_mm256_frcz_ps(<8 x float> %a0) { 1077; X32-LABEL: test_mm256_frcz_ps: 1078; X32: # %bb.0: 1079; X32-NEXT: vfrczps %ymm0, %ymm0 1080; X32-NEXT: retl 1081; 1082; X64-LABEL: test_mm256_frcz_ps: 1083; X64: # %bb.0: 1084; X64-NEXT: vfrczps %ymm0, %ymm0 1085; X64-NEXT: retq 1086 %res = call <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float> %a0) 1087 ret <8 x float> %res 1088} 1089declare <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float>) nounwind readnone 1090 1091define <4 x double> @test_mm256_frcz_pd(<4 x double> %a0) { 1092; X32-LABEL: test_mm256_frcz_pd: 1093; X32: # %bb.0: 1094; X32-NEXT: vfrczpd %ymm0, %ymm0 1095; X32-NEXT: retl 1096; 1097; X64-LABEL: test_mm256_frcz_pd: 1098; X64: # %bb.0: 1099; X64-NEXT: vfrczpd %ymm0, %ymm0 1100; X64-NEXT: retq 1101 %res = call <4 x double> @llvm.x86.xop.vfrcz.pd.256(<4 x double> %a0) 1102 ret <4 x double> %res 1103} 1104declare <4 x double> @llvm.x86.xop.vfrcz.pd.256(<4 x double>) nounwind readnone 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122