; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=i686-unknown-unknown -mattr=+avx,+fma4,+xop | FileCheck %s --check-prefix=ALL --check-prefix=X32
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+xop | FileCheck %s --check-prefix=ALL --check-prefix=X64

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/xop-builtins.c

; Layout of every test below: bitcast the <2 x i64> (or <4 x i64>) arguments to
; the element type the intrinsic takes, call the intrinsic, and bitcast the
; result back when its type differs from the function's return type. Each test
; expects a single instruction on both the i686 and x86_64 runs unless noted.

; --- vpmacs* / vpmadcs* intrinsics (three vector operands) ---

define <2 x i64> @test_mm_maccs_epi16(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
; X32-LABEL: test_mm_maccs_epi16:
; X32:       # BB#0:
; X32-NEXT:    vpmacssww %xmm2, %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_maccs_epi16:
; X64:       # BB#0:
; X64-NEXT:    vpmacssww %xmm2, %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %arg2 = bitcast <2 x i64> %a2 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.xop.vpmacssww(<8 x i16> %arg0, <8 x i16> %arg1, <8 x i16> %arg2)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.xop.vpmacssww(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone

define <2 x i64> @test_mm_macc_epi16(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
; X32-LABEL: test_mm_macc_epi16:
; X32:       # BB#0:
; X32-NEXT:    vpmacsww %xmm2, %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_macc_epi16:
; X64:       # BB#0:
; X64-NEXT:    vpmacsww %xmm2, %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %arg2 = bitcast <2 x i64> %a2 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.xop.vpmacsww(<8 x i16> %arg0, <8 x i16> %arg1, <8 x i16> %arg2)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.xop.vpmacsww(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone

define <2 x i64> @test_mm_maccsd_epi16(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
; X32-LABEL: test_mm_maccsd_epi16:
; X32:       # BB#0:
; X32-NEXT:    vpmacsswd %xmm2, %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_maccsd_epi16:
; X64:       # BB#0:
; X64-NEXT:    vpmacsswd %xmm2, %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
  %res = call <4 x i32> @llvm.x86.xop.vpmacsswd(<8 x i16> %arg0, <8 x i16> %arg1, <4 x i32> %arg2)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.xop.vpmacsswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone

define <2 x i64> @test_mm_maccd_epi16(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
; X32-LABEL: test_mm_maccd_epi16:
; X32:       # BB#0:
; X32-NEXT:    vpmacswd %xmm2, %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_maccd_epi16:
; X64:       # BB#0:
; X64-NEXT:    vpmacswd %xmm2, %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
  %res = call <4 x i32> @llvm.x86.xop.vpmacswd(<8 x i16> %arg0, <8 x i16> %arg1, <4 x i32> %arg2)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.xop.vpmacswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone

define <2 x i64> @test_mm_maccs_epi32(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
; X32-LABEL: test_mm_maccs_epi32:
; X32:       # BB#0:
; X32-NEXT:    vpmacssdd %xmm2, %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_maccs_epi32:
; X64:       # BB#0:
; X64-NEXT:    vpmacssdd %xmm2, %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
  %res = call <4 x i32> @llvm.x86.xop.vpmacssdd(<4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.xop.vpmacssdd(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone

define <2 x i64> @test_mm_macc_epi32(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
; X32-LABEL: test_mm_macc_epi32:
; X32:       # BB#0:
; X32-NEXT:    vpmacsdd %xmm2, %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_macc_epi32:
; X64:       # BB#0:
; X64-NEXT:    vpmacsdd %xmm2, %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
  %res = call <4 x i32> @llvm.x86.xop.vpmacsdd(<4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.xop.vpmacsdd(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone

define <2 x i64> @test_mm_maccslo_epi32(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
; X32-LABEL: test_mm_maccslo_epi32:
; X32:       # BB#0:
; X32-NEXT:    vpmacssdql %xmm2, %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_maccslo_epi32:
; X64:       # BB#0:
; X64-NEXT:    vpmacssdql %xmm2, %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = call <2 x i64> @llvm.x86.xop.vpmacssdql(<4 x i32> %arg0, <4 x i32> %arg1, <2 x i64> %a2)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpmacssdql(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone

define <2 x i64> @test_mm_macclo_epi32(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
; X32-LABEL: test_mm_macclo_epi32:
; X32:       # BB#0:
; X32-NEXT:    vpmacsdql %xmm2, %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_macclo_epi32:
; X64:       # BB#0:
; X64-NEXT:    vpmacsdql %xmm2, %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = call <2 x i64> @llvm.x86.xop.vpmacsdql(<4 x i32> %arg0, <4 x i32> %arg1, <2 x i64> %a2)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpmacsdql(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone

define <2 x i64> @test_mm_maccshi_epi32(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
; X32-LABEL: test_mm_maccshi_epi32:
; X32:       # BB#0:
; X32-NEXT:    vpmacssdqh %xmm2, %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_maccshi_epi32:
; X64:       # BB#0:
; X64-NEXT:    vpmacssdqh %xmm2, %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = call <2 x i64> @llvm.x86.xop.vpmacssdqh(<4 x i32> %arg0, <4 x i32> %arg1, <2 x i64> %a2)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpmacssdqh(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone

define <2 x i64> @test_mm_macchi_epi32(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
; X32-LABEL: test_mm_macchi_epi32:
; X32:       # BB#0:
; X32-NEXT:    vpmacsdqh %xmm2, %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_macchi_epi32:
; X64:       # BB#0:
; X64-NEXT:    vpmacsdqh %xmm2, %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = call <2 x i64> @llvm.x86.xop.vpmacsdqh(<4 x i32> %arg0, <4 x i32> %arg1, <2 x i64> %a2)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpmacsdqh(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone

define <2 x i64> @test_mm_maddsd_epi16(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
; X32-LABEL: test_mm_maddsd_epi16:
; X32:       # BB#0:
; X32-NEXT:    vpmadcsswd %xmm2, %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_maddsd_epi16:
; X64:       # BB#0:
; X64-NEXT:    vpmadcsswd %xmm2, %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
  %res = call <4 x i32> @llvm.x86.xop.vpmadcsswd(<8 x i16> %arg0, <8 x i16> %arg1, <4 x i32> %arg2)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.xop.vpmadcsswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone

define <2 x i64> @test_mm_maddd_epi16(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
; X32-LABEL: test_mm_maddd_epi16:
; X32:       # BB#0:
; X32-NEXT:    vpmadcswd %xmm2, %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_maddd_epi16:
; X64:       # BB#0:
; X64-NEXT:    vpmadcswd %xmm2, %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
  %res = call <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16> %arg0, <8 x i16> %arg1, <4 x i32> %arg2)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone

; --- vphadd* / vphsub* intrinsics (one vector operand, wider result elements) ---

define <2 x i64> @test_mm_haddw_epi8(<2 x i64> %a0) {
; X32-LABEL: test_mm_haddw_epi8:
; X32:       # BB#0:
; X32-NEXT:    vphaddbw %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_haddw_epi8:
; X64:       # BB#0:
; X64-NEXT:    vphaddbw %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %res = call <8 x i16> @llvm.x86.xop.vphaddbw(<16 x i8> %arg0)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.xop.vphaddbw(<16 x i8>) nounwind readnone

define <2 x i64> @test_mm_haddd_epi8(<2 x i64> %a0) {
; X32-LABEL: test_mm_haddd_epi8:
; X32:       # BB#0:
; X32-NEXT:    vphaddbd %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_haddd_epi8:
; X64:       # BB#0:
; X64-NEXT:    vphaddbd %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %res = call <4 x i32> @llvm.x86.xop.vphaddbd(<16 x i8> %arg0)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.xop.vphaddbd(<16 x i8>) nounwind readnone

define <2 x i64> @test_mm_haddq_epi8(<2 x i64> %a0) {
; X32-LABEL: test_mm_haddq_epi8:
; X32:       # BB#0:
; X32-NEXT:    vphaddbq %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_haddq_epi8:
; X64:       # BB#0:
; X64-NEXT:    vphaddbq %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %res = call <2 x i64> @llvm.x86.xop.vphaddbq(<16 x i8> %arg0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vphaddbq(<16 x i8>) nounwind readnone

define <2 x i64> @test_mm_haddd_epi16(<2 x i64> %a0) {
; X32-LABEL: test_mm_haddd_epi16:
; X32:       # BB#0:
; X32-NEXT:    vphaddwd %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_haddd_epi16:
; X64:       # BB#0:
; X64-NEXT:    vphaddwd %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %res = call <4 x i32> @llvm.x86.xop.vphaddwd(<8 x i16> %arg0)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.xop.vphaddwd(<8 x i16>) nounwind readnone

define <2 x i64> @test_mm_haddq_epi16(<2 x i64> %a0) {
; X32-LABEL: test_mm_haddq_epi16:
; X32:       # BB#0:
; X32-NEXT:    vphaddwq %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_haddq_epi16:
; X64:       # BB#0:
; X64-NEXT:    vphaddwq %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %res = call <2 x i64> @llvm.x86.xop.vphaddwq(<8 x i16> %arg0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vphaddwq(<8 x i16>) nounwind readnone

define <2 x i64> @test_mm_haddq_epi32(<2 x i64> %a0) {
; X32-LABEL: test_mm_haddq_epi32:
; X32:       # BB#0:
; X32-NEXT:    vphadddq %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_haddq_epi32:
; X64:       # BB#0:
; X64-NEXT:    vphadddq %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %res = call <2 x i64> @llvm.x86.xop.vphadddq(<4 x i32> %arg0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vphadddq(<4 x i32>) nounwind readnone

define <2 x i64> @test_mm_haddw_epu8(<2 x i64> %a0) {
; X32-LABEL: test_mm_haddw_epu8:
; X32:       # BB#0:
; X32-NEXT:    vphaddubw %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_haddw_epu8:
; X64:       # BB#0:
; X64-NEXT:    vphaddubw %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %res = call <8 x i16> @llvm.x86.xop.vphaddubw(<16 x i8> %arg0)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.xop.vphaddubw(<16 x i8>) nounwind readnone

define <2 x i64> @test_mm_haddd_epu8(<2 x i64> %a0) {
; X32-LABEL: test_mm_haddd_epu8:
; X32:       # BB#0:
; X32-NEXT:    vphaddubd %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_haddd_epu8:
; X64:       # BB#0:
; X64-NEXT:    vphaddubd %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %res = call <4 x i32> @llvm.x86.xop.vphaddubd(<16 x i8> %arg0)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.xop.vphaddubd(<16 x i8>) nounwind readnone

define <2 x i64> @test_mm_haddq_epu8(<2 x i64> %a0) {
; X32-LABEL: test_mm_haddq_epu8:
; X32:       # BB#0:
; X32-NEXT:    vphaddubq %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_haddq_epu8:
; X64:       # BB#0:
; X64-NEXT:    vphaddubq %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %res = call <2 x i64> @llvm.x86.xop.vphaddubq(<16 x i8> %arg0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vphaddubq(<16 x i8>) nounwind readnone

define <2 x i64> @test_mm_haddd_epu16(<2 x i64> %a0) {
; X32-LABEL: test_mm_haddd_epu16:
; X32:       # BB#0:
; X32-NEXT:    vphadduwd %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_haddd_epu16:
; X64:       # BB#0:
; X64-NEXT:    vphadduwd %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %res = call <4 x i32> @llvm.x86.xop.vphadduwd(<8 x i16> %arg0)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.xop.vphadduwd(<8 x i16>) nounwind readnone

define <2 x i64> @test_mm_haddq_epu16(<2 x i64> %a0) {
; X32-LABEL: test_mm_haddq_epu16:
; X32:       # BB#0:
; X32-NEXT:    vphadduwq %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_haddq_epu16:
; X64:       # BB#0:
; X64-NEXT:    vphadduwq %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %res = call <2 x i64> @llvm.x86.xop.vphadduwq(<8 x i16> %arg0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vphadduwq(<8 x i16>) nounwind readnone

define <2 x i64> @test_mm_haddq_epu32(<2 x i64> %a0) {
; X32-LABEL: test_mm_haddq_epu32:
; X32:       # BB#0:
; X32-NEXT:    vphaddudq %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_haddq_epu32:
; X64:       # BB#0:
; X64-NEXT:    vphaddudq %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %res = call <2 x i64> @llvm.x86.xop.vphaddudq(<4 x i32> %arg0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vphaddudq(<4 x i32>) nounwind readnone

define <2 x i64> @test_mm_hsubw_epi8(<2 x i64> %a0) {
; X32-LABEL: test_mm_hsubw_epi8:
; X32:       # BB#0:
; X32-NEXT:    vphsubbw %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_hsubw_epi8:
; X64:       # BB#0:
; X64-NEXT:    vphsubbw %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %res = call <8 x i16> @llvm.x86.xop.vphsubbw(<16 x i8> %arg0)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.xop.vphsubbw(<16 x i8>) nounwind readnone

define <2 x i64> @test_mm_hsubd_epi16(<2 x i64> %a0) {
; X32-LABEL: test_mm_hsubd_epi16:
; X32:       # BB#0:
; X32-NEXT:    vphsubwd %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_hsubd_epi16:
; X64:       # BB#0:
; X64-NEXT:    vphsubwd %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %res = call <4 x i32> @llvm.x86.xop.vphsubwd(<8 x i16> %arg0)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.xop.vphsubwd(<8 x i16>) nounwind readnone

define <2 x i64> @test_mm_hsubq_epi32(<2 x i64> %a0) {
; X32-LABEL: test_mm_hsubq_epi32:
; X32:       # BB#0:
; X32-NEXT:    vphsubdq %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_hsubq_epi32:
; X64:       # BB#0:
; X64-NEXT:    vphsubdq %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %res = call <2 x i64> @llvm.x86.xop.vphsubdq(<4 x i32> %arg0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vphsubdq(<4 x i32>) nounwind readnone

; --- vpcmov / vpperm intrinsics ---

; NOTE(review): the expectations for the 128-bit test below are a
; vpcmpeqd/vpxor/vpand/vpand/vpor sequence rather than a single vpcmov, unlike
; the 256-bit test that follows it. They are autogenerated; confirm against
; current llc output before hand-editing them.
define <2 x i64> @test_mm_cmov_si128(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) {
; X32-LABEL: test_mm_cmov_si128:
; X32:       # BB#0:
; X32-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; X32-NEXT:    vpxor %xmm3, %xmm2, %xmm3
; X32-NEXT:    vpand %xmm2, %xmm0, %xmm0
; X32-NEXT:    vpand %xmm3, %xmm1, %xmm1
; X32-NEXT:    vpor %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmov_si128:
; X64:       # BB#0:
; X64-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; X64-NEXT:    vpxor %xmm3, %xmm2, %xmm3
; X64-NEXT:    vpand %xmm2, %xmm0, %xmm0
; X64-NEXT:    vpand %xmm3, %xmm1, %xmm1
; X64-NEXT:    vpor %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.xop.vpcmov(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpcmov(<2 x i64>, <2 x i64>, <2 x i64>) nounwind readnone

define <4 x i64> @test_mm256_cmov_si256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2) {
; X32-LABEL: test_mm256_cmov_si256:
; X32:       # BB#0:
; X32-NEXT:    vpcmov %ymm2, %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_cmov_si256:
; X64:       # BB#0:
; X64-NEXT:    vpcmov %ymm2, %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2)
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64>, <4 x i64>, <4 x i64>) nounwind readnone

define <2 x i64> @test_mm_perm_epi8(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) {
; X32-LABEL: test_mm_perm_epi8:
; X32:       # BB#0:
; X32-NEXT:    vpperm %xmm2, %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_perm_epi8:
; X64:       # BB#0:
; X64-NEXT:    vpperm %xmm2, %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %arg2 = bitcast <2 x i64> %a2 to <16 x i8>
  %res = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %arg0, <16 x i8> %arg1, <16 x i8> %arg2)
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.xop.vpperm(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone

; --- vprot* intrinsics (vector-count and immediate-count forms) ---

define <2 x i64> @test_mm_rot_epi8(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_rot_epi8:
; X32:       # BB#0:
; X32-NEXT:    vprotb %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_rot_epi8:
; X64:       # BB#0:
; X64-NEXT:    vprotb %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call <16 x i8> @llvm.x86.xop.vprotb(<16 x i8> %arg0, <16 x i8> %arg1)
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.xop.vprotb(<16 x i8>, <16 x i8>) nounwind readnone

define <2 x i64> @test_mm_rot_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_rot_epi16:
; X32:       # BB#0:
; X32-NEXT:    vprotw %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_rot_epi16:
; X64:       # BB#0:
; X64-NEXT:    vprotw %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.xop.vprotw(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.xop.vprotw(<8 x i16>, <8 x i16>) nounwind readnone

define <2 x i64> @test_mm_rot_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_rot_epi32:
; X32:       # BB#0:
; X32-NEXT:    vprotd %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_rot_epi32:
; X64:       # BB#0:
; X64-NEXT:    vprotd %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = call <4 x i32> @llvm.x86.xop.vprotd(<4 x i32> %arg0, <4 x i32> %arg1)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.xop.vprotd(<4 x i32>, <4 x i32>) nounwind readnone

define <2 x i64> @test_mm_rot_epi64(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_rot_epi64:
; X32:       # BB#0:
; X32-NEXT:    vprotq %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_rot_epi64:
; X64:       # BB#0:
; X64-NEXT:    vprotq %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.xop.vprotq(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vprotq(<2 x i64>, <2 x i64>) nounwind readnone

define <2 x i64> @test_mm_roti_epi8(<2 x i64> %a0) {
; X32-LABEL: test_mm_roti_epi8:
; X32:       # BB#0:
; X32-NEXT:    vprotb $1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_roti_epi8:
; X64:       # BB#0:
; X64-NEXT:    vprotb $1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %res = call <16 x i8> @llvm.x86.xop.vprotbi(<16 x i8> %arg0, i8 1)
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.xop.vprotbi(<16 x i8>, i8) nounwind readnone

define <2 x i64> @test_mm_roti_epi16(<2 x i64> %a0) {
; X32-LABEL: test_mm_roti_epi16:
; X32:       # BB#0:
; X32-NEXT:    vprotw $50, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_roti_epi16:
; X64:       # BB#0:
; X64-NEXT:    vprotw $50, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.xop.vprotwi(<8 x i16> %arg0, i8 50)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.xop.vprotwi(<8 x i16>, i8) nounwind readnone

; The i8 immediate -30 in the IR is expected to print as $226 in the assembly.
define <2 x i64> @test_mm_roti_epi32(<2 x i64> %a0) {
; X32-LABEL: test_mm_roti_epi32:
; X32:       # BB#0:
; X32-NEXT:    vprotd $226, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_roti_epi32:
; X64:       # BB#0:
; X64-NEXT:    vprotd $226, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %res = call <4 x i32> @llvm.x86.xop.vprotdi(<4 x i32> %arg0, i8 -30)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.xop.vprotdi(<4 x i32>, i8) nounwind readnone

define <2 x i64> @test_mm_roti_epi64(<2 x i64> %a0) {
; X32-LABEL: test_mm_roti_epi64:
; X32:       # BB#0:
; X32-NEXT:    vprotq $100, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_roti_epi64:
; X64:       # BB#0:
; X64-NEXT:    vprotq $100, %xmm0, %xmm0
; X64-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.xop.vprotqi(<2 x i64> %a0, i8 100)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vprotqi(<2 x i64>, i8) nounwind readnone

; --- vpshl* / vpsha* intrinsics ---

define <2 x i64> @test_mm_shl_epi8(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_shl_epi8:
; X32:       # BB#0:
; X32-NEXT:    vpshlb %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_shl_epi8:
; X64:       # BB#0:
; X64-NEXT:    vpshlb %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call <16 x i8> @llvm.x86.xop.vpshlb(<16 x i8> %arg0, <16 x i8> %arg1)
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.xop.vpshlb(<16 x i8>, <16 x i8>) nounwind readnone

define <2 x i64> @test_mm_shl_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_shl_epi16:
; X32:       # BB#0:
; X32-NEXT:    vpshlw %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_shl_epi16:
; X64:       # BB#0:
; X64-NEXT:    vpshlw %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16>, <8 x i16>) nounwind readnone

define <2 x i64> @test_mm_shl_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_shl_epi32:
; X32:       # BB#0:
; X32-NEXT:    vpshld %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_shl_epi32:
; X64:       # BB#0:
; X64-NEXT:    vpshld %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = call <4 x i32> @llvm.x86.xop.vpshld(<4 x i32> %arg0, <4 x i32> %arg1)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.xop.vpshld(<4 x i32>, <4 x i32>) nounwind readnone

define <2 x i64> @test_mm_shl_epi64(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_shl_epi64:
; X32:       # BB#0:
; X32-NEXT:    vpshlq %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_shl_epi64:
; X64:       # BB#0:
; X64-NEXT:    vpshlq %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.xop.vpshlq(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpshlq(<2 x i64>, <2 x i64>) nounwind readnone

define <2 x i64> @test_mm_sha_epi8(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_sha_epi8:
; X32:       # BB#0:
; X32-NEXT:    vpshab %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_sha_epi8:
; X64:       # BB#0:
; X64-NEXT:    vpshab %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call <16 x i8> @llvm.x86.xop.vpshab(<16 x i8> %arg0, <16 x i8> %arg1)
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.xop.vpshab(<16 x i8>, <16 x i8>) nounwind readnone

define <2 x i64> @test_mm_sha_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_sha_epi16:
; X32:       # BB#0:
; X32-NEXT:    vpshaw %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_sha_epi16:
; X64:       # BB#0:
; X64-NEXT:    vpshaw %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.xop.vpshaw(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.xop.vpshaw(<8 x i16>, <8 x i16>) nounwind readnone

define <2 x i64> @test_mm_sha_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_sha_epi32:
; X32:       # BB#0:
; X32-NEXT:    vpshad %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_sha_epi32:
; X64:       # BB#0:
; X64-NEXT:    vpshad %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = call <4 x i32> @llvm.x86.xop.vpshad(<4 x i32> %arg0, <4 x i32> %arg1)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.xop.vpshad(<4 x i32>, <4 x i32>) nounwind readnone

define <2 x i64> @test_mm_sha_epi64(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_sha_epi64:
; X32:       # BB#0:
; X32-NEXT:    vpshaq %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_sha_epi64:
; X64:       # BB#0:
; X64-NEXT:    vpshaq %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.xop.vpshaq(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpshaq(<2 x i64>, <2 x i64>) nounwind readnone

; --- vpcom* intrinsics; every call passes immediate i8 0 and the expected
; --- mnemonic is the "lt" spelling (vpcomlt*) ---

define <2 x i64> @test_mm_com_epu8(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_com_epu8:
; X32:       # BB#0:
; X32-NEXT:    vpcomltub %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_com_epu8:
; X64:       # BB#0:
; X64-NEXT:    vpcomltub %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8> %arg0, <16 x i8> %arg1, i8 0)
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8>, <16 x i8>, i8) nounwind readnone

define <2 x i64> @test_mm_com_epu16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_com_epu16:
; X32:       # BB#0:
; X32-NEXT:    vpcomltuw %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_com_epu16:
; X64:       # BB#0:
; X64-NEXT:    vpcomltuw %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16> %arg0, <8 x i16> %arg1, i8 0)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16>, <8 x i16>, i8) nounwind readnone

define <2 x i64> @test_mm_com_epu32(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_com_epu32:
; X32:       # BB#0:
; X32-NEXT:    vpcomltud %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_com_epu32:
; X64:       # BB#0:
; X64-NEXT:    vpcomltud %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = call <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32> %arg0, <4 x i32> %arg1, i8 0)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32>, <4 x i32>, i8) nounwind readnone

define <2 x i64> @test_mm_com_epu64(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_com_epu64:
; X32:       # BB#0:
; X32-NEXT:    vpcomltuq %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_com_epu64:
; X64:       # BB#0:
; X64-NEXT:    vpcomltuq %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64> %a0, <2 x i64> %a1, i8 0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64>, <2 x i64>, i8) nounwind readnone

define <2 x i64> @test_mm_com_epi8(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_com_epi8:
; X32:       # BB#0:
; X32-NEXT:    vpcomltb %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_com_epi8:
; X64:       # BB#0:
; X64-NEXT:    vpcomltb %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8> %arg0, <16 x i8> %arg1, i8 0)
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8>, <16 x i8>, i8) nounwind readnone

define <2 x i64> @test_mm_com_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_com_epi16:
; X32:       # BB#0:
; X32-NEXT:    vpcomltw %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_com_epi16:
; X64:       # BB#0:
; X64-NEXT:    vpcomltw %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16> %arg0, <8 x i16> %arg1, i8 0)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16>, <8 x i16>, i8) nounwind readnone

define <2 x i64> @test_mm_com_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_com_epi32:
; X32:       # BB#0:
; X32-NEXT:    vpcomltd %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_com_epi32:
; X64:       # BB#0:
; X64-NEXT:    vpcomltd %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = call <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32> %arg0, <4 x i32> %arg1, i8 0)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32>, <4 x i32>, i8) nounwind readnone

define <2 x i64> @test_mm_com_epi64(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_com_epi64:
; X32:       # BB#0:
; X32-NEXT:    vpcomltq %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_com_epi64:
; X64:       # BB#0:
; X64-NEXT:    vpcomltq %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64> %a0, <2 x i64> %a1, i8 0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64>, <2 x i64>, i8) nounwind readnone

; --- vpermil2pd / vpermil2ps intrinsics (128- and 256-bit, immediate i8 0) ---

define <2 x double> @test_mm_permute2_pd(<2 x double> %a0, <2 x double> %a1, <2 x i64> %a2) {
; X32-LABEL: test_mm_permute2_pd:
; X32:       # BB#0:
; X32-NEXT:    vpermil2pd $0, %xmm2, %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_permute2_pd:
; X64:       # BB#0:
; X64-NEXT:    vpermil2pd $0, %xmm2, %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x i64> %a2, i8 0)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double>, <2 x double>, <2 x i64>, i8) nounwind readnone

define <4 x double> @test_mm256_permute2_pd(<4 x double> %a0, <4 x double> %a1, <4 x i64> %a2) {
; X32-LABEL: test_mm256_permute2_pd:
; X32:       # BB#0:
; X32-NEXT:    vpermil2pd $0, %ymm2, %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_permute2_pd:
; X64:       # BB#0:
; X64-NEXT:    vpermil2pd $0, %ymm2, %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x i64> %a2, i8 0)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double>, <4 x double>, <4 x i64>, i8) nounwind readnone

define <4 x float> @test_mm_permute2_ps(<4 x float> %a0, <4 x float> %a1, <2 x i64> %a2) {
; X32-LABEL: test_mm_permute2_ps:
; X32:       # BB#0:
; X32-NEXT:    vpermil2ps $0, %xmm2, %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_permute2_ps:
; X64:       # BB#0:
; X64-NEXT:    vpermil2ps $0, %xmm2, %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
  %res = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %a0, <4 x float> %a1, <4 x i32> %arg2, i8 0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float>, <4 x float>, <4 x i32>, i8) nounwind readnone

define <8 x float> @test_mm256_permute2_ps(<8 x float> %a0, <8 x float> %a1, <4 x i64> %a2) {
; X32-LABEL: test_mm256_permute2_ps:
; X32:       # BB#0:
; X32-NEXT:    vpermil2ps $0, %ymm2, %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_permute2_ps:
; X64:       # BB#0:
; X64-NEXT:    vpermil2ps $0, %ymm2, %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %arg2 = bitcast <4 x i64> %a2 to <8 x i32>
  %res = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %a0, <8 x float> %a1, <8 x i32> %arg2, i8 0)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float>, <8 x float>, <8 x i32>, i8) nounwind readnone

; --- vfrcz* intrinsics ---

define <4 x float> @test_mm_frcz_ss(<4 x float> %a0) {
; X32-LABEL: test_mm_frcz_ss:
; X32:       # BB#0:
; X32-NEXT:    vfrczss %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_frcz_ss:
; X64:       # BB#0:
; X64-NEXT:    vfrczss %xmm0, %xmm0
; X64-NEXT:    retq
  %res = call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float>) nounwind readnone

define <2 x double> @test_mm_frcz_sd(<2 x double> %a0) {
; X32-LABEL: test_mm_frcz_sd:
; X32:       # BB#0:
; X32-NEXT:    vfrczsd %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_frcz_sd:
; X64:       # BB#0:
; X64-NEXT:    vfrczsd %xmm0, %xmm0
; X64-NEXT:    retq
  %res = call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %a0)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double>) nounwind readnone

define <4 x float> @test_mm_frcz_ps(<4 x float> %a0) {
; X32-LABEL: test_mm_frcz_ps:
; X32:       # BB#0:
; X32-NEXT:    vfrczps %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_frcz_ps:
; X64:       # BB#0:
; X64-NEXT:    vfrczps %xmm0, %xmm0
; X64-NEXT:    retq
  %res = call <4 x float> @llvm.x86.xop.vfrcz.ps(<4 x float> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.xop.vfrcz.ps(<4 x float>) nounwind readnone

define <2 x double> @test_mm_frcz_pd(<2 x double> %a0) {
; X32-LABEL: test_mm_frcz_pd:
; X32:       # BB#0:
; X32-NEXT:    vfrczpd %xmm0,
%xmm0 1055; X32-NEXT: retl 1056; 1057; X64-LABEL: test_mm_frcz_pd: 1058; X64: # BB#0: 1059; X64-NEXT: vfrczpd %xmm0, %xmm0 1060; X64-NEXT: retq 1061 %res = call <2 x double> @llvm.x86.xop.vfrcz.pd(<2 x double> %a0) 1062 ret <2 x double> %res 1063} 1064declare <2 x double> @llvm.x86.xop.vfrcz.pd(<2 x double>) nounwind readnone 1065 1066define <8 x float> @test_mm256_frcz_ps(<8 x float> %a0) { 1067; X32-LABEL: test_mm256_frcz_ps: 1068; X32: # BB#0: 1069; X32-NEXT: vfrczps %ymm0, %ymm0 1070; X32-NEXT: retl 1071; 1072; X64-LABEL: test_mm256_frcz_ps: 1073; X64: # BB#0: 1074; X64-NEXT: vfrczps %ymm0, %ymm0 1075; X64-NEXT: retq 1076 %res = call <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float> %a0) 1077 ret <8 x float> %res 1078} 1079declare <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float>) nounwind readnone 1080 1081define <4 x double> @test_mm256_frcz_pd(<4 x double> %a0) { 1082; X32-LABEL: test_mm256_frcz_pd: 1083; X32: # BB#0: 1084; X32-NEXT: vfrczpd %ymm0, %ymm0 1085; X32-NEXT: retl 1086; 1087; X64-LABEL: test_mm256_frcz_pd: 1088; X64: # BB#0: 1089; X64-NEXT: vfrczpd %ymm0, %ymm0 1090; X64-NEXT: retq 1091 %res = call <4 x double> @llvm.x86.xop.vfrcz.pd.256(<4 x double> %a0) 1092 ret <4 x double> %res 1093} 1094declare <4 x double> @llvm.x86.xop.vfrcz.pd.256(<4 x double>) nounwind readnone 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112