; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefix=AVX
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefix=AVX

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/ssse3-builtins.c

define <2 x i64> @test_mm_abs_epi8(<2 x i64> %a0) {
; SSE-LABEL: test_mm_abs_epi8:
; SSE:       # %bb.0:
; SSE-NEXT:    pabsb %xmm0, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_abs_epi8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpabsb %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %arg = bitcast <2 x i64> %a0 to <16 x i8>
  %abs = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %arg, i1 false)
  %res = bitcast <16 x i8> %abs to <2 x i64>
  ret <2 x i64> %res
}
declare <16 x i8> @llvm.abs.v16i8(<16 x i8>, i1) nounwind readnone

define <2 x i64> @test_mm_abs_epi16(<2 x i64> %a0) {
; SSE-LABEL: test_mm_abs_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    pabsw %xmm0, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_abs_epi16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpabsw %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %arg = bitcast <2 x i64> %a0 to <8 x i16>
  %abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %arg, i1 false)
  %res = bitcast <8 x i16> %abs to <2 x i64>
  ret <2 x i64> %res
}
declare <8 x i16> @llvm.abs.v8i16(<8 x i16>, i1) nounwind readnone

define <2 x i64> @test_mm_abs_epi32(<2 x i64> %a0) {
; SSE-LABEL: test_mm_abs_epi32:
; SSE:       # %bb.0:
; SSE-NEXT:    pabsd %xmm0, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_abs_epi32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpabsd %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %arg = bitcast <2 x i64> %a0 to <4 x i32>
  %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %arg, i1 false)
  %res = bitcast <4 x i32> %abs to <2 x i64>
  ret <2 x i64> %res
}
declare <4 x i32> @llvm.abs.v4i32(<4 x i32>, i1) nounwind readnone

define <2 x i64> @test_mm_alignr_epi8(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_alignr_epi8:
; SSE:       # %bb.0:
; SSE-NEXT:    palignr {{.*#+}} xmm1 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1]
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_alignr_epi8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1]
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %shuf = shufflevector <16 x i8> %arg0, <16 x i8> %arg1, <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17>
  %res = bitcast <16 x i8> %shuf to <2 x i64>
  ret <2 x i64> %res
}

define <2 x i64> @test2_mm_alignr_epi8(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test2_mm_alignr_epi8:
; SSE:       # %bb.0:
; SSE-NEXT:    palignr {{.*#+}} xmm1 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0]
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test2_mm_alignr_epi8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0]
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %shuf = shufflevector <16 x i8> %arg0, <16 x i8> %arg1, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>
  %res = bitcast <16 x i8> %shuf to <2 x i64>
  ret <2 x i64> %res
}

define <2 x i64> @test_mm_hadd_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_hadd_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    phaddw %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_hadd_epi16:
; AVX:       # %bb.0:
; AVX-NEXT:    vphaddw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %call = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %arg0, <8 x i16> %arg1)
  %res = bitcast <8 x i16> %call to <2 x i64>
  ret <2 x i64> %res
}
declare <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16>, <8 x i16>) nounwind readnone

define <2 x i64> @test_mm_hadd_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_hadd_epi32:
; SSE:       # %bb.0:
; SSE-NEXT:    phaddd %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_hadd_epi32:
; AVX:       # %bb.0:
; AVX-NEXT:    vphaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %call = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %arg0, <4 x i32> %arg1)
  %res = bitcast <4 x i32> %call to <2 x i64>
  ret <2 x i64> %res
}
declare <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32>, <4 x i32>) nounwind readnone

define <2 x i64> @test_mm_hadds_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_hadds_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    phaddsw %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_hadds_epi16:
; AVX:       # %bb.0:
; AVX-NEXT:    vphaddsw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %call = call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %arg0, <8 x i16> %arg1)
  %res = bitcast <8 x i16> %call to <2 x i64>
  ret <2 x i64> %res
}
declare <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16>, <8 x i16>) nounwind readnone

define <2 x i64> @test_mm_hsub_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_hsub_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    phsubw %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_hsub_epi16:
; AVX:       # %bb.0:
; AVX-NEXT:    vphsubw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %call = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %arg0, <8 x i16> %arg1)
  %res = bitcast <8 x i16> %call to <2 x i64>
  ret <2 x i64> %res
}
declare <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16>, <8 x i16>) nounwind readnone

define <2 x i64> @test_mm_hsub_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_hsub_epi32:
; SSE:       # %bb.0:
; SSE-NEXT:    phsubd %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_hsub_epi32:
; AVX:       # %bb.0:
; AVX-NEXT:    vphsubd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %call = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %arg0, <4 x i32> %arg1)
  %res = bitcast <4 x i32> %call to <2 x i64>
  ret <2 x i64> %res
}
declare <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32>, <4 x i32>) nounwind readnone

define <2 x i64> @test_mm_hsubs_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_hsubs_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    phsubsw %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_hsubs_epi16:
; AVX:       # %bb.0:
; AVX-NEXT:    vphsubsw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %call = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %arg0, <8 x i16> %arg1)
  %res = bitcast <8 x i16> %call to <2 x i64>
  ret <2 x i64> %res
}
declare <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone

define <2 x i64> @test_mm_maddubs_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_maddubs_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    pmaddubsw %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_maddubs_epi16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmaddubsw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %call = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %arg0, <16 x i8> %arg1)
  %res = bitcast <8 x i16> %call to <2 x i64>
  ret <2 x i64> %res
}
declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>) nounwind readnone

define <2 x i64> @test_mm_mulhrs_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_mulhrs_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    pmulhrsw %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_mulhrs_epi16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmulhrsw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %call = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %arg0, <8 x i16> %arg1)
  %res = bitcast <8 x i16> %call to <2 x i64>
  ret <2 x i64> %res
}
declare <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16>, <8 x i16>) nounwind readnone

define <2 x i64> @test_mm_shuffle_epi8(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_shuffle_epi8:
; SSE:       # %bb.0:
; SSE-NEXT:    pshufb %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_shuffle_epi8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %call = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %arg0, <16 x i8> %arg1)
  %res = bitcast <16 x i8> %call to <2 x i64>
  ret <2 x i64> %res
}
declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) nounwind readnone

define <2 x i64> @test_mm_sign_epi8(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_sign_epi8:
; SSE:       # %bb.0:
; SSE-NEXT:    psignb %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_sign_epi8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsignb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %call = call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %arg0, <16 x i8> %arg1)
  %res = bitcast <16 x i8> %call to <2 x i64>
  ret <2 x i64> %res
}
declare <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8>, <16 x i8>) nounwind readnone

define <2 x i64> @test_mm_sign_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_sign_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    psignw %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_sign_epi16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsignw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %call = call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %arg0, <8 x i16> %arg1)
  %res = bitcast <8 x i16> %call to <2 x i64>
  ret <2 x i64> %res
}
declare <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16>, <8 x i16>) nounwind readnone

define <2 x i64> @test_mm_sign_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_sign_epi32:
; SSE:       # %bb.0:
; SSE-NEXT:    psignd %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_sign_epi32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsignd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %call = call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %arg0, <4 x i32> %arg1)
  %res = bitcast <4 x i32> %call to <2 x i64>
  ret <2 x i64> %res
}
declare <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32>, <4 x i32>) nounwind readnone