; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=X32
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=X64

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/ssse3-builtins.c

; Every test below takes and returns <2 x i64>, bitcasts to the element type
; the operation works on, performs one operation, and bitcasts the result back.
; The expected assembly for each triple is listed inside the function body.

; llvm.x86.ssse3.pabs.b.128 on the <16 x i8> view of the argument.
define <2 x i64> @test_mm_abs_epi8(<2 x i64> %a0) {
; X32-LABEL: test_mm_abs_epi8:
; X32: # BB#0:
; X32-NEXT: pabsb %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_abs_epi8:
; X64: # BB#0:
; X64-NEXT: pabsb %xmm0, %xmm0
; X64-NEXT: retq
  %v = bitcast <2 x i64> %a0 to <16 x i8>
  %op = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %v)
  %cast = bitcast <16 x i8> %op to <2 x i64>
  ret <2 x i64> %cast
}
declare <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8>) nounwind readnone

; llvm.x86.ssse3.pabs.w.128 on the <8 x i16> view of the argument.
define <2 x i64> @test_mm_abs_epi16(<2 x i64> %a0) {
; X32-LABEL: test_mm_abs_epi16:
; X32: # BB#0:
; X32-NEXT: pabsw %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_abs_epi16:
; X64: # BB#0:
; X64-NEXT: pabsw %xmm0, %xmm0
; X64-NEXT: retq
  %v = bitcast <2 x i64> %a0 to <8 x i16>
  %op = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %v)
  %cast = bitcast <8 x i16> %op to <2 x i64>
  ret <2 x i64> %cast
}
declare <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16>) nounwind readnone

; llvm.x86.ssse3.pabs.d.128 on the <4 x i32> view of the argument.
define <2 x i64> @test_mm_abs_epi32(<2 x i64> %a0) {
; X32-LABEL: test_mm_abs_epi32:
; X32: # BB#0:
; X32-NEXT: pabsd %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_abs_epi32:
; X64: # BB#0:
; X64-NEXT: pabsd %xmm0, %xmm0
; X64-NEXT: retq
  %v = bitcast <2 x i64> %a0 to <4 x i32>
  %op = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %v)
  %cast = bitcast <4 x i32> %op to <2 x i64>
  ret <2 x i64> %cast
}
declare <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32>) nounwind readnone

; No intrinsic call here: the operation is a plain shufflevector selecting
; bytes 2..15 of the first operand followed by bytes 0..1 of the second
; (mask indices 2..17), which is expected to be matched to palignr.
define <2 x i64> @test_mm_alignr_epi8(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_alignr_epi8:
; X32: # BB#0:
; X32-NEXT: palignr {{.*#}} xmm1 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1]
; X32-NEXT: movdqa %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_alignr_epi8:
; X64: # BB#0:
; X64-NEXT: palignr {{.*#}} xmm1 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1]
; X64-NEXT: movdqa %xmm1, %xmm0
; X64-NEXT: retq
  %lhs = bitcast <2 x i64> %a0 to <16 x i8>
  %rhs = bitcast <2 x i64> %a1 to <16 x i8>
  %op = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17>
  %cast = bitcast <16 x i8> %op to <2 x i64>
  ret <2 x i64> %cast
}

; llvm.x86.ssse3.phadd.w.128 on the <8 x i16> views of both arguments.
define <2 x i64> @test_mm_hadd_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_hadd_epi16:
; X32: # BB#0:
; X32-NEXT: phaddw %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_hadd_epi16:
; X64: # BB#0:
; X64-NEXT: phaddw %xmm1, %xmm0
; X64-NEXT: retq
  %lhs = bitcast <2 x i64> %a0 to <8 x i16>
  %rhs = bitcast <2 x i64> %a1 to <8 x i16>
  %op = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %lhs, <8 x i16> %rhs)
  %cast = bitcast <8 x i16> %op to <2 x i64>
  ret <2 x i64> %cast
}
declare <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16>, <8 x i16>) nounwind readnone

; llvm.x86.ssse3.phadd.d.128 on the <4 x i32> views of both arguments.
define <2 x i64> @test_mm_hadd_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_hadd_epi32:
; X32: # BB#0:
; X32-NEXT: phaddd %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_hadd_epi32:
; X64: # BB#0:
; X64-NEXT: phaddd %xmm1, %xmm0
; X64-NEXT: retq
  %lhs = bitcast <2 x i64> %a0 to <4 x i32>
  %rhs = bitcast <2 x i64> %a1 to <4 x i32>
  %op = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %lhs, <4 x i32> %rhs)
  %cast = bitcast <4 x i32> %op to <2 x i64>
  ret <2 x i64> %cast
}
declare <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32>, <4 x i32>) nounwind readnone

; llvm.x86.ssse3.phadd.sw.128 on the <8 x i16> views of both arguments.
define <2 x i64> @test_mm_hadds_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_hadds_epi16:
; X32: # BB#0:
; X32-NEXT: phaddsw %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_hadds_epi16:
; X64: # BB#0:
; X64-NEXT: phaddsw %xmm1, %xmm0
; X64-NEXT: retq
  %lhs = bitcast <2 x i64> %a0 to <8 x i16>
  %rhs = bitcast <2 x i64> %a1 to <8 x i16>
  %op = call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %lhs, <8 x i16> %rhs)
  %cast = bitcast <8 x i16> %op to <2 x i64>
  ret <2 x i64> %cast
}
declare <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16>, <8 x i16>) nounwind readnone

; llvm.x86.ssse3.phsub.w.128 on the <8 x i16> views of both arguments.
define <2 x i64> @test_mm_hsub_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_hsub_epi16:
; X32: # BB#0:
; X32-NEXT: phsubw %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_hsub_epi16:
; X64: # BB#0:
; X64-NEXT: phsubw %xmm1, %xmm0
; X64-NEXT: retq
  %lhs = bitcast <2 x i64> %a0 to <8 x i16>
  %rhs = bitcast <2 x i64> %a1 to <8 x i16>
  %op = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %lhs, <8 x i16> %rhs)
  %cast = bitcast <8 x i16> %op to <2 x i64>
  ret <2 x i64> %cast
}
declare <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16>, <8 x i16>) nounwind readnone

; llvm.x86.ssse3.phsub.d.128 on the <4 x i32> views of both arguments.
define <2 x i64> @test_mm_hsub_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_hsub_epi32:
; X32: # BB#0:
; X32-NEXT: phsubd %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_hsub_epi32:
; X64: # BB#0:
; X64-NEXT: phsubd %xmm1, %xmm0
; X64-NEXT: retq
  %lhs = bitcast <2 x i64> %a0 to <4 x i32>
  %rhs = bitcast <2 x i64> %a1 to <4 x i32>
  %op = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %lhs, <4 x i32> %rhs)
  %cast = bitcast <4 x i32> %op to <2 x i64>
  ret <2 x i64> %cast
}
declare <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32>, <4 x i32>) nounwind readnone

; llvm.x86.ssse3.phsub.sw.128 on the <8 x i16> views of both arguments.
define <2 x i64> @test_mm_hsubs_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_hsubs_epi16:
; X32: # BB#0:
; X32-NEXT: phsubsw %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_hsubs_epi16:
; X64: # BB#0:
; X64-NEXT: phsubsw %xmm1, %xmm0
; X64-NEXT: retq
  %lhs = bitcast <2 x i64> %a0 to <8 x i16>
  %rhs = bitcast <2 x i64> %a1 to <8 x i16>
  %op = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %lhs, <8 x i16> %rhs)
  %cast = bitcast <8 x i16> %op to <2 x i64>
  ret <2 x i64> %cast
}
declare <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone

; llvm.x86.ssse3.pmadd.ub.sw.128: operands are viewed as <16 x i8>, while the
; intrinsic's result type is <8 x i16>.
define <2 x i64> @test_mm_maddubs_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_maddubs_epi16:
; X32: # BB#0:
; X32-NEXT: pmaddubsw %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_maddubs_epi16:
; X64: # BB#0:
; X64-NEXT: pmaddubsw %xmm1, %xmm0
; X64-NEXT: retq
  %lhs = bitcast <2 x i64> %a0 to <16 x i8>
  %rhs = bitcast <2 x i64> %a1 to <16 x i8>
  %op = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %lhs, <16 x i8> %rhs)
  %cast = bitcast <8 x i16> %op to <2 x i64>
  ret <2 x i64> %cast
}
declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>) nounwind readnone

; llvm.x86.ssse3.pmul.hr.sw.128 on the <8 x i16> views of both arguments.
define <2 x i64> @test_mm_mulhrs_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_mulhrs_epi16:
; X32: # BB#0:
; X32-NEXT: pmulhrsw %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_mulhrs_epi16:
; X64: # BB#0:
; X64-NEXT: pmulhrsw %xmm1, %xmm0
; X64-NEXT: retq
  %lhs = bitcast <2 x i64> %a0 to <8 x i16>
  %rhs = bitcast <2 x i64> %a1 to <8 x i16>
  %op = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %lhs, <8 x i16> %rhs)
  %cast = bitcast <8 x i16> %op to <2 x i64>
  ret <2 x i64> %cast
}
declare <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16>, <8 x i16>) nounwind readnone

; llvm.x86.ssse3.pshuf.b.128 on the <16 x i8> views of both arguments.
define <2 x i64> @test_mm_shuffle_epi8(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_shuffle_epi8:
; X32: # BB#0:
; X32-NEXT: pshufb %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_shuffle_epi8:
; X64: # BB#0:
; X64-NEXT: pshufb %xmm1, %xmm0
; X64-NEXT: retq
  %lhs = bitcast <2 x i64> %a0 to <16 x i8>
  %rhs = bitcast <2 x i64> %a1 to <16 x i8>
  %op = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %lhs, <16 x i8> %rhs)
  %cast = bitcast <16 x i8> %op to <2 x i64>
  ret <2 x i64> %cast
}
declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) nounwind readnone

; llvm.x86.ssse3.psign.b.128 on the <16 x i8> views of both arguments.
define <2 x i64> @test_mm_sign_epi8(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_sign_epi8:
; X32: # BB#0:
; X32-NEXT: psignb %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_sign_epi8:
; X64: # BB#0:
; X64-NEXT: psignb %xmm1, %xmm0
; X64-NEXT: retq
  %lhs = bitcast <2 x i64> %a0 to <16 x i8>
  %rhs = bitcast <2 x i64> %a1 to <16 x i8>
  %op = call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %lhs, <16 x i8> %rhs)
  %cast = bitcast <16 x i8> %op to <2 x i64>
  ret <2 x i64> %cast
}
declare <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8>, <16 x i8>) nounwind readnone

; llvm.x86.ssse3.psign.w.128 on the <8 x i16> views of both arguments.
define <2 x i64> @test_mm_sign_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_sign_epi16:
; X32: # BB#0:
; X32-NEXT: psignw %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_sign_epi16:
; X64: # BB#0:
; X64-NEXT: psignw %xmm1, %xmm0
; X64-NEXT: retq
  %lhs = bitcast <2 x i64> %a0 to <8 x i16>
  %rhs = bitcast <2 x i64> %a1 to <8 x i16>
  %op = call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %lhs, <8 x i16> %rhs)
  %cast = bitcast <8 x i16> %op to <2 x i64>
  ret <2 x i64> %cast
}
declare <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16>, <8 x i16>) nounwind readnone

; llvm.x86.ssse3.psign.d.128 on the <4 x i32> views of both arguments.
define <2 x i64> @test_mm_sign_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_sign_epi32:
; X32: # BB#0:
; X32-NEXT: psignd %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_sign_epi32:
; X64: # BB#0:
; X64-NEXT: psignd %xmm1, %xmm0
; X64-NEXT: retq
  %lhs = bitcast <2 x i64> %a0 to <4 x i32>
  %rhs = bitcast <2 x i64> %a1 to <4 x i32>
  %op = call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %lhs, <4 x i32> %rhs)
  %cast = bitcast <4 x i32> %op to <2 x i64>
  ret <2 x i64> %cast
}
declare <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32>, <4 x i32>) nounwind readnone