; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=X32
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=X64

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/ssse3-builtins.c

; Layout of every test below: the <2 x i64> arguments are bitcast to the
; element type of the operation under test, the operation is performed (an
; @llvm.x86.ssse3.* intrinsic call, or a plain shufflevector in the two alignr
; tests), and the result is bitcast back to <2 x i64>. The expected assembly is
; listed once per llc invocation (i386 and x86_64); the two listings differ
; only in the return mnemonic (retl vs. retq).

; pabs.b.128 on <16 x i8>; expected to select a single pabsb.
define <2 x i64> @test_mm_abs_epi8(<2 x i64> %a0) {
; X32-LABEL: test_mm_abs_epi8:
; X32:       # BB#0:
; X32-NEXT:    pabsb %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_abs_epi8:
; X64:       # BB#0:
; X64-NEXT:    pabsb %xmm0, %xmm0
; X64-NEXT:    retq
  %arg = bitcast <2 x i64> %a0 to <16 x i8>
  %call = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %arg)
  %res = bitcast <16 x i8> %call to <2 x i64>
  ret <2 x i64> %res
}
declare <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8>) nounwind readnone

; pabs.w.128 on <8 x i16>; expected to select a single pabsw.
define <2 x i64> @test_mm_abs_epi16(<2 x i64> %a0) {
; X32-LABEL: test_mm_abs_epi16:
; X32:       # BB#0:
; X32-NEXT:    pabsw %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_abs_epi16:
; X64:       # BB#0:
; X64-NEXT:    pabsw %xmm0, %xmm0
; X64-NEXT:    retq
  %arg = bitcast <2 x i64> %a0 to <8 x i16>
  %call = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %arg)
  %res = bitcast <8 x i16> %call to <2 x i64>
  ret <2 x i64> %res
}
declare <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16>) nounwind readnone

; pabs.d.128 on <4 x i32>; expected to select a single pabsd.
define <2 x i64> @test_mm_abs_epi32(<2 x i64> %a0) {
; X32-LABEL: test_mm_abs_epi32:
; X32:       # BB#0:
; X32-NEXT:    pabsd %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_abs_epi32:
; X64:       # BB#0:
; X64-NEXT:    pabsd %xmm0, %xmm0
; X64-NEXT:    retq
  %arg = bitcast <2 x i64> %a0 to <4 x i32>
  %call = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %arg)
  %res = bitcast <4 x i32> %call to <2 x i64>
  ret <2 x i64> %res
}
declare <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32>) nounwind readnone

; No intrinsic here: a shufflevector picks elements 2..17 of the concatenated
; <16 x i8> operands. Expected to become palignr into xmm1 followed by a
; movdqa that copies the result to xmm0.
define <2 x i64> @test_mm_alignr_epi8(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_alignr_epi8:
; X32:       # BB#0:
; X32-NEXT:    palignr {{.*#+}} xmm1 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1]
; X32-NEXT:    movdqa %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_alignr_epi8:
; X64:       # BB#0:
; X64-NEXT:    palignr {{.*#+}} xmm1 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1]
; X64-NEXT:    movdqa %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %shuf = shufflevector <16 x i8> %arg0, <16 x i8> %arg1, <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17>
  %res = bitcast <16 x i8> %shuf to <2 x i64>
  ret <2 x i64> %res
}

; Same shape as the previous test, but the shuffle mask selects elements 1..16
; (an odd, one-byte offset) of the concatenated operands.
define <2 x i64> @test2_mm_alignr_epi8(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test2_mm_alignr_epi8:
; X32:       # BB#0:
; X32-NEXT:    palignr {{.*#+}} xmm1 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0]
; X32-NEXT:    movdqa %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test2_mm_alignr_epi8:
; X64:       # BB#0:
; X64-NEXT:    palignr {{.*#+}} xmm1 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0]
; X64-NEXT:    movdqa %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %shuf = shufflevector <16 x i8> %arg0, <16 x i8> %arg1, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>
  %res = bitcast <16 x i8> %shuf to <2 x i64>
  ret <2 x i64> %res
}

; phadd.w.128 on <8 x i16>; expected to select a single phaddw.
define <2 x i64> @test_mm_hadd_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_hadd_epi16:
; X32:       # BB#0:
; X32-NEXT:    phaddw %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_hadd_epi16:
; X64:       # BB#0:
; X64-NEXT:    phaddw %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %call = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %arg0, <8 x i16> %arg1)
  %res = bitcast <8 x i16> %call to <2 x i64>
  ret <2 x i64> %res
}
declare <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16>, <8 x i16>) nounwind readnone

; phadd.d.128 on <4 x i32>; expected to select a single phaddd.
define <2 x i64> @test_mm_hadd_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_hadd_epi32:
; X32:       # BB#0:
; X32-NEXT:    phaddd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_hadd_epi32:
; X64:       # BB#0:
; X64-NEXT:    phaddd %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %call = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %arg0, <4 x i32> %arg1)
  %res = bitcast <4 x i32> %call to <2 x i64>
  ret <2 x i64> %res
}
declare <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32>, <4 x i32>) nounwind readnone

; phadd.sw.128 on <8 x i16>; expected to select a single phaddsw.
define <2 x i64> @test_mm_hadds_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_hadds_epi16:
; X32:       # BB#0:
; X32-NEXT:    phaddsw %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_hadds_epi16:
; X64:       # BB#0:
; X64-NEXT:    phaddsw %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %call = call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %arg0, <8 x i16> %arg1)
  %res = bitcast <8 x i16> %call to <2 x i64>
  ret <2 x i64> %res
}
declare <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16>, <8 x i16>) nounwind readnone

; phsub.w.128 on <8 x i16>; expected to select a single phsubw.
define <2 x i64> @test_mm_hsub_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_hsub_epi16:
; X32:       # BB#0:
; X32-NEXT:    phsubw %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_hsub_epi16:
; X64:       # BB#0:
; X64-NEXT:    phsubw %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %call = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %arg0, <8 x i16> %arg1)
  %res = bitcast <8 x i16> %call to <2 x i64>
  ret <2 x i64> %res
}
declare <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16>, <8 x i16>) nounwind readnone

; phsub.d.128 on <4 x i32>; expected to select a single phsubd.
define <2 x i64> @test_mm_hsub_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_hsub_epi32:
; X32:       # BB#0:
; X32-NEXT:    phsubd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_hsub_epi32:
; X64:       # BB#0:
; X64-NEXT:    phsubd %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %call = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %arg0, <4 x i32> %arg1)
  %res = bitcast <4 x i32> %call to <2 x i64>
  ret <2 x i64> %res
}
declare <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32>, <4 x i32>) nounwind readnone

; phsub.sw.128 on <8 x i16>; expected to select a single phsubsw.
define <2 x i64> @test_mm_hsubs_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_hsubs_epi16:
; X32:       # BB#0:
; X32-NEXT:    phsubsw %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_hsubs_epi16:
; X64:       # BB#0:
; X64-NEXT:    phsubsw %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %call = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %arg0, <8 x i16> %arg1)
  %res = bitcast <8 x i16> %call to <2 x i64>
  ret <2 x i64> %res
}
declare <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone

; pmadd.ub.sw.128 takes two <16 x i8> operands and returns <8 x i16>, so the
; argument and result bitcasts use different element types. Expected to select
; a single pmaddubsw.
define <2 x i64> @test_mm_maddubs_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_maddubs_epi16:
; X32:       # BB#0:
; X32-NEXT:    pmaddubsw %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_maddubs_epi16:
; X64:       # BB#0:
; X64-NEXT:    pmaddubsw %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %call = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %arg0, <16 x i8> %arg1)
  %res = bitcast <8 x i16> %call to <2 x i64>
  ret <2 x i64> %res
}
declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>) nounwind readnone

; pmul.hr.sw.128 on <8 x i16>; expected to select a single pmulhrsw.
define <2 x i64> @test_mm_mulhrs_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_mulhrs_epi16:
; X32:       # BB#0:
; X32-NEXT:    pmulhrsw %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_mulhrs_epi16:
; X64:       # BB#0:
; X64-NEXT:    pmulhrsw %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %call = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %arg0, <8 x i16> %arg1)
  %res = bitcast <8 x i16> %call to <2 x i64>
  ret <2 x i64> %res
}
declare <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16>, <8 x i16>) nounwind readnone

; pshuf.b.128 on <16 x i8>; expected to select a single pshufb.
define <2 x i64> @test_mm_shuffle_epi8(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_shuffle_epi8:
; X32:       # BB#0:
; X32-NEXT:    pshufb %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_shuffle_epi8:
; X64:       # BB#0:
; X64-NEXT:    pshufb %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %call = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %arg0, <16 x i8> %arg1)
  %res = bitcast <16 x i8> %call to <2 x i64>
  ret <2 x i64> %res
}
declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) nounwind readnone

; psign.b.128 on <16 x i8>; expected to select a single psignb.
define <2 x i64> @test_mm_sign_epi8(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_sign_epi8:
; X32:       # BB#0:
; X32-NEXT:    psignb %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_sign_epi8:
; X64:       # BB#0:
; X64-NEXT:    psignb %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %call = call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %arg0, <16 x i8> %arg1)
  %res = bitcast <16 x i8> %call to <2 x i64>
  ret <2 x i64> %res
}
declare <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8>, <16 x i8>) nounwind readnone

; psign.w.128 on <8 x i16>; expected to select a single psignw.
define <2 x i64> @test_mm_sign_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_sign_epi16:
; X32:       # BB#0:
; X32-NEXT:    psignw %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_sign_epi16:
; X64:       # BB#0:
; X64-NEXT:    psignw %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %call = call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %arg0, <8 x i16> %arg1)
  %res = bitcast <8 x i16> %call to <2 x i64>
  ret <2 x i64> %res
}
declare <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16>, <8 x i16>) nounwind readnone

; psign.d.128 on <4 x i32>; expected to select a single psignd.
define <2 x i64> @test_mm_sign_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_sign_epi32:
; X32:       # BB#0:
; X32-NEXT:    psignd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_sign_epi32:
; X64:       # BB#0:
; X64-NEXT:    psignd %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %call = call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %arg0, <4 x i32> %arg1)
  %res = bitcast <4 x i32> %call to <2 x i64>
  ret <2 x i64> %res
}
declare <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32>, <4 x i32>) nounwind readnone