; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,MASK
; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+sse2,+fast-vector-shift-masks | FileCheck %s --check-prefixes=CHECK,SHIFT
; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=btver1 | FileCheck %s --check-prefixes=CHECK,SHIFT

; Codegen tests for vector shifts by splat constants on x86-64 with SSE2.
; Each function below pins the exact instruction sequence expected from llc.

; SSE2 Logical Shift Left

; v8i16 shl by 0: expected to fold away, leaving only the return.
define <8 x i16> @test_sllw_1(<8 x i16> %InVec) {
; CHECK-LABEL: test_sllw_1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: retq
entry:
  %res = shl <8 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
  ret <8 x i16> %res
}

; v8i16 shl by 1: expected as an add of the register to itself (paddw).
define <8 x i16> @test_sllw_2(<8 x i16> %InVec) {
; CHECK-LABEL: test_sllw_2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: paddw %xmm0, %xmm0
; CHECK-NEXT: retq
entry:
  %res = shl <8 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %res
}

; v8i16 shl by 15: expected as a single immediate psllw.
define <8 x i16> @test_sllw_3(<8 x i16> %InVec) {
; CHECK-LABEL: test_sllw_3:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: psllw $15, %xmm0
; CHECK-NEXT: retq
entry:
  %res = shl <8 x i16> %InVec, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  ret <8 x i16> %res
}

; v4i32 shl by 0: expected to fold away, leaving only the return.
define <4 x i32> @test_slld_1(<4 x i32> %InVec) {
; CHECK-LABEL: test_slld_1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: retq
entry:
  %res = shl <4 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0>
  ret <4 x i32> %res
}

; v4i32 shl by 1: expected as an add of the register to itself (paddd).
define <4 x i32> @test_slld_2(<4 x i32> %InVec) {
; CHECK-LABEL: test_slld_2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: paddd %xmm0, %xmm0
; CHECK-NEXT: retq
entry:
  %res = shl <4 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %res
}

; v4i32 shl by 31: expected as a single immediate pslld.
define <4 x i32> @test_slld_3(<4 x i32> %InVec) {
; CHECK-LABEL: test_slld_3:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pslld $31, %xmm0
; CHECK-NEXT: retq
entry:
  %res = shl <4 x i32> %InVec, <i32 31, i32 31, i32 31, i32 31>
  ret <4 x i32> %res
}

; v2i64 shl by 0: expected to fold away, leaving only the return.
define <2 x i64> @test_sllq_1(<2 x i64> %InVec) {
; CHECK-LABEL: test_sllq_1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: retq
entry:
  %res = shl <2 x i64> %InVec, <i64 0, i64 0>
  ret <2 x i64> %res
}

; v2i64 shl by 1: expected as an add of the register to itself (paddq).
define <2 x i64> @test_sllq_2(<2 x i64> %InVec) {
; CHECK-LABEL: test_sllq_2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: paddq %xmm0, %xmm0
; CHECK-NEXT: retq
entry:
  %res = shl <2 x i64> %InVec, <i64 1, i64 1>
  ret <2 x i64> %res
}

; v2i64 shl by 63: expected as a single immediate psllq.
define <2 x i64> @test_sllq_3(<2 x i64> %InVec) {
; CHECK-LABEL: test_sllq_3:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: psllq $63, %xmm0
; CHECK-NEXT: retq
entry:
  %res = shl <2 x i64> %InVec, <i64 63, i64 63>
  ret <2 x i64> %res
}

; SSE2 Arithmetic Shift Right

; v8i16 ashr by 0: expected to fold away, leaving only the return.
define <8 x i16> @test_sraw_1(<8 x i16> %InVec) {
; CHECK-LABEL: test_sraw_1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: retq
entry:
  %res = ashr <8 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
  ret <8 x i16> %res
}

; v8i16 ashr by 1: expected as a single immediate psraw.
define <8 x i16> @test_sraw_2(<8 x i16> %InVec) {
; CHECK-LABEL: test_sraw_2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: psraw $1, %xmm0
; CHECK-NEXT: retq
entry:
  %res = ashr <8 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %res
}

; v8i16 ashr by 15: expected as a single immediate psraw.
define <8 x i16> @test_sraw_3(<8 x i16> %InVec) {
; CHECK-LABEL: test_sraw_3:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: psraw $15, %xmm0
; CHECK-NEXT: retq
entry:
  %res = ashr <8 x i16> %InVec, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  ret <8 x i16> %res
}

; v4i32 ashr by 0: expected to fold away, leaving only the return.
define <4 x i32> @test_srad_1(<4 x i32> %InVec) {
; CHECK-LABEL: test_srad_1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: retq
entry:
  %res = ashr <4 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0>
  ret <4 x i32> %res
}

; v4i32 ashr by 1: expected as a single immediate psrad.
define <4 x i32> @test_srad_2(<4 x i32> %InVec) {
; CHECK-LABEL: test_srad_2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: psrad $1, %xmm0
; CHECK-NEXT: retq
entry:
  %res = ashr <4 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %res
}

; v4i32 ashr by 31: expected as a single immediate psrad.
define <4 x i32> @test_srad_3(<4 x i32> %InVec) {
; CHECK-LABEL: test_srad_3:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: psrad $31, %xmm0
; CHECK-NEXT: retq
entry:
  %res = ashr <4 x i32> %InVec, <i32 31, i32 31, i32 31, i32 31>
  ret <4 x i32> %res
}

; SSE2 Logical Shift Right

; v8i16 lshr by 0: expected to fold away, leaving only the return.
define <8 x i16> @test_srlw_1(<8 x i16> %InVec) {
; CHECK-LABEL: test_srlw_1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: retq
entry:
  %res = lshr <8 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
  ret <8 x i16> %res
}

; v8i16 lshr by 1: expected as a single immediate psrlw.
define <8 x i16> @test_srlw_2(<8 x i16> %InVec) {
; CHECK-LABEL: test_srlw_2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: psrlw $1, %xmm0
; CHECK-NEXT: retq
entry:
  %res = lshr <8 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %res
}

; v8i16 lshr by 15: expected as a single immediate psrlw.
define <8 x i16> @test_srlw_3(<8 x i16> %InVec) {
; CHECK-LABEL: test_srlw_3:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: psrlw $15, %xmm0
; CHECK-NEXT: retq
entry:
  %res = lshr <8 x i16> %InVec, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  ret <8 x i16> %res
}

; v4i32 lshr by 0: expected to fold away, leaving only the return.
define <4 x i32> @test_srld_1(<4 x i32> %InVec) {
; CHECK-LABEL: test_srld_1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: retq
entry:
  %res = lshr <4 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0>
  ret <4 x i32> %res
}

; v4i32 lshr by 1: expected as a single immediate psrld.
define <4 x i32> @test_srld_2(<4 x i32> %InVec) {
; CHECK-LABEL: test_srld_2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: psrld $1, %xmm0
; CHECK-NEXT: retq
entry:
  %res = lshr <4 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %res
}

; v4i32 lshr by 31: expected as a single immediate psrld.
define <4 x i32> @test_srld_3(<4 x i32> %InVec) {
; CHECK-LABEL: test_srld_3:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: psrld $31, %xmm0
; CHECK-NEXT: retq
entry:
  %res = lshr <4 x i32> %InVec, <i32 31, i32 31, i32 31, i32 31>
  ret <4 x i32> %res
}

; v2i64 lshr by 0: expected to fold away, leaving only the return.
define <2 x i64> @test_srlq_1(<2 x i64> %InVec) {
; CHECK-LABEL: test_srlq_1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: retq
entry:
  %res = lshr <2 x i64> %InVec, <i64 0, i64 0>
  ret <2 x i64> %res
}

; v2i64 lshr by 1: expected as a single immediate psrlq.
define <2 x i64> @test_srlq_2(<2 x i64> %InVec) {
; CHECK-LABEL: test_srlq_2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: psrlq $1, %xmm0
; CHECK-NEXT: retq
entry:
  %res = lshr <2 x i64> %InVec, <i64 1, i64 1>
  ret <2 x i64> %res
}

; v2i64 lshr by 63: expected as a single immediate psrlq.
define <2 x i64> @test_srlq_3(<2 x i64> %InVec) {
; CHECK-LABEL: test_srlq_3:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: psrlq $63, %xmm0
; CHECK-NEXT: retq
entry:
  %res = lshr <2 x i64> %InVec, <i64 63, i64 63>
  ret <2 x i64> %res
}

; Chains of constant shifts

; ashr by 2 then ashr by 4: expected to merge into one psrad by 6.
define <4 x i32> @sra_sra_v4i32(<4 x i32> %x) nounwind {
; CHECK-LABEL: sra_sra_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: psrad $6, %xmm0
; CHECK-NEXT: retq
  %inner = ashr <4 x i32> %x, <i32 2, i32 2, i32 2, i32 2>
  %outer = ashr <4 x i32> %inner, <i32 4, i32 4, i32 4, i32 4>
  ret <4 x i32> %outer
}

; lshr by 2 then lshr by 4: expected to merge into one psrld by 6.
define <4 x i32> @srl_srl_v4i32(<4 x i32> %x) nounwind {
; CHECK-LABEL: srl_srl_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: psrld $6, %xmm0
; CHECK-NEXT: retq
  %inner = lshr <4 x i32> %x, <i32 2, i32 2, i32 2, i32 2>
  %outer = lshr <4 x i32> %inner, <i32 4, i32 4, i32 4, i32 4>
  ret <4 x i32> %outer
}

; shl by 4 then lshr by 4: expected as a single andps with a %rip-relative
; memory operand instead of two shifts.
define <4 x i32> @srl_shl_v4i32(<4 x i32> %x) nounwind {
; CHECK-LABEL: srl_shl_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: andps {{.*}}(%rip), %xmm0
; CHECK-NEXT: retq
  %shl = shl <4 x i32> %x, <i32 4, i32 4, i32 4, i32 4>
  %srl = lshr <4 x i32> %shl, <i32 4, i32 4, i32 4, i32 4>
  ret <4 x i32> %srl
}

; lshr by 31 of a variable-amount ashr: the expected output contains only a
; psrld by 31; no instruction for the ashr by %y is expected.
define <4 x i32> @srl_sra_31_v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
; CHECK-LABEL: srl_sra_31_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: psrld $31, %xmm0
; CHECK-NEXT: retq
  %sra = ashr <4 x i32> %x, %y
  %srl = lshr <4 x i32> %sra, <i32 31, i32 31, i32 31, i32 31>
  ret <4 x i32> %srl
}

; shl by 2 then shl by 4: expected to merge into one pslld by 6.
define <4 x i32> @shl_shl_v4i32(<4 x i32> %x) nounwind {
; CHECK-LABEL: shl_shl_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: pslld $6, %xmm0
; CHECK-NEXT: retq
  %inner = shl <4 x i32> %x, <i32 2, i32 2, i32 2, i32 2>
  %outer = shl <4 x i32> %inner, <i32 4, i32 4, i32 4, i32 4>
  ret <4 x i32> %outer
}

; ashr by 4 then shl by 4: expected as a single andps with a %rip-relative
; memory operand instead of two shifts.
define <4 x i32> @shl_sra_v4i32(<4 x i32> %x) nounwind {
; CHECK-LABEL: shl_sra_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: andps {{.*}}(%rip), %xmm0
; CHECK-NEXT: retq
  %sra = ashr <4 x i32> %x, <i32 4, i32 4, i32 4, i32 4>
  %shl = shl <4 x i32> %sra, <i32 4, i32 4, i32 4, i32 4>
  ret <4 x i32> %shl
}

; lshr by 2 then shl by 5. The plain +sse2 run expects one shift (pslld by 3)
; followed by a pand; the +fast-vector-shift-masks and btver1 runs expect the
; two shifts to be kept (psrld by 2, pslld by 5).
define <4 x i32> @shl_srl_v4i32(<4 x i32> %x) nounwind {
; MASK-LABEL: shl_srl_v4i32:
; MASK: # %bb.0:
; MASK-NEXT: pslld $3, %xmm0
; MASK-NEXT: pand {{.*}}(%rip), %xmm0
; MASK-NEXT: retq
;
; SHIFT-LABEL: shl_srl_v4i32:
; SHIFT: # %bb.0:
; SHIFT-NEXT: psrld $2, %xmm0
; SHIFT-NEXT: pslld $5, %xmm0
; SHIFT-NEXT: retq
  %srl = lshr <4 x i32> %x, <i32 2, i32 2, i32 2, i32 2>
  %shl = shl <4 x i32> %srl, <i32 5, i32 5, i32 5, i32 5>
  ret <4 x i32> %shl
}

; v4i16 lshr by 2, zext to v4i32, shl by 2: expected as a pand followed by an
; unpack with a zeroed register; no shift instruction is expected.
define <4 x i32> @shl_zext_srl_v4i32(<4 x i16> %x) nounwind {
; CHECK-LABEL: shl_zext_srl_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: pand {{.*}}(%rip), %xmm0
; CHECK-NEXT: pxor %xmm1, %xmm1
; CHECK-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; CHECK-NEXT: retq
  %narrow = lshr <4 x i16> %x, <i16 2, i16 2, i16 2, i16 2>
  %wide = zext <4 x i16> %narrow to <4 x i32>
  %shl = shl <4 x i32> %wide, <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i32> %shl
}

; v4i32 lshr by 16, trunc to v4i16, ashr by 3: expected as one psrad by 19
; followed by a packssdw.
define <4 x i16> @sra_trunc_srl_v4i32(<4 x i32> %x) nounwind {
; CHECK-LABEL: sra_trunc_srl_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: psrad $19, %xmm0
; CHECK-NEXT: packssdw %xmm0, %xmm0
; CHECK-NEXT: retq
  %wide = lshr <4 x i32> %x, <i32 16, i32 16, i32 16, i32 16>
  %narrow = trunc <4 x i32> %wide to <4 x i16>
  %sra = ashr <4 x i16> %narrow, <i16 3, i16 3, i16 3, i16 3>
  ret <4 x i16> %sra
}

; v4i16 shl by 2, zext to v4i32, shl by 17: expected as an unpack followed by
; one pslld by 19.
define <4 x i32> @shl_zext_shl_v4i32(<4 x i16> %x) nounwind {
; CHECK-LABEL: shl_zext_shl_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; CHECK-NEXT: pslld $19, %xmm0
; CHECK-NEXT: retq
  %narrow = shl <4 x i16> %x, <i16 2, i16 2, i16 2, i16 2>
  %wide = zext <4 x i16> %narrow to <4 x i32>
  %shl = shl <4 x i32> %wide, <i32 17, i32 17, i32 17, i32 17>
  ret <4 x i32> %shl
}

; Single splat-constant shifts

; v4i32 ashr by 3: expected as a single immediate psrad.
define <4 x i32> @sra_v4i32(<4 x i32> %x) nounwind {
; CHECK-LABEL: sra_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: psrad $3, %xmm0
; CHECK-NEXT: retq
  %sra = ashr <4 x i32> %x, <i32 3, i32 3, i32 3, i32 3>
  ret <4 x i32> %sra
}

; v4i32 lshr by 3: expected as a single immediate psrld.
define <4 x i32> @srl_v4i32(<4 x i32> %x) nounwind {
; CHECK-LABEL: srl_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: psrld $3, %xmm0
; CHECK-NEXT: retq
  %srl = lshr <4 x i32> %x, <i32 3, i32 3, i32 3, i32 3>
  ret <4 x i32> %srl
}

; v4i32 shl by 3: expected as a single immediate pslld.
define <4 x i32> @shl_v4i32(<4 x i32> %x) nounwind {
; CHECK-LABEL: shl_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: pslld $3, %xmm0
; CHECK-NEXT: retq
  %shl = shl <4 x i32> %x, <i32 3, i32 3, i32 3, i32 3>
  ret <4 x i32> %shl
}