; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s

; Tests AVX1 lowering of 256-bit vector shifts by constant and variable
; amounts. AVX1 has no 256-bit integer shift instructions, so each case is
; expected to split the ymm into two xmm halves, shift each half, and
; reassemble with vinsertf128.

;;; Shift left
define <8 x i32> @vshift00(<8 x i32> %a) {
; CHECK-LABEL: vshift00:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpslld $2, %xmm0, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpslld $2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  ; Uniform splat of 2 in every lane (the checked code shifts both halves by
  ; the same $2 immediate).
  %s = shl <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %s
}

define <16 x i16> @vshift01(<16 x i16> %a) {
; CHECK-LABEL: vshift01:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsllw $2, %xmm0, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpsllw $2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %s = shl <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <16 x i16> %s
}

define <4 x i64> @vshift02(<4 x i64> %a) {
; CHECK-LABEL: vshift02:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsllq $2, %xmm0, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpsllq $2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %s = shl <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
  ret <4 x i64> %s
}

;;; Logical Shift right
define <8 x i32> @vshift03(<8 x i32> %a) {
; CHECK-LABEL: vshift03:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsrld $2, %xmm0, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpsrld $2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  ; Uniform splat of 2 in every lane, matching the $2 immediates above.
  %s = lshr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %s
}

define <16 x i16> @vshift04(<16 x i16> %a) {
; CHECK-LABEL: vshift04:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsrlw $2, %xmm0, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpsrlw $2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %s = lshr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <16 x i16> %s
}

define <4 x i64> @vshift05(<4 x i64> %a) {
; CHECK-LABEL: vshift05:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsrlq $2, %xmm0, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpsrlq $2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %s = lshr <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
  ret <4 x i64> %s
}

;;; Arithmetic Shift right
define <8 x i32> @vshift06(<8 x i32> %a) {
; CHECK-LABEL: vshift06:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsrad $2, %xmm0, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpsrad $2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  ; Uniform splat of 2 in every lane, matching the $2 immediates above.
  %s = ashr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %s
}

define <16 x i16> @vshift07(<16 x i16> %a) {
; CHECK-LABEL: vshift07:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsraw $2, %xmm0, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpsraw $2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %s = ashr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <16 x i16> %s
}

; There is no vector i8 shift instruction; ashr by 2 is emulated with a
; vpsrlw + mask, then sign-corrected via the xor/sub trick with 0x20.
define <32 x i8> @vshift09(<32 x i8> %a) {
; CHECK-LABEL: vshift09:
; CHECK:       # BB#0:
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT:    vpsrlw $2, %xmm1, %xmm1
; CHECK-NEXT:    vmovdqa {{.*#+}} xmm2 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
; CHECK-NEXT:    vpand %xmm2, %xmm1, %xmm1
; CHECK-NEXT:    vmovdqa {{.*#+}} xmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
; CHECK-NEXT:    vpxor %xmm3, %xmm1, %xmm1
; CHECK-NEXT:    vpsubb %xmm3, %xmm1, %xmm1
; CHECK-NEXT:    vpsrlw $2, %xmm0, %xmm0
; CHECK-NEXT:    vpand %xmm2, %xmm0, %xmm0
; CHECK-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; CHECK-NEXT:    vpsubb %xmm3, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %s = ashr <32 x i8> %a, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
  ret <32 x i8> %s
}

; ashr by 7 on i8 lanes is just a sign-splat, done with vpcmpgtb vs zero.
define <32 x i8> @vshift10(<32 x i8> %a) {
; CHECK-LABEL: vshift10:
; CHECK:       # BB#0:
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vpcmpgtb %xmm1, %xmm2, %xmm1
; CHECK-NEXT:    vpcmpgtb %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %s = ashr <32 x i8> %a, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  ret <32 x i8> %s
}

define <32 x i8> @vshift11(<32 x i8> %a) {
; CHECK-LABEL: vshift11:
; CHECK:       # BB#0:
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT:    vpsrlw $2, %xmm1, %xmm1
; CHECK-NEXT:    vmovdqa {{.*#+}} xmm2 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
; CHECK-NEXT:    vpand %xmm2, %xmm1, %xmm1
; CHECK-NEXT:    vpsrlw $2, %xmm0, %xmm0
; CHECK-NEXT:    vpand %xmm2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %s = lshr <32 x i8> %a, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
  ret <32 x i8> %s
}

define <32 x i8> @vshift12(<32 x i8> %a) {
; CHECK-LABEL: vshift12:
; CHECK:       # BB#0:
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT:    vpsllw $2, %xmm1, %xmm1
; CHECK-NEXT:    vmovdqa {{.*#+}} xmm2 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
; CHECK-NEXT:    vpand %xmm2, %xmm1, %xmm1
; CHECK-NEXT:    vpsllw $2, %xmm0, %xmm0
; CHECK-NEXT:    vpand %xmm2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %s = shl <32 x i8> %a, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
  ret <32 x i8> %s
}

;;; Support variable shifts
define <8 x i32> @vshift08(<8 x i32> %a) {
; CHECK-LABEL: vshift08:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpslld $23, %xmm0, %xmm1
; CHECK-NEXT:    vmovdqa {{.*#+}} xmm2 = [1065353216,1065353216,1065353216,1065353216]
; CHECK-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
; CHECK-NEXT:    vcvttps2dq %xmm1, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpslld $23, %xmm0, %xmm0
; CHECK-NEXT:    vpaddd %xmm2, %xmm0, %xmm0
; CHECK-NEXT:    vcvttps2dq %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  ; 1 << x per lane, lowered via the float-exponent trick.
  %bitop = shl <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %a
  ret <8 x i32> %bitop
}

; PR15141
define <4 x i32> @vshift13(<4 x i32> %in) {
; CHECK-LABEL: vshift13:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT:    retq
  ; Non-uniform constant shift becomes a multiply by [1,2,4,16].
  %T = shl <4 x i32> %in, <i32 0, i32 1, i32 2, i32 4>
  ret <4 x i32> %T
}

;;; Uses shifts for sign extension
define <16 x i16> @sext_v16i16(<16 x i16> %a) {
; CHECK-LABEL: sext_v16i16:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsllw $8, %xmm0, %xmm1
; CHECK-NEXT:    vpsraw $8, %xmm1, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpsllw $8, %xmm0, %xmm0
; CHECK-NEXT:    vpsraw $8, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %b = trunc <16 x i16> %a to <16 x i8>
  %c = sext <16 x i8> %b to <16 x i16>
  ret <16 x i16> %c
}

define <8 x i32> @sext_v8i32(<8 x i32> %a) {
; CHECK-LABEL: sext_v8i32:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpslld $16, %xmm0, %xmm1
; CHECK-NEXT:    vpsrad $16, %xmm1, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpslld $16, %xmm0, %xmm0
; CHECK-NEXT:    vpsrad $16, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %b = trunc <8 x i32> %a to <8 x i16>
  %c = sext <8 x i16> %b to <8 x i32>
  ret <8 x i32> %c
}