; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s

;;; Shift left
define <8 x i32> @vshift00(<8 x i32> %a) {
; CHECK-LABEL: vshift00:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpslld $2, %xmm0, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpslld $2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %s = shl <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %s
}

define <16 x i16> @vshift01(<16 x i16> %a) {
; CHECK-LABEL: vshift01:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsllw $2, %xmm0, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpsllw $2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %s = shl <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <16 x i16> %s
}

define <4 x i64> @vshift02(<4 x i64> %a) {
; CHECK-LABEL: vshift02:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsllq $2, %xmm0, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpsllq $2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %s = shl <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
  ret <4 x i64> %s
}

;;; Logical Shift right
define <8 x i32> @vshift03(<8 x i32> %a) {
; CHECK-LABEL: vshift03:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsrld $2, %xmm0, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpsrld $2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %s = lshr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %s
}

define <16 x i16> @vshift04(<16 x i16> %a) {
; CHECK-LABEL: vshift04:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsrlw $2, %xmm0, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpsrlw $2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %s = lshr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <16 x i16> %s
}

define <4 x i64> @vshift05(<4 x i64> %a) {
; CHECK-LABEL: vshift05:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsrlq $2, %xmm0, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpsrlq $2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %s = lshr <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
  ret <4 x i64> %s
}

;;; Arithmetic Shift right
define <8 x i32> @vshift06(<8 x i32> %a) {
; CHECK-LABEL: vshift06:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsrad $2, %xmm0, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpsrad $2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %s = ashr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %s
}

define <16 x i16> @vshift07(<16 x i16> %a) {
; CHECK-LABEL: vshift07:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsraw $2, %xmm0, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpsraw $2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %s = ashr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <16 x i16> %s
}

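; AVX1 has no 256-bit integer shifts and no byte-granularity shifts at all,
; so the <32 x i8> tests below are lowered per 128-bit half with a word
; shift plus a mask that clears bits shifted across byte boundaries. For
; ashr (vshift09) the sign is then restored with the (x ^ 32) - 32 xor/sub
; trick, and an ashr by 7 (vshift10) folds to a compare against zero
; (vpcmpgtb).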
define <32 x i8> @vshift09(<32 x i8> %a) {
; CHECK-LABEL: vshift09:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT:    vpsrlw $2, %xmm1, %xmm1
; CHECK-NEXT:    vmovdqa {{.*#+}} xmm2 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
; CHECK-NEXT:    vpand %xmm2, %xmm1, %xmm1
; CHECK-NEXT:    vmovdqa {{.*#+}} xmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
; CHECK-NEXT:    vpxor %xmm3, %xmm1, %xmm1
; CHECK-NEXT:    vpsubb %xmm3, %xmm1, %xmm1
; CHECK-NEXT:    vpsrlw $2, %xmm0, %xmm0
; CHECK-NEXT:    vpand %xmm2, %xmm0, %xmm0
; CHECK-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; CHECK-NEXT:    vpsubb %xmm3, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %s = ashr <32 x i8> %a, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
  ret <32 x i8> %s
}

define <32 x i8> @vshift10(<32 x i8> %a) {
; CHECK-LABEL: vshift10:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vpcmpgtb %xmm1, %xmm2, %xmm1
; CHECK-NEXT:    vpcmpgtb %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %s = ashr <32 x i8> %a, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  ret <32 x i8> %s
}

define <32 x i8> @vshift11(<32 x i8> %a) {
; CHECK-LABEL: vshift11:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT:    vpsrlw $2, %xmm1, %xmm1
; CHECK-NEXT:    vmovdqa {{.*#+}} xmm2 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
; CHECK-NEXT:    vpand %xmm2, %xmm1, %xmm1
; CHECK-NEXT:    vpsrlw $2, %xmm0, %xmm0
; CHECK-NEXT:    vpand %xmm2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %s = lshr <32 x i8> %a, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
  ret <32 x i8> %s
}

define <32 x i8> @vshift12(<32 x i8> %a) {
; CHECK-LABEL: vshift12:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT:    vpsllw $2, %xmm1, %xmm1
; CHECK-NEXT:    vmovdqa {{.*#+}} xmm2 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
; CHECK-NEXT:    vpand %xmm2, %xmm1, %xmm1
; CHECK-NEXT:    vpsllw $2, %xmm0, %xmm0
; CHECK-NEXT:    vpand %xmm2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %s = shl <32 x i8> %a, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
  ret <32 x i8> %s
}

;;; Support variable shifts
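; Shifting a splat of 1 left by a variable amount computes 2^a; the checks
; below show the float-exponent trick: the amount is shifted into the
; exponent field (bit 23), the bias for 1.0 (1065353216 = 0x3F800000) is
; added, and vcvttps2dq converts the resulting powers of two back to ints.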
define <8 x i32> @vshift08(<8 x i32> %a) {
; CHECK-LABEL: vshift08:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT:    vpslld $23, %xmm1, %xmm1
; CHECK-NEXT:    vmovdqa {{.*#+}} xmm2 = [1065353216,1065353216,1065353216,1065353216]
; CHECK-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
; CHECK-NEXT:    vpslld $23, %xmm0, %xmm0
; CHECK-NEXT:    vpaddd %xmm2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; CHECK-NEXT:    vcvttps2dq %ymm0, %ymm0
; CHECK-NEXT:    retq
  %bitop = shl <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %a
  ret <8 x i32> %bitop
}

define <8 x i32> @vshift08_add(<8 x i32> %a, <8 x i32> %y) {
; CHECK-LABEL: vshift08_add:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpslld $23, %xmm0, %xmm2
; CHECK-NEXT:    vmovdqa {{.*#+}} xmm3 = [1065353216,1065353216,1065353216,1065353216]
; CHECK-NEXT:    vpaddd %xmm3, %xmm2, %xmm2
; CHECK-NEXT:    vcvttps2dq %xmm2, %xmm2
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpslld $23, %xmm0, %xmm0
; CHECK-NEXT:    vpaddd %xmm3, %xmm0, %xmm0
; CHECK-NEXT:    vcvttps2dq %xmm0, %xmm0
; CHECK-NEXT:    vextractf128 $1, %ymm1, %xmm3
; CHECK-NEXT:    vpaddd %xmm3, %xmm0, %xmm0
; CHECK-NEXT:    vpaddd %xmm1, %xmm2, %xmm1
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %bitop = shl <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %a
  %r = add <8 x i32> %bitop, %y
  ret <8 x i32> %r
}

; PR15141
define <4 x i32> @vshift13(<4 x i32> %in) {
; CHECK-LABEL: vshift13:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %T = shl <4 x i32> %in, <i32 0, i32 1, i32 2, i32 4>
  ret <4 x i32> %T
}

;;; Uses shifts for sign extension
define <16 x i16> @sext_v16i16(<16 x i16> %a) {
; CHECK-LABEL: sext_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsllw $8, %xmm0, %xmm1
; CHECK-NEXT:    vpsraw $8, %xmm1, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpsllw $8, %xmm0, %xmm0
; CHECK-NEXT:    vpsraw $8, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %b = trunc <16 x i16> %a to <16 x i8>
  %c = sext <16 x i8> %b to <16 x i16>
  ret <16 x i16> %c
}

define <8 x i32> @sext_v8i32(<8 x i32> %a) {
; CHECK-LABEL: sext_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpslld $16, %xmm0, %xmm1
; CHECK-NEXT:    vpsrad $16, %xmm1, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpslld $16, %xmm0, %xmm0
; CHECK-NEXT:    vpsrad $16, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %b = trunc <8 x i32> %a to <8 x i16>
  %c = sext <8 x i16> %b to <8 x i32>
  ret <8 x i32> %c
}