; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s

; Codegen test for vector shifts when AVX2 is enabled.
;
; Each function below is compiled with llc and the emitted assembly is matched
; against the FileCheck directives that accompany it. Function names are
; matched with a label directive (including the trailing colon) so that an
; instruction pattern can never be satisfied by the body of a neighbouring
; function.

;;; Variable (per-element) shifts, shift amounts in a register

; CHECK-LABEL: variable_shl0:
; CHECK: vpsllvd
; CHECK: ret
define <4 x i32> @variable_shl0(<4 x i32> %x, <4 x i32> %y) {
  %k = shl <4 x i32> %x, %y
  ret <4 x i32> %k
}

; CHECK-LABEL: variable_shl1:
; CHECK: vpsllvd
; CHECK: ret
define <8 x i32> @variable_shl1(<8 x i32> %x, <8 x i32> %y) {
  %k = shl <8 x i32> %x, %y
  ret <8 x i32> %k
}

; CHECK-LABEL: variable_shl2:
; CHECK: vpsllvq
; CHECK: ret
define <2 x i64> @variable_shl2(<2 x i64> %x, <2 x i64> %y) {
  %k = shl <2 x i64> %x, %y
  ret <2 x i64> %k
}

; CHECK-LABEL: variable_shl3:
; CHECK: vpsllvq
; CHECK: ret
define <4 x i64> @variable_shl3(<4 x i64> %x, <4 x i64> %y) {
  %k = shl <4 x i64> %x, %y
  ret <4 x i64> %k
}

; CHECK-LABEL: variable_srl0:
; CHECK: vpsrlvd
; CHECK: ret
define <4 x i32> @variable_srl0(<4 x i32> %x, <4 x i32> %y) {
  %k = lshr <4 x i32> %x, %y
  ret <4 x i32> %k
}

; CHECK-LABEL: variable_srl1:
; CHECK: vpsrlvd
; CHECK: ret
define <8 x i32> @variable_srl1(<8 x i32> %x, <8 x i32> %y) {
  %k = lshr <8 x i32> %x, %y
  ret <8 x i32> %k
}

; CHECK-LABEL: variable_srl2:
; CHECK: vpsrlvq
; CHECK: ret
define <2 x i64> @variable_srl2(<2 x i64> %x, <2 x i64> %y) {
  %k = lshr <2 x i64> %x, %y
  ret <2 x i64> %k
}

; CHECK-LABEL: variable_srl3:
; CHECK: vpsrlvq
; CHECK: ret
define <4 x i64> @variable_srl3(<4 x i64> %x, <4 x i64> %y) {
  %k = lshr <4 x i64> %x, %y
  ret <4 x i64> %k
}

; CHECK-LABEL: variable_sra0:
; CHECK: vpsravd
; CHECK: ret
define <4 x i32> @variable_sra0(<4 x i32> %x, <4 x i32> %y) {
  %k = ashr <4 x i32> %x, %y
  ret <4 x i32> %k
}

; CHECK-LABEL: variable_sra1:
; CHECK: vpsravd
; CHECK: ret
define <8 x i32> @variable_sra1(<8 x i32> %x, <8 x i32> %y) {
  %k = ashr <8 x i32> %x, %y
  ret <8 x i32> %k
}

;;; Shift left by a constant splat
; CHECK-LABEL: vshift00:
; CHECK: vpslld
define <8 x i32> @vshift00(<8 x i32> %a) nounwind readnone {
  %s = shl <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %s
}

; CHECK-LABEL: vshift01:
; CHECK: vpsllw
define <16 x i16> @vshift01(<16 x i16> %a) nounwind readnone {
  %s = shl <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <16 x i16> %s
}

; CHECK-LABEL: vshift02:
; CHECK: vpsllq
define <4 x i64> @vshift02(<4 x i64> %a) nounwind readnone {
  %s = shl <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
  ret <4 x i64> %s
}

;;; Logical shift right by a constant splat
; CHECK-LABEL: vshift03:
; CHECK: vpsrld
define <8 x i32> @vshift03(<8 x i32> %a) nounwind readnone {
  %s = lshr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %s
}

; CHECK-LABEL: vshift04:
; CHECK: vpsrlw
define <16 x i16> @vshift04(<16 x i16> %a) nounwind readnone {
  %s = lshr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <16 x i16> %s
}

; CHECK-LABEL: vshift05:
; CHECK: vpsrlq
define <4 x i64> @vshift05(<4 x i64> %a) nounwind readnone {
  %s = lshr <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
  ret <4 x i64> %s
}

;;; Arithmetic shift right by a constant splat
; CHECK-LABEL: vshift06:
; CHECK: vpsrad
define <8 x i32> @vshift06(<8 x i32> %a) nounwind readnone {
  %s = ashr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %s
}

; CHECK-LABEL: vshift07:
; CHECK: vpsraw
define <16 x i16> @vshift07(<16 x i16> %a) nounwind readnone {
  %s = ashr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <16 x i16> %s
}

;;; Variable shifts, shift amounts loaded from memory.
;;; The "(%" in each pattern requires the shift-amount operand to be a memory
;;; reference, i.e. the load is expected to be folded into the shift.

; CHECK-LABEL: variable_sra0_load:
; CHECK: vpsravd (%
; CHECK: ret
define <4 x i32> @variable_sra0_load(<4 x i32> %x, <4 x i32>* %y) {
  %y1 = load <4 x i32>, <4 x i32>* %y
  %k = ashr <4 x i32> %x, %y1
  ret <4 x i32> %k
}

; CHECK-LABEL: variable_sra1_load:
; CHECK: vpsravd (%
; CHECK: ret
define <8 x i32> @variable_sra1_load(<8 x i32> %x, <8 x i32>* %y) {
  %y1 = load <8 x i32>, <8 x i32>* %y
  %k = ashr <8 x i32> %x, %y1
  ret <8 x i32> %k
}

; CHECK-LABEL: variable_shl0_load:
; CHECK: vpsllvd (%
; CHECK: ret
define <4 x i32> @variable_shl0_load(<4 x i32> %x, <4 x i32>* %y) {
  %y1 = load <4 x i32>, <4 x i32>* %y
  %k = shl <4 x i32> %x, %y1
  ret <4 x i32> %k
}

; CHECK-LABEL: variable_shl1_load:
; CHECK: vpsllvd (%
; CHECK: ret
define <8 x i32> @variable_shl1_load(<8 x i32> %x, <8 x i32>* %y) {
  %y1 = load <8 x i32>, <8 x i32>* %y
  %k = shl <8 x i32> %x, %y1
  ret <8 x i32> %k
}

; CHECK-LABEL: variable_shl2_load:
; CHECK: vpsllvq (%
; CHECK: ret
define <2 x i64> @variable_shl2_load(<2 x i64> %x, <2 x i64>* %y) {
  %y1 = load <2 x i64>, <2 x i64>* %y
  %k = shl <2 x i64> %x, %y1
  ret <2 x i64> %k
}

; CHECK-LABEL: variable_shl3_load:
; CHECK: vpsllvq (%
; CHECK: ret
define <4 x i64> @variable_shl3_load(<4 x i64> %x, <4 x i64>* %y) {
  %y1 = load <4 x i64>, <4 x i64>* %y
  %k = shl <4 x i64> %x, %y1
  ret <4 x i64> %k
}

; CHECK-LABEL: variable_srl0_load:
; CHECK: vpsrlvd (%
; CHECK: ret
define <4 x i32> @variable_srl0_load(<4 x i32> %x, <4 x i32>* %y) {
  %y1 = load <4 x i32>, <4 x i32>* %y
  %k = lshr <4 x i32> %x, %y1
  ret <4 x i32> %k
}

; CHECK-LABEL: variable_srl1_load:
; CHECK: vpsrlvd (%
; CHECK: ret
define <8 x i32> @variable_srl1_load(<8 x i32> %x, <8 x i32>* %y) {
  %y1 = load <8 x i32>, <8 x i32>* %y
  %k = lshr <8 x i32> %x, %y1
  ret <8 x i32> %k
}

; CHECK-LABEL: variable_srl2_load:
; CHECK: vpsrlvq (%
; CHECK: ret
define <2 x i64> @variable_srl2_load(<2 x i64> %x, <2 x i64>* %y) {
  %y1 = load <2 x i64>, <2 x i64>* %y
  %k = lshr <2 x i64> %x, %y1
  ret <2 x i64> %k
}

; CHECK-LABEL: variable_srl3_load:
; CHECK: vpsrlvq (%
; CHECK: ret
define <4 x i64> @variable_srl3_load(<4 x i64> %x, <4 x i64>* %y) {
  %y1 = load <4 x i64>, <4 x i64>* %y
  %k = lshr <4 x i64> %x, %y1
  ret <4 x i64> %k
}

;;; Constant-splat shifts of <32 x i8>.
;;; The patterns expect a 16-bit-lane shift followed by masking (vpand).

define <32 x i8> @shl9(<32 x i8> %A) nounwind {
  %B = shl <32 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <32 x i8> %B
; CHECK-LABEL: shl9:
; CHECK: vpsllw $3
; CHECK: vpand
; CHECK: ret
}

define <32 x i8> @shr9(<32 x i8> %A) nounwind {
  %B = lshr <32 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <32 x i8> %B
; CHECK-LABEL: shr9:
; CHECK: vpsrlw $3
; CHECK: vpand
; CHECK: ret
}

; Arithmetic shift right by 7 on i8 lanes: checked as vpxor followed by
; vpcmpgtb rather than as a shift instruction.
define <32 x i8> @sra_v32i8_7(<32 x i8> %A) nounwind {
  %B = ashr <32 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  ret <32 x i8> %B
; CHECK-LABEL: sra_v32i8_7:
; CHECK: vpxor
; CHECK: vpcmpgtb
; CHECK: ret
}

; Arithmetic shift right by 3 on i8 lanes: checked as a logical 16-bit-lane
; shift plus mask, followed by vpxor and vpsubb.
define <32 x i8> @sra_v32i8(<32 x i8> %A) nounwind {
  %B = ashr <32 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <32 x i8> %B
; CHECK-LABEL: sra_v32i8:
; CHECK: vpsrlw $3
; CHECK: vpand
; CHECK: vpxor
; CHECK: vpsubb
; CHECK: ret
}

;;; trunc + sext back to the original width: checked as a shift-left /
;;; arithmetic-shift-right pair with no vinsertf128 afterwards.

; CHECK-LABEL: _sext_v16i16:
; CHECK: vpsllw
; CHECK: vpsraw
; CHECK-NOT: vinsertf128
define <16 x i16> @sext_v16i16(<16 x i16> %a) nounwind {
  %b = trunc <16 x i16> %a to <16 x i8>
  %c = sext <16 x i8> %b to <16 x i16>
  ret <16 x i16> %c
}

; CHECK-LABEL: _sext_v8i32:
; CHECK: vpslld
; CHECK: vpsrad
; CHECK-NOT: vinsertf128
define <8 x i32> @sext_v8i32(<8 x i32> %a) nounwind {
  %b = trunc <8 x i32> %a to <8 x i16>
  %c = sext <8 x i16> %b to <8 x i32>
  ret <8 x i32> %c
}

;;; Variable shifts of <8 x i16>.
;;; The patterns expect both operands to be widened to 32-bit lanes in a ymm
;;; register, shifted with the 32-bit variable shift, and then narrowed again
;;; (vpshufb + vpermq). The value operand is zero-extended for shl/lshr and
;;; sign-extended for ashr; the shift amount is always zero-extended.

define <8 x i16> @variable_shl16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK-LABEL: variable_shl16:
; CHECK-DAG: vpmovzxwd %xmm1, [[AMT:%ymm[0-9]+]]
; CHECK-DAG: vpmovzxwd %xmm0, [[LHS:%ymm[0-9]+]]
; CHECK: vpsllvd [[AMT]], [[LHS]], {{%ymm[0-9]+}}
; CHECK: vpshufb
; CHECK: vpermq
  %res = shl <8 x i16> %lhs, %rhs
  ret <8 x i16> %res
}

define <8 x i16> @variable_ashr16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK-LABEL: variable_ashr16:
; CHECK-DAG: vpmovzxwd %xmm1, [[AMT:%ymm[0-9]+]]
; CHECK-DAG: vpmovsxwd %xmm0, [[LHS:%ymm[0-9]+]]
; CHECK: vpsravd [[AMT]], [[LHS]], {{%ymm[0-9]+}}
; CHECK: vpshufb
; CHECK: vpermq
  %res = ashr <8 x i16> %lhs, %rhs
  ret <8 x i16> %res
}

define <8 x i16> @variable_lshr16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK-LABEL: variable_lshr16:
; CHECK-DAG: vpmovzxwd %xmm1, [[AMT:%ymm[0-9]+]]
; CHECK-DAG: vpmovzxwd %xmm0, [[LHS:%ymm[0-9]+]]
; CHECK: vpsrlvd [[AMT]], [[LHS]], {{%ymm[0-9]+}}
; CHECK: vpshufb
; CHECK: vpermq
  %res = lshr <8 x i16> %lhs, %rhs
  ret <8 x i16> %res
}