; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512VLBW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi2 | FileCheck %s --check-prefixes=AVX512,AVX512VBMI2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi2,+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512VLVBMI2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefixes=XOPAVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=XOPAVX2

; Every test below passes the same value %x as both data operands of
; llvm.fshl, so each funnel shift is a per-lane rotate-left of %x by the
; shift-amount operand. The check lines record the code llc emits for that
; rotate on each of the subtarget configurations exercised above.

declare <4 x i64> @llvm.fshl.v4i64(<4 x i64>, <4 x i64>, <4 x i64>)
declare <8 x i32> @llvm.fshl.v8i32(<8 x i32>, <8 x i32>, <8 x i32>)
declare <16 x i16> @llvm.fshl.v16i16(<16 x i16>, <16 x i16>, <16 x i16>)
declare <32 x i8> @llvm.fshl.v32i8(<32 x i8>, <32 x i8>, <32 x i8>)

;
; Variable Shifts
;

; Rotate each i64 lane of %x left by the corresponding lane of %amt.
define <4 x i64> @var_funnnel_v4i64(<4 x i64> %x, <4 x i64> %amt) nounwind {
; AVX1-LABEL: var_funnnel_v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm3
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT:    vpsllq %xmm4, %xmm2, %xmm5
; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm4[2,3,2,3]
; AVX1-NEXT:    vpsllq %xmm4, %xmm2, %xmm4
; AVX1-NEXT:    vpblendw {{.*#+}} xmm4 = xmm5[0,1,2,3],xmm4[4,5,6,7]
; AVX1-NEXT:    vpsllq %xmm3, %xmm0, %xmm5
; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
; AVX1-NEXT:    vpsllq %xmm3, %xmm0, %xmm3
; AVX1-NEXT:    vpblendw {{.*#+}} xmm3 = xmm5[0,1,2,3],xmm3[4,5,6,7]
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
; AVX1-NEXT:    vpxor %xmm5, %xmm5, %xmm5
; AVX1-NEXT:    vpsubq %xmm4, %xmm5, %xmm4
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm6 = [63,63]
; AVX1-NEXT:    vpand %xmm6, %xmm4, %xmm4
; AVX1-NEXT:    vpsrlq %xmm4, %xmm2, %xmm7
; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm4[2,3,2,3]
; AVX1-NEXT:    vpsrlq %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm7[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT:    vpsubq %xmm1, %xmm5, %xmm1
; AVX1-NEXT:    vpand %xmm6, %xmm1, %xmm1
; AVX1-NEXT:    vpsrlq %xmm1, %xmm0, %xmm4
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; AVX1-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm4[0,1,2,3],xmm0[4,5,6,7]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vorps %ymm0, %ymm3, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: var_funnnel_v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [63,63,63,63]
; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm3
; AVX2-NEXT:    vpsllvq %ymm3, %ymm0, %ymm3
; AVX2-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX2-NEXT:    vpsubq %ymm1, %ymm4, %ymm1
; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpsrlvq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpor %ymm0, %ymm3, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: var_funnnel_v4i64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT:    vprolvq %zmm1, %zmm0, %zmm0
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: var_funnnel_v4i64:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vprolvq %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: var_funnnel_v4i64:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT:    vprolvq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512BW-NEXT:    retq
;
; AVX512VLBW-LABEL: var_funnnel_v4i64:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    vprolvq %ymm1, %ymm0, %ymm0
; AVX512VLBW-NEXT:    retq
;
; AVX512VBMI2-LABEL: var_funnnel_v4i64:
; AVX512VBMI2:       # %bb.0:
; AVX512VBMI2-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512VBMI2-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512VBMI2-NEXT:    vprolvq %zmm1, %zmm0, %zmm0
; AVX512VBMI2-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VBMI2-NEXT:    retq
;
; AVX512VLVBMI2-LABEL: var_funnnel_v4i64:
; AVX512VLVBMI2:       # %bb.0:
; AVX512VLVBMI2-NEXT:    vprolvq %ymm1, %ymm0, %ymm0
; AVX512VLVBMI2-NEXT:    retq
;
; XOPAVX1-LABEL: var_funnnel_v4i64:
; XOPAVX1:       # %bb.0:
; XOPAVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; XOPAVX1-NEXT:    vprotq %xmm2, %xmm3, %xmm2
; XOPAVX1-NEXT:    vprotq %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT:    retq
;
; XOPAVX2-LABEL: var_funnnel_v4i64:
; XOPAVX2:       # %bb.0:
; XOPAVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; XOPAVX2-NEXT:    vextracti128 $1, %ymm0, %xmm3
; XOPAVX2-NEXT:    vprotq %xmm2, %xmm3, %xmm2
; XOPAVX2-NEXT:    vprotq %xmm1, %xmm0, %xmm0
; XOPAVX2-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX2-NEXT:    retq
  %res = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %x, <4 x i64> %x, <4 x i64> %amt)
  ret <4 x i64> %res
}

; Rotate each i32 lane of %x left by the corresponding lane of %amt.
define <8 x i32> @var_funnnel_v8i32(<8 x i32> %x, <8 x i32> %amt) nounwind {
; AVX1-LABEL: var_funnnel_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [31,31,31,31]
; AVX1-NEXT:    vpand %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpslld $23, %xmm2, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [1065353216,1065353216,1065353216,1065353216]
; AVX1-NEXT:    vpaddd %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vcvttps2dq %xmm2, %xmm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm5 = xmm2[1,1,3,3]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm6
; AVX1-NEXT:    vpshufd {{.*#+}} xmm7 = xmm6[1,1,3,3]
; AVX1-NEXT:    vpmuludq %xmm5, %xmm7, %xmm5
; AVX1-NEXT:    vpmuludq %xmm2, %xmm6, %xmm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm6 = xmm2[1,1,3,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm6 = xmm6[0,1],xmm5[2,3],xmm6[4,5],xmm5[6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm5 = xmm5[0,0,2,2]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm5[2,3],xmm2[4,5],xmm5[6,7]
; AVX1-NEXT:    vpor %xmm6, %xmm2, %xmm2
; AVX1-NEXT:    vpand %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpslld $23, %xmm1, %xmm1
; AVX1-NEXT:    vpaddd %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vcvttps2dq %xmm1, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
; AVX1-NEXT:    vpmuludq %xmm3, %xmm4, %xmm3
; AVX1-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm3[0,0,2,2]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm3[2,3],xmm0[4,5],xmm3[6,7]
; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: var_funnnel_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastd {{.*#+}} ymm2 = [31,31,31,31,31,31,31,31]
; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpsllvd %ymm1, %ymm0, %ymm2
; AVX2-NEXT:    vpbroadcastd {{.*#+}} ymm3 = [32,32,32,32,32,32,32,32]
; AVX2-NEXT:    vpsubd %ymm1, %ymm3, %ymm1
; AVX2-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpor %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: var_funnnel_v8i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT:    vprolvd %zmm1, %zmm0, %zmm0
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: var_funnnel_v8i32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vprolvd %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: var_funnnel_v8i32:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT:    vprolvd %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512BW-NEXT:    retq
;
; AVX512VLBW-LABEL: var_funnnel_v8i32:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    vprolvd %ymm1, %ymm0, %ymm0
; AVX512VLBW-NEXT:    retq
;
; AVX512VBMI2-LABEL: var_funnnel_v8i32:
; AVX512VBMI2:       # %bb.0:
; AVX512VBMI2-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512VBMI2-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512VBMI2-NEXT:    vprolvd %zmm1, %zmm0, %zmm0
; AVX512VBMI2-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VBMI2-NEXT:    retq
;
; AVX512VLVBMI2-LABEL: var_funnnel_v8i32:
; AVX512VLVBMI2:       # %bb.0:
; AVX512VLVBMI2-NEXT:    vprolvd %ymm1, %ymm0, %ymm0
; AVX512VLVBMI2-NEXT:    retq
;
; XOPAVX1-LABEL: var_funnnel_v8i32:
; XOPAVX1:       # %bb.0:
; XOPAVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; XOPAVX1-NEXT:    vprotd %xmm2, %xmm3, %xmm2
; XOPAVX1-NEXT:    vprotd %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT:    retq
;
; XOPAVX2-LABEL: var_funnnel_v8i32:
; XOPAVX2:       # %bb.0:
; XOPAVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; XOPAVX2-NEXT:    vextracti128 $1, %ymm0, %xmm3
; XOPAVX2-NEXT:    vprotd %xmm2, %xmm3, %xmm2
; XOPAVX2-NEXT:    vprotd %xmm1, %xmm0, %xmm0
; XOPAVX2-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX2-NEXT:    retq
  %res = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %x, <8 x i32> %x, <8 x i32> %amt)
  ret <8 x i32> %res
}

; Rotate each i16 lane of %x left by the corresponding lane of %amt.
define <16 x i16> @var_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounwind {
; AVX1-LABEL: var_funnnel_v16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm4 = xmm2[4,4,5,5,6,6,7,7]
; AVX1-NEXT:    vpslld $23, %xmm4, %xmm4
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm5 = [1065353216,1065353216,1065353216,1065353216]
; AVX1-NEXT:    vpaddd %xmm5, %xmm4, %xmm4
; AVX1-NEXT:    vcvttps2dq %xmm4, %xmm4
; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
; AVX1-NEXT:    vpslld $23, %xmm2, %xmm2
; AVX1-NEXT:    vpaddd %xmm5, %xmm2, %xmm2
; AVX1-NEXT:    vcvttps2dq %xmm2, %xmm2
; AVX1-NEXT:    vpackusdw %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT:    vpmulhuw %xmm2, %xmm4, %xmm6
; AVX1-NEXT:    vpmullw %xmm2, %xmm4, %xmm2
; AVX1-NEXT:    vpor %xmm6, %xmm2, %xmm2
; AVX1-NEXT:    vpand %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm3 = xmm1[4,4,5,5,6,6,7,7]
; AVX1-NEXT:    vpslld $23, %xmm3, %xmm3
; AVX1-NEXT:    vpaddd %xmm5, %xmm3, %xmm3
; AVX1-NEXT:    vcvttps2dq %xmm3, %xmm3
; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; AVX1-NEXT:    vpslld $23, %xmm1, %xmm1
; AVX1-NEXT:    vpaddd %xmm5, %xmm1, %xmm1
; AVX1-NEXT:    vcvttps2dq %xmm1, %xmm1
; AVX1-NEXT:    vpackusdw %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm3
; AVX1-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: var_funnnel_v16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpunpckhwd {{.*#+}} ymm3 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
; AVX2-NEXT:    vpand {{.*}}(%rip), %ymm1, %ymm1
; AVX2-NEXT:    vpunpckhwd {{.*#+}} ymm4 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
; AVX2-NEXT:    vpsllvd %ymm4, %ymm3, %ymm4
; AVX2-NEXT:    vpsrld $16, %ymm4, %ymm4
; AVX2-NEXT:    vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
; AVX2-NEXT:    vpunpcklwd {{.*#+}} ymm5 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
; AVX2-NEXT:    vpsllvd %ymm5, %ymm0, %ymm5
; AVX2-NEXT:    vpsrld $16, %ymm5, %ymm5
; AVX2-NEXT:    vpackusdw %ymm4, %ymm5, %ymm4
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm5 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX2-NEXT:    vpsubw %ymm1, %ymm5, %ymm1
; AVX2-NEXT:    vpunpckhwd {{.*#+}} ymm5 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
; AVX2-NEXT:    vpsrlvd %ymm5, %ymm3, %ymm3
; AVX2-NEXT:    vpsrld $16, %ymm3, %ymm3
; AVX2-NEXT:    vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
; AVX2-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrld $16, %ymm0, %ymm0
; AVX2-NEXT:    vpackusdw %ymm3, %ymm0, %ymm0
; AVX2-NEXT:    vpor %ymm0, %ymm4, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: var_funnnel_v16i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512F-NEXT:    vpand {{.*}}(%rip), %ymm1, %ymm1
; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm2 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512F-NEXT:    vpsllvd %zmm2, %zmm0, %zmm2
; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX512F-NEXT:    vpsubw %ymm1, %ymm3, %ymm1
; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512F-NEXT:    vpsrlvd %zmm1, %zmm0, %zmm0
; AVX512F-NEXT:    vpord %zmm0, %zmm2, %zmm0
; AVX512F-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: var_funnnel_v16i16:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512VL-NEXT:    vpand {{.*}}(%rip), %ymm1, %ymm1
; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm2 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512VL-NEXT:    vpsllvd %zmm2, %zmm0, %zmm2
; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX512VL-NEXT:    vpsubw %ymm1, %ymm3, %ymm1
; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512VL-NEXT:    vpsrlvd %zmm1, %zmm0, %zmm0
; AVX512VL-NEXT:    vpord %zmm0, %zmm2, %zmm0
; AVX512VL-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: var_funnnel_v16i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT:    vpand {{.*}}(%rip), %ymm1, %ymm1
; AVX512BW-NEXT:    vpsllvw %zmm1, %zmm0, %zmm2
; AVX512BW-NEXT:    vmovdqa {{.*#+}} ymm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX512BW-NEXT:    vpsubw %ymm1, %ymm3, %ymm1
; AVX512BW-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpor %ymm0, %ymm2, %ymm0
; AVX512BW-NEXT:    retq
;
; AVX512VLBW-LABEL: var_funnnel_v16i16:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    vpand {{.*}}(%rip), %ymm1, %ymm1
; AVX512VLBW-NEXT:    vpsllvw %ymm1, %ymm0, %ymm2
; AVX512VLBW-NEXT:    vmovdqa {{.*#+}} ymm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX512VLBW-NEXT:    vpsubw %ymm1, %ymm3, %ymm1
; AVX512VLBW-NEXT:    vpsrlvw %ymm1, %ymm0, %ymm0
; AVX512VLBW-NEXT:    vpor %ymm0, %ymm2, %ymm0
; AVX512VLBW-NEXT:    retq
;
; AVX512VBMI2-LABEL: var_funnnel_v16i16:
; AVX512VBMI2:       # %bb.0:
; AVX512VBMI2-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512VBMI2-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512VBMI2-NEXT:    vpshldvw %zmm1, %zmm0, %zmm0
; AVX512VBMI2-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VBMI2-NEXT:    retq
;
; AVX512VLVBMI2-LABEL: var_funnnel_v16i16:
; AVX512VLVBMI2:       # %bb.0:
; AVX512VLVBMI2-NEXT:    vpshldvw %ymm1, %ymm0, %ymm0
; AVX512VLVBMI2-NEXT:    retq
;
; XOPAVX1-LABEL: var_funnnel_v16i16:
; XOPAVX1:       # %bb.0:
; XOPAVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; XOPAVX1-NEXT:    vprotw %xmm2, %xmm3, %xmm2
; XOPAVX1-NEXT:    vprotw %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT:    retq
;
; XOPAVX2-LABEL: var_funnnel_v16i16:
; XOPAVX2:       # %bb.0:
; XOPAVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; XOPAVX2-NEXT:    vextracti128 $1, %ymm0, %xmm3
; XOPAVX2-NEXT:    vprotw %xmm2, %xmm3, %xmm2
; XOPAVX2-NEXT:    vprotw %xmm1, %xmm0, %xmm0
; XOPAVX2-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX2-NEXT:    retq
  %res = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %x, <16 x i16> %x, <16 x i16> %amt)
  ret <16 x i16> %res
}

; Rotate each i8 lane of %x left by the corresponding lane of %amt.
define <32 x i8> @var_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind {
; AVX1-LABEL: var_funnnel_v32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm2, %xmm3
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX1-NEXT:    vpandn %xmm3, %xmm4, %xmm3
; AVX1-NEXT:    vpsllw $4, %xmm2, %xmm5
; AVX1-NEXT:    vpand %xmm4, %xmm5, %xmm5
; AVX1-NEXT:    vpor %xmm3, %xmm5, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
; AVX1-NEXT:    vpsllw $5, %xmm5, %xmm5
; AVX1-NEXT:    vpblendvb %xmm5, %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpsrlw $6, %xmm2, %xmm3
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm6 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
; AVX1-NEXT:    vpandn %xmm3, %xmm6, %xmm3
; AVX1-NEXT:    vpsllw $2, %xmm2, %xmm7
; AVX1-NEXT:    vpand %xmm6, %xmm7, %xmm7
; AVX1-NEXT:    vpor %xmm3, %xmm7, %xmm3
; AVX1-NEXT:    vpaddb %xmm5, %xmm5, %xmm5
; AVX1-NEXT:    vpblendvb %xmm5, %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpsrlw $7, %xmm2, %xmm3
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm8 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX1-NEXT:    vpand %xmm3, %xmm8, %xmm3
; AVX1-NEXT:    vpaddb %xmm2, %xmm2, %xmm7
; AVX1-NEXT:    vpor %xmm3, %xmm7, %xmm3
; AVX1-NEXT:    vpaddb %xmm5, %xmm5, %xmm5
; AVX1-NEXT:    vpblendvb %xmm5, %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm3
; AVX1-NEXT:    vpandn %xmm3, %xmm4, %xmm3
; AVX1-NEXT:    vpsllw $4, %xmm0, %xmm5
; AVX1-NEXT:    vpand %xmm4, %xmm5, %xmm4
; AVX1-NEXT:    vpor %xmm3, %xmm4, %xmm3
; AVX1-NEXT:    vpsllw $5, %xmm1, %xmm1
; AVX1-NEXT:    vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $6, %xmm0, %xmm3
; AVX1-NEXT:    vpandn %xmm3, %xmm6, %xmm3
; AVX1-NEXT:    vpsllw $2, %xmm0, %xmm4
; AVX1-NEXT:    vpand %xmm6, %xmm4, %xmm4
; AVX1-NEXT:    vpor %xmm3, %xmm4, %xmm3
; AVX1-NEXT:    vpaddb %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $7, %xmm0, %xmm3
; AVX1-NEXT:    vpand %xmm3, %xmm8, %xmm3
; AVX1-NEXT:    vpaddb %xmm0, %xmm0, %xmm4
; AVX1-NEXT:    vpor %xmm3, %xmm4, %xmm3
; AVX1-NEXT:    vpaddb %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: var_funnnel_v32i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrlw $4, %ymm0, %ymm2
; AVX2-NEXT:    vpand {{.*}}(%rip), %ymm2, %ymm2
; AVX2-NEXT:    vpsllw $4, %ymm0, %ymm3
; AVX2-NEXT:    vpand {{.*}}(%rip), %ymm3, %ymm3
; AVX2-NEXT:    vpor %ymm2, %ymm3, %ymm2
; AVX2-NEXT:    vpsllw $5, %ymm1, %ymm1
; AVX2-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpsrlw $6, %ymm0, %ymm2
; AVX2-NEXT:    vpand {{.*}}(%rip), %ymm2, %ymm2
; AVX2-NEXT:    vpsllw $2, %ymm0, %ymm3
; AVX2-NEXT:    vpand {{.*}}(%rip), %ymm3, %ymm3
; AVX2-NEXT:    vpor %ymm2, %ymm3, %ymm2
; AVX2-NEXT:    vpaddb %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpaddb %ymm0, %ymm0, %ymm2
; AVX2-NEXT:    vpsrlw $7, %ymm0, %ymm3
; AVX2-NEXT:    vpand {{.*}}(%rip), %ymm3, %ymm3
; AVX2-NEXT:    vpor %ymm3, %ymm2, %ymm2
; AVX2-NEXT:    vpaddb %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: var_funnnel_v32i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpsrlw $4, %ymm0, %ymm2
; AVX512F-NEXT:    vpand {{.*}}(%rip), %ymm2, %ymm2
; AVX512F-NEXT:    vpsllw $4, %ymm0, %ymm3
; AVX512F-NEXT:    vpand {{.*}}(%rip), %ymm3, %ymm3
; AVX512F-NEXT:    vpor %ymm2, %ymm3, %ymm2
; AVX512F-NEXT:    vpsllw $5, %ymm1, %ymm1
; AVX512F-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX512F-NEXT:    vpsrlw $6, %ymm0, %ymm2
; AVX512F-NEXT:    vpand {{.*}}(%rip), %ymm2, %ymm2
; AVX512F-NEXT:    vpsllw $2, %ymm0, %ymm3
; AVX512F-NEXT:    vpand {{.*}}(%rip), %ymm3, %ymm3
; AVX512F-NEXT:    vpor %ymm2, %ymm3, %ymm2
; AVX512F-NEXT:    vpaddb %ymm1, %ymm1, %ymm1
; AVX512F-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX512F-NEXT:    vpaddb %ymm0, %ymm0, %ymm2
; AVX512F-NEXT:    vpsrlw $7, %ymm0, %ymm3
; AVX512F-NEXT:    vpand {{.*}}(%rip), %ymm3, %ymm3
; AVX512F-NEXT:    vpor %ymm3, %ymm2, %ymm2
; AVX512F-NEXT:    vpaddb %ymm1, %ymm1, %ymm1
; AVX512F-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: var_funnnel_v32i8:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsllw $4, %ymm0, %ymm2
; AVX512VL-NEXT:    vpsrlw $4, %ymm0, %ymm3
; AVX512VL-NEXT:    vpternlogq $216, {{.*}}(%rip), %ymm2, %ymm3
; AVX512VL-NEXT:    vpsllw $5, %ymm1, %ymm1
; AVX512VL-NEXT:    vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT:    vpsllw $2, %ymm0, %ymm2
; AVX512VL-NEXT:    vpsrlw $6, %ymm0, %ymm3
; AVX512VL-NEXT:    vpternlogq $216, {{.*}}(%rip), %ymm2, %ymm3
; AVX512VL-NEXT:    vpaddb %ymm1, %ymm1, %ymm1
; AVX512VL-NEXT:    vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT:    vpsrlw $7, %ymm0, %ymm2
; AVX512VL-NEXT:    vpaddb %ymm0, %ymm0, %ymm3
; AVX512VL-NEXT:    vpternlogq $248, {{.*}}(%rip), %ymm2, %ymm3
; AVX512VL-NEXT:    vpaddb %ymm1, %ymm1, %ymm1
; AVX512VL-NEXT:    vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: var_funnnel_v32i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vmovdqa {{.*#+}} ymm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; AVX512BW-NEXT:    vpand %ymm2, %ymm1, %ymm3
; AVX512BW-NEXT:    vpmovzxbw {{.*#+}} zmm3 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero,ymm3[16],zero,ymm3[17],zero,ymm3[18],zero,ymm3[19],zero,ymm3[20],zero,ymm3[21],zero,ymm3[22],zero,ymm3[23],zero,ymm3[24],zero,ymm3[25],zero,ymm3[26],zero,ymm3[27],zero,ymm3[28],zero,ymm3[29],zero,ymm3[30],zero,ymm3[31],zero
; AVX512BW-NEXT:    vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; AVX512BW-NEXT:    vpsllvw %zmm3, %zmm0, %zmm3
; AVX512BW-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX512BW-NEXT:    vpsubb %ymm1, %ymm4, %ymm1
; AVX512BW-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX512BW-NEXT:    vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
; AVX512BW-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vporq %zmm0, %zmm3, %zmm0
; AVX512BW-NEXT:    vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT:    retq
;
; AVX512VLBW-LABEL: var_funnnel_v32i8:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    vmovdqa {{.*#+}} ymm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; AVX512VLBW-NEXT:    vpand %ymm2, %ymm1, %ymm3
; AVX512VLBW-NEXT:    vpmovzxbw {{.*#+}} zmm3 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero,ymm3[16],zero,ymm3[17],zero,ymm3[18],zero,ymm3[19],zero,ymm3[20],zero,ymm3[21],zero,ymm3[22],zero,ymm3[23],zero,ymm3[24],zero,ymm3[25],zero,ymm3[26],zero,ymm3[27],zero,ymm3[28],zero,ymm3[29],zero,ymm3[30],zero,ymm3[31],zero
; AVX512VLBW-NEXT:    vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; AVX512VLBW-NEXT:    vpsllvw %zmm3, %zmm0, %zmm3
; AVX512VLBW-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX512VLBW-NEXT:    vpsubb %ymm1, %ymm4, %ymm1
; AVX512VLBW-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX512VLBW-NEXT:    vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
; AVX512VLBW-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT:    vporq %zmm0, %zmm3, %zmm0
; AVX512VLBW-NEXT:    vpmovwb %zmm0, %ymm0
; AVX512VLBW-NEXT:    retq
;
; AVX512VBMI2-LABEL: var_funnnel_v32i8:
; AVX512VBMI2:       # %bb.0:
; AVX512VBMI2-NEXT:    vmovdqa {{.*#+}} ymm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; AVX512VBMI2-NEXT:    vpand %ymm2, %ymm1, %ymm3
; AVX512VBMI2-NEXT:    vpmovzxbw {{.*#+}} zmm3 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero,ymm3[16],zero,ymm3[17],zero,ymm3[18],zero,ymm3[19],zero,ymm3[20],zero,ymm3[21],zero,ymm3[22],zero,ymm3[23],zero,ymm3[24],zero,ymm3[25],zero,ymm3[26],zero,ymm3[27],zero,ymm3[28],zero,ymm3[29],zero,ymm3[30],zero,ymm3[31],zero
; AVX512VBMI2-NEXT:    vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; AVX512VBMI2-NEXT:    vpsllvw %zmm3, %zmm0, %zmm3
; AVX512VBMI2-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX512VBMI2-NEXT:    vpsubb %ymm1, %ymm4, %ymm1
; AVX512VBMI2-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX512VBMI2-NEXT:    vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
; AVX512VBMI2-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm0
; AVX512VBMI2-NEXT:    vporq %zmm0, %zmm3, %zmm0
; AVX512VBMI2-NEXT:    vpmovwb %zmm0, %ymm0
; AVX512VBMI2-NEXT:    retq
;
; AVX512VLVBMI2-LABEL: var_funnnel_v32i8:
; AVX512VLVBMI2:       # %bb.0:
; AVX512VLVBMI2-NEXT:    vmovdqa {{.*#+}} ymm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; AVX512VLVBMI2-NEXT:    vpand %ymm2, %ymm1, %ymm3
; AVX512VLVBMI2-NEXT:    vpmovzxbw {{.*#+}} zmm3 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero,ymm3[16],zero,ymm3[17],zero,ymm3[18],zero,ymm3[19],zero,ymm3[20],zero,ymm3[21],zero,ymm3[22],zero,ymm3[23],zero,ymm3[24],zero,ymm3[25],zero,ymm3[26],zero,ymm3[27],zero,ymm3[28],zero,ymm3[29],zero,ymm3[30],zero,ymm3[31],zero
; AVX512VLVBMI2-NEXT:    vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; AVX512VLVBMI2-NEXT:    vpsllvw %zmm3, %zmm0, %zmm3
; AVX512VLVBMI2-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX512VLVBMI2-NEXT:    vpsubb %ymm1, %ymm4, %ymm1
; AVX512VLVBMI2-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX512VLVBMI2-NEXT:    vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
; AVX512VLVBMI2-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm0
; AVX512VLVBMI2-NEXT:    vporq %zmm0, %zmm3, %zmm0
; AVX512VLVBMI2-NEXT:    vpmovwb %zmm0, %ymm0
; AVX512VLVBMI2-NEXT:    retq
;
; XOPAVX1-LABEL: var_funnnel_v32i8:
; XOPAVX1:       # %bb.0:
; XOPAVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; XOPAVX1-NEXT:    vprotb %xmm2, %xmm3, %xmm2
; XOPAVX1-NEXT:    vprotb %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT:    retq
;
; XOPAVX2-LABEL: var_funnnel_v32i8:
; XOPAVX2:       # %bb.0:
; XOPAVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; XOPAVX2-NEXT:    vextracti128 $1, %ymm0, %xmm3
; XOPAVX2-NEXT:    vprotb %xmm2, %xmm3, %xmm2
; XOPAVX2-NEXT:    vprotb %xmm1, %xmm0, %xmm0
; XOPAVX2-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX2-NEXT:    retq
  %res = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %x, <32 x i8> %x, <32 x i8> %amt)
  ret <32 x i8> %res
}

;
; Uniform Variable Shifts
;

; Rotate every i64 lane of %x left by the same amount: lane 0 of %amt,
; broadcast to all lanes by the zero-mask shufflevector.
define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %amt) nounwind {
; AVX1-LABEL: splatvar_funnnel_v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[0,1,0,1]
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpsubq %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [63,63]
; AVX1-NEXT:    vpand %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT:    vpsrlq %xmm2, %xmm4, %xmm5
; AVX1-NEXT:    vpshufd {{.*#+}} xmm6 = xmm2[2,3,2,3]
; AVX1-NEXT:    vpsrlq %xmm6, %xmm4, %xmm7
; AVX1-NEXT:    vpblendw {{.*#+}} xmm5 = xmm5[0,1,2,3],xmm7[4,5,6,7]
; AVX1-NEXT:    vpsrlq %xmm2, %xmm0, %xmm2
; AVX1-NEXT:    vpsrlq %xmm6, %xmm0, %xmm6
; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm6[4,5,6,7]
; AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm2, %ymm2
; AVX1-NEXT:    vpand %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpsllq %xmm1, %xmm4, %xmm3
; AVX1-NEXT:    vpsllq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT:    vorps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: splatvar_funnnel_v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [63,63]
; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm3
; AVX2-NEXT:    vpsllq %xmm3, %ymm0, %ymm3
; AVX2-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX2-NEXT:    vpsubq %xmm1, %xmm4, %xmm1
; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpsrlq %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    vpor %ymm0, %ymm3, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: splatvar_funnnel_v4i64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT:    vpbroadcastq %xmm1, %ymm1
; AVX512F-NEXT:    vprolvq %zmm1, %zmm0, %zmm0
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: splatvar_funnnel_v4i64:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpbroadcastq %xmm1, %ymm1
; AVX512VL-NEXT:    vprolvq %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: splatvar_funnnel_v4i64:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT:    vpbroadcastq %xmm1, %ymm1
; AVX512BW-NEXT:    vprolvq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512BW-NEXT:    retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v4i64:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    vpbroadcastq %xmm1, %ymm1
; AVX512VLBW-NEXT:    vprolvq %ymm1, %ymm0, %ymm0
; AVX512VLBW-NEXT:    retq
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v4i64:
; AVX512VBMI2:       # %bb.0:
; AVX512VBMI2-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512VBMI2-NEXT:    vpbroadcastq %xmm1, %ymm1
; AVX512VBMI2-NEXT:    vprolvq %zmm1, %zmm0, %zmm0
; AVX512VBMI2-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VBMI2-NEXT:    retq
;
; AVX512VLVBMI2-LABEL: splatvar_funnnel_v4i64:
; AVX512VLVBMI2:       # %bb.0:
; AVX512VLVBMI2-NEXT:    vpbroadcastq %xmm1, %ymm1
; AVX512VLVBMI2-NEXT:    vprolvq %ymm1, %ymm0, %ymm0
; AVX512VLVBMI2-NEXT:    retq
;
; XOPAVX1-LABEL: splatvar_funnnel_v4i64:
; XOPAVX1:       # %bb.0:
; XOPAVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT:    vprotq %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT:    vprotq %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT:    retq
;
; XOPAVX2-LABEL: splatvar_funnnel_v4i64:
; XOPAVX2:       # %bb.0:
; XOPAVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
; XOPAVX2-NEXT:    vpbroadcastq %xmm1, %xmm1
; XOPAVX2-NEXT:    vprotq %xmm1, %xmm2, %xmm2
; XOPAVX2-NEXT:    vprotq %xmm1, %xmm0, %xmm0
; XOPAVX2-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX2-NEXT:    retq
  %splat = shufflevector <4 x i64> %amt, <4 x i64> undef, <4 x i32> zeroinitializer
  %res = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %x, <4 x i64> %x, <4 x i64> %splat)
  ret <4 x i64> %res
}

define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %amt) nounwind {
; AVX1-LABEL: splatvar_funnnel_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm3 = xmm1[0],zero,xmm1[1],zero
; AVX1-NEXT:    vpslld %xmm3, %xmm2, %xmm4
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm5 = [32,32,32,32]
; AVX1-NEXT:    vpsubd %xmm1, %xmm5, %xmm1
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX1-NEXT:    vpsrld %xmm1, %xmm2, %xmm2
; AVX1-NEXT:    vpor %xmm2, %xmm4, %xmm2
; AVX1-NEXT:    vpslld %xmm3, %xmm0, %xmm3
; AVX1-NEXT:    vpsrld %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpor %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: splatvar_funnnel_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastd %xmm1, %xmm1
;
AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31] 709; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 710; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero 711; AVX2-NEXT: vpslld %xmm2, %ymm0, %ymm2 712; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [32,32,32,32] 713; AVX2-NEXT: vpsubd %xmm1, %xmm3, %xmm1 714; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero 715; AVX2-NEXT: vpsrld %xmm1, %ymm0, %ymm0 716; AVX2-NEXT: vpor %ymm0, %ymm2, %ymm0 717; AVX2-NEXT: retq 718; 719; AVX512F-LABEL: splatvar_funnnel_v8i32: 720; AVX512F: # %bb.0: 721; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 722; AVX512F-NEXT: vpbroadcastd %xmm1, %ymm1 723; AVX512F-NEXT: vprolvd %zmm1, %zmm0, %zmm0 724; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 725; AVX512F-NEXT: retq 726; 727; AVX512VL-LABEL: splatvar_funnnel_v8i32: 728; AVX512VL: # %bb.0: 729; AVX512VL-NEXT: vpbroadcastd %xmm1, %ymm1 730; AVX512VL-NEXT: vprolvd %ymm1, %ymm0, %ymm0 731; AVX512VL-NEXT: retq 732; 733; AVX512BW-LABEL: splatvar_funnnel_v8i32: 734; AVX512BW: # %bb.0: 735; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 736; AVX512BW-NEXT: vpbroadcastd %xmm1, %ymm1 737; AVX512BW-NEXT: vprolvd %zmm1, %zmm0, %zmm0 738; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 739; AVX512BW-NEXT: retq 740; 741; AVX512VLBW-LABEL: splatvar_funnnel_v8i32: 742; AVX512VLBW: # %bb.0: 743; AVX512VLBW-NEXT: vpbroadcastd %xmm1, %ymm1 744; AVX512VLBW-NEXT: vprolvd %ymm1, %ymm0, %ymm0 745; AVX512VLBW-NEXT: retq 746; 747; AVX512VBMI2-LABEL: splatvar_funnnel_v8i32: 748; AVX512VBMI2: # %bb.0: 749; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 750; AVX512VBMI2-NEXT: vpbroadcastd %xmm1, %ymm1 751; AVX512VBMI2-NEXT: vprolvd %zmm1, %zmm0, %zmm0 752; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 753; AVX512VBMI2-NEXT: retq 754; 755; AVX512VLVBMI2-LABEL: splatvar_funnnel_v8i32: 756; AVX512VLVBMI2: # %bb.0: 757; AVX512VLVBMI2-NEXT: vpbroadcastd %xmm1, %ymm1 758; 
AVX512VLVBMI2-NEXT: vprolvd %ymm1, %ymm0, %ymm0 759; AVX512VLVBMI2-NEXT: retq 760; 761; XOPAVX1-LABEL: splatvar_funnnel_v8i32: 762; XOPAVX1: # %bb.0: 763; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] 764; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 765; XOPAVX1-NEXT: vprotd %xmm1, %xmm2, %xmm2 766; XOPAVX1-NEXT: vprotd %xmm1, %xmm0, %xmm0 767; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 768; XOPAVX1-NEXT: retq 769; 770; XOPAVX2-LABEL: splatvar_funnnel_v8i32: 771; XOPAVX2: # %bb.0: 772; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm2 773; XOPAVX2-NEXT: vpbroadcastd %xmm1, %xmm1 774; XOPAVX2-NEXT: vprotd %xmm1, %xmm2, %xmm2 775; XOPAVX2-NEXT: vprotd %xmm1, %xmm0, %xmm0 776; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 777; XOPAVX2-NEXT: retq 778 %splat = shufflevector <8 x i32> %amt, <8 x i32> undef, <8 x i32> zeroinitializer 779 %res = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %x, <8 x i32> %x, <8 x i32> %splat) 780 ret <8 x i32> %res 781} 782 783define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounwind { 784; AVX1-LABEL: splatvar_funnnel_v16i16: 785; AVX1: # %bb.0: 786; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] 787; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] 788; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 789; AVX1-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 790; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero 791; AVX1-NEXT: vpsllw %xmm3, %xmm2, %xmm4 792; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [16,16,16,16,16,16,16,16] 793; AVX1-NEXT: vpsubw %xmm1, %xmm5, %xmm1 794; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero 795; AVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2 796; AVX1-NEXT: vpor %xmm2, %xmm4, %xmm2 797; AVX1-NEXT: vpsllw %xmm3, %xmm0, %xmm3 798; AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 799; AVX1-NEXT: vpor %xmm0, %xmm3, %xmm0 800; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 801; AVX1-NEXT: retq 802; 803; AVX2-LABEL: splatvar_funnnel_v16i16: 804; 
AVX2: # %bb.0: 805; AVX2-NEXT: vpbroadcastw %xmm1, %xmm1 806; AVX2-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 807; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero 808; AVX2-NEXT: vpsllw %xmm2, %ymm0, %ymm2 809; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] 810; AVX2-NEXT: vpsubw %xmm1, %xmm3, %xmm1 811; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero 812; AVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 813; AVX2-NEXT: vpor %ymm0, %ymm2, %ymm0 814; AVX2-NEXT: retq 815; 816; AVX512F-LABEL: splatvar_funnnel_v16i16: 817; AVX512F: # %bb.0: 818; AVX512F-NEXT: vpbroadcastw %xmm1, %xmm1 819; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 820; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero 821; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm2 822; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] 823; AVX512F-NEXT: vpsubw %xmm1, %xmm3, %xmm1 824; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero 825; AVX512F-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 826; AVX512F-NEXT: vpor %ymm0, %ymm2, %ymm0 827; AVX512F-NEXT: retq 828; 829; AVX512VL-LABEL: splatvar_funnnel_v16i16: 830; AVX512VL: # %bb.0: 831; AVX512VL-NEXT: vpbroadcastw %xmm1, %xmm1 832; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 833; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero 834; AVX512VL-NEXT: vpsllw %xmm2, %ymm0, %ymm2 835; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] 836; AVX512VL-NEXT: vpsubw %xmm1, %xmm3, %xmm1 837; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero 838; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 839; AVX512VL-NEXT: vpor %ymm0, %ymm2, %ymm0 840; AVX512VL-NEXT: retq 841; 842; AVX512BW-LABEL: splatvar_funnnel_v16i16: 843; AVX512BW: # %bb.0: 844; AVX512BW-NEXT: vpbroadcastw %xmm1, %xmm1 845; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 846; AVX512BW-NEXT: 
vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero 847; AVX512BW-NEXT: vpsllw %xmm2, %ymm0, %ymm2 848; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] 849; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1 850; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero 851; AVX512BW-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 852; AVX512BW-NEXT: vpor %ymm0, %ymm2, %ymm0 853; AVX512BW-NEXT: retq 854; 855; AVX512VLBW-LABEL: splatvar_funnnel_v16i16: 856; AVX512VLBW: # %bb.0: 857; AVX512VLBW-NEXT: vpbroadcastw %xmm1, %xmm1 858; AVX512VLBW-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 859; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero 860; AVX512VLBW-NEXT: vpsllw %xmm2, %ymm0, %ymm2 861; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] 862; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1 863; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero 864; AVX512VLBW-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 865; AVX512VLBW-NEXT: vpor %ymm0, %ymm2, %ymm0 866; AVX512VLBW-NEXT: retq 867; 868; AVX512VBMI2-LABEL: splatvar_funnnel_v16i16: 869; AVX512VBMI2: # %bb.0: 870; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 871; AVX512VBMI2-NEXT: vpbroadcastw %xmm1, %ymm1 872; AVX512VBMI2-NEXT: vpshldvw %zmm1, %zmm0, %zmm0 873; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 874; AVX512VBMI2-NEXT: retq 875; 876; AVX512VLVBMI2-LABEL: splatvar_funnnel_v16i16: 877; AVX512VLVBMI2: # %bb.0: 878; AVX512VLVBMI2-NEXT: vpbroadcastw %xmm1, %ymm1 879; AVX512VLVBMI2-NEXT: vpshldvw %ymm1, %ymm0, %ymm0 880; AVX512VLVBMI2-NEXT: retq 881; 882; XOPAVX1-LABEL: splatvar_funnnel_v16i16: 883; XOPAVX1: # %bb.0: 884; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] 885; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] 886; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 887; XOPAVX1-NEXT: vprotw %xmm1, %xmm2, %xmm2 888; XOPAVX1-NEXT: vprotw %xmm1, %xmm0, 
%xmm0 889; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 890; XOPAVX1-NEXT: retq 891; 892; XOPAVX2-LABEL: splatvar_funnnel_v16i16: 893; XOPAVX2: # %bb.0: 894; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm2 895; XOPAVX2-NEXT: vpbroadcastw %xmm1, %xmm1 896; XOPAVX2-NEXT: vprotw %xmm1, %xmm2, %xmm2 897; XOPAVX2-NEXT: vprotw %xmm1, %xmm0, %xmm0 898; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 899; XOPAVX2-NEXT: retq 900 %splat = shufflevector <16 x i16> %amt, <16 x i16> undef, <16 x i32> zeroinitializer 901 %res = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %x, <16 x i16> %x, <16 x i16> %splat) 902 ret <16 x i16> %res 903} 904 905define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind { 906; AVX1-LABEL: splatvar_funnnel_v32i8: 907; AVX1: # %bb.0: 908; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 909; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 910; AVX1-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 911; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero 912; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 913; AVX1-NEXT: vpsllw %xmm3, %xmm4, %xmm5 914; AVX1-NEXT: vpcmpeqd %xmm6, %xmm6, %xmm6 915; AVX1-NEXT: vpsllw %xmm3, %xmm6, %xmm7 916; AVX1-NEXT: vpshufb %xmm2, %xmm7, %xmm2 917; AVX1-NEXT: vpand %xmm2, %xmm5, %xmm5 918; AVX1-NEXT: vmovdqa {{.*#+}} xmm7 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] 919; AVX1-NEXT: vpsubb %xmm1, %xmm7, %xmm1 920; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero 921; AVX1-NEXT: vpsrlw %xmm1, %xmm4, %xmm4 922; AVX1-NEXT: vpsrlw %xmm1, %xmm6, %xmm6 923; AVX1-NEXT: vpshufb {{.*#+}} xmm6 = xmm6[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] 924; AVX1-NEXT: vpand %xmm6, %xmm4, %xmm4 925; AVX1-NEXT: vpor %xmm4, %xmm5, %xmm4 926; AVX1-NEXT: vpsllw %xmm3, %xmm0, %xmm3 927; AVX1-NEXT: vpand %xmm2, %xmm3, %xmm2 928; AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 929; AVX1-NEXT: vpand %xmm6, %xmm0, %xmm0 930; AVX1-NEXT: vpor 
%xmm0, %xmm2, %xmm0 931; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0 932; AVX1-NEXT: retq 933; 934; AVX2-LABEL: splatvar_funnnel_v32i8: 935; AVX2: # %bb.0: 936; AVX2-NEXT: vpbroadcastb %xmm1, %xmm1 937; AVX2-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 938; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero 939; AVX2-NEXT: vpsllw %xmm2, %ymm0, %ymm3 940; AVX2-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4 941; AVX2-NEXT: vpsllw %xmm2, %xmm4, %xmm2 942; AVX2-NEXT: vpbroadcastb %xmm2, %ymm2 943; AVX2-NEXT: vpand %ymm2, %ymm3, %ymm2 944; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] 945; AVX2-NEXT: vpsubb %xmm1, %xmm3, %xmm1 946; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero 947; AVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 948; AVX2-NEXT: vpsrlw %xmm1, %xmm4, %xmm1 949; AVX2-NEXT: vpsrlw $8, %xmm1, %xmm1 950; AVX2-NEXT: vpbroadcastb %xmm1, %ymm1 951; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 952; AVX2-NEXT: vpor %ymm0, %ymm2, %ymm0 953; AVX2-NEXT: retq 954; 955; AVX512F-LABEL: splatvar_funnnel_v32i8: 956; AVX512F: # %bb.0: 957; AVX512F-NEXT: vpbroadcastb %xmm1, %xmm1 958; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 959; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero 960; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm3 961; AVX512F-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4 962; AVX512F-NEXT: vpsllw %xmm2, %xmm4, %xmm2 963; AVX512F-NEXT: vpbroadcastb %xmm2, %ymm2 964; AVX512F-NEXT: vpand %ymm2, %ymm3, %ymm2 965; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] 966; AVX512F-NEXT: vpsubb %xmm1, %xmm3, %xmm1 967; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero 968; AVX512F-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 969; AVX512F-NEXT: vpsrlw %xmm1, %xmm4, %xmm1 970; 
AVX512F-NEXT: vpsrlw $8, %xmm1, %xmm1 971; AVX512F-NEXT: vpbroadcastb %xmm1, %ymm1 972; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0 973; AVX512F-NEXT: vpor %ymm0, %ymm2, %ymm0 974; AVX512F-NEXT: retq 975; 976; AVX512VL-LABEL: splatvar_funnnel_v32i8: 977; AVX512VL: # %bb.0: 978; AVX512VL-NEXT: vpbroadcastb %xmm1, %xmm1 979; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 980; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero 981; AVX512VL-NEXT: vpsllw %xmm2, %ymm0, %ymm3 982; AVX512VL-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4 983; AVX512VL-NEXT: vpsllw %xmm2, %xmm4, %xmm2 984; AVX512VL-NEXT: vpbroadcastb %xmm2, %ymm2 985; AVX512VL-NEXT: vpand %ymm2, %ymm3, %ymm2 986; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] 987; AVX512VL-NEXT: vpsubb %xmm1, %xmm3, %xmm1 988; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero 989; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm3 990; AVX512VL-NEXT: vpsrlw %xmm1, %xmm4, %xmm0 991; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm0 992; AVX512VL-NEXT: vpbroadcastb %xmm0, %ymm0 993; AVX512VL-NEXT: vpternlogq $236, %ymm3, %ymm2, %ymm0 994; AVX512VL-NEXT: retq 995; 996; AVX512BW-LABEL: splatvar_funnnel_v32i8: 997; AVX512BW: # %bb.0: 998; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1 999; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7] 1000; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm3 1001; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero 1002; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = 
ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero 1003; AVX512BW-NEXT: vpsllw %xmm3, %zmm0, %zmm3 1004; AVX512BW-NEXT: vpxor %xmm4, %xmm4, %xmm4 1005; AVX512BW-NEXT: vpsubb %xmm1, %xmm4, %xmm1 1006; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1 1007; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero 1008; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 1009; AVX512BW-NEXT: vporq %zmm0, %zmm3, %zmm0 1010; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 1011; AVX512BW-NEXT: retq 1012; 1013; AVX512VLBW-LABEL: splatvar_funnnel_v32i8: 1014; AVX512VLBW: # %bb.0: 1015; AVX512VLBW-NEXT: vpbroadcastb %xmm1, %xmm1 1016; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7] 1017; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm3 1018; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero 1019; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero 1020; AVX512VLBW-NEXT: vpsllw %xmm3, %zmm0, %zmm3 1021; AVX512VLBW-NEXT: vpxor %xmm4, %xmm4, %xmm4 1022; AVX512VLBW-NEXT: vpsubb %xmm1, %xmm4, %xmm1 1023; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, 
%xmm1 1024; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero 1025; AVX512VLBW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 1026; AVX512VLBW-NEXT: vporq %zmm0, %zmm3, %zmm0 1027; AVX512VLBW-NEXT: vpmovwb %zmm0, %ymm0 1028; AVX512VLBW-NEXT: retq 1029; 1030; AVX512VBMI2-LABEL: splatvar_funnnel_v32i8: 1031; AVX512VBMI2: # %bb.0: 1032; AVX512VBMI2-NEXT: vpbroadcastb %xmm1, %xmm1 1033; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7] 1034; AVX512VBMI2-NEXT: vpand %xmm2, %xmm1, %xmm3 1035; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero 1036; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero 1037; AVX512VBMI2-NEXT: vpsllw %xmm3, %zmm0, %zmm3 1038; AVX512VBMI2-NEXT: vpxor %xmm4, %xmm4, %xmm4 1039; AVX512VBMI2-NEXT: vpsubb %xmm1, %xmm4, %xmm1 1040; AVX512VBMI2-NEXT: vpand %xmm2, %xmm1, %xmm1 1041; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero 1042; AVX512VBMI2-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 1043; AVX512VBMI2-NEXT: vporq %zmm0, %zmm3, %zmm0 1044; AVX512VBMI2-NEXT: vpmovwb %zmm0, %ymm0 1045; AVX512VBMI2-NEXT: retq 1046; 1047; AVX512VLVBMI2-LABEL: splatvar_funnnel_v32i8: 1048; AVX512VLVBMI2: # %bb.0: 1049; AVX512VLVBMI2-NEXT: vpbroadcastb %xmm1, %xmm1 1050; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7] 1051; AVX512VLVBMI2-NEXT: vpand %xmm2, %xmm1, %xmm3 1052; 
AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero 1053; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero 1054; AVX512VLVBMI2-NEXT: vpsllw %xmm3, %zmm0, %zmm3 1055; AVX512VLVBMI2-NEXT: vpxor %xmm4, %xmm4, %xmm4 1056; AVX512VLVBMI2-NEXT: vpsubb %xmm1, %xmm4, %xmm1 1057; AVX512VLVBMI2-NEXT: vpand %xmm2, %xmm1, %xmm1 1058; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero 1059; AVX512VLVBMI2-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 1060; AVX512VLVBMI2-NEXT: vporq %zmm0, %zmm3, %zmm0 1061; AVX512VLVBMI2-NEXT: vpmovwb %zmm0, %ymm0 1062; AVX512VLVBMI2-NEXT: retq 1063; 1064; XOPAVX1-LABEL: splatvar_funnnel_v32i8: 1065; XOPAVX1: # %bb.0: 1066; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 1067; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 1068; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 1069; XOPAVX1-NEXT: vprotb %xmm1, %xmm2, %xmm2 1070; XOPAVX1-NEXT: vprotb %xmm1, %xmm0, %xmm0 1071; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1072; XOPAVX1-NEXT: retq 1073; 1074; XOPAVX2-LABEL: splatvar_funnnel_v32i8: 1075; XOPAVX2: # %bb.0: 1076; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm2 1077; XOPAVX2-NEXT: vpbroadcastb %xmm1, %xmm1 1078; XOPAVX2-NEXT: vprotb %xmm1, %xmm2, %xmm2 1079; XOPAVX2-NEXT: vprotb %xmm1, %xmm0, %xmm0 1080; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 1081; XOPAVX2-NEXT: retq 1082 %splat = shufflevector <32 x i8> %amt, <32 x i8> undef, <32 x i32> zeroinitializer 1083 %res = call 
<32 x i8> @llvm.fshl.v32i8(<32 x i8> %x, <32 x i8> %x, <32 x i8> %splat) 1084 ret <32 x i8> %res 1085} 1086 1087; 1088; Constant Shifts 1089; 1090 1091define <4 x i64> @constant_funnnel_v4i64(<4 x i64> %x) nounwind { 1092; AVX1-LABEL: constant_funnnel_v4i64: 1093; AVX1: # %bb.0: 1094; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1095; AVX1-NEXT: vpsrlq $4, %xmm1, %xmm2 1096; AVX1-NEXT: vpsrlq $14, %xmm1, %xmm3 1097; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7] 1098; AVX1-NEXT: vpsrlq $50, %xmm0, %xmm3 1099; AVX1-NEXT: vpsrlq $60, %xmm0, %xmm4 1100; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm4[0,1,2,3],xmm3[4,5,6,7] 1101; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 1102; AVX1-NEXT: vpsllq $60, %xmm1, %xmm3 1103; AVX1-NEXT: vpsllq $50, %xmm1, %xmm1 1104; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7] 1105; AVX1-NEXT: vpsllq $14, %xmm0, %xmm3 1106; AVX1-NEXT: vpsllq $4, %xmm0, %xmm0 1107; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7] 1108; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1109; AVX1-NEXT: vorps %ymm2, %ymm0, %ymm0 1110; AVX1-NEXT: retq 1111; 1112; AVX2-LABEL: constant_funnnel_v4i64: 1113; AVX2: # %bb.0: 1114; AVX2-NEXT: vpsrlvq {{.*}}(%rip), %ymm0, %ymm1 1115; AVX2-NEXT: vpsllvq {{.*}}(%rip), %ymm0, %ymm0 1116; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 1117; AVX2-NEXT: retq 1118; 1119; AVX512F-LABEL: constant_funnnel_v4i64: 1120; AVX512F: # %bb.0: 1121; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 1122; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [4,14,50,60] 1123; AVX512F-NEXT: vprolvq %zmm1, %zmm0, %zmm0 1124; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1125; AVX512F-NEXT: retq 1126; 1127; AVX512VL-LABEL: constant_funnnel_v4i64: 1128; AVX512VL: # %bb.0: 1129; AVX512VL-NEXT: vprolvq {{.*}}(%rip), %ymm0, %ymm0 1130; AVX512VL-NEXT: retq 1131; 1132; AVX512BW-LABEL: constant_funnnel_v4i64: 1133; AVX512BW: # %bb.0: 1134; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 1135; AVX512BW-NEXT: 
vmovdqa {{.*#+}} ymm1 = [4,14,50,60] 1136; AVX512BW-NEXT: vprolvq %zmm1, %zmm0, %zmm0 1137; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1138; AVX512BW-NEXT: retq 1139; 1140; AVX512VLBW-LABEL: constant_funnnel_v4i64: 1141; AVX512VLBW: # %bb.0: 1142; AVX512VLBW-NEXT: vprolvq {{.*}}(%rip), %ymm0, %ymm0 1143; AVX512VLBW-NEXT: retq 1144; 1145; AVX512VBMI2-LABEL: constant_funnnel_v4i64: 1146; AVX512VBMI2: # %bb.0: 1147; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 1148; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm1 = [4,14,50,60] 1149; AVX512VBMI2-NEXT: vprolvq %zmm1, %zmm0, %zmm0 1150; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1151; AVX512VBMI2-NEXT: retq 1152; 1153; AVX512VLVBMI2-LABEL: constant_funnnel_v4i64: 1154; AVX512VLVBMI2: # %bb.0: 1155; AVX512VLVBMI2-NEXT: vprolvq {{.*}}(%rip), %ymm0, %ymm0 1156; AVX512VLVBMI2-NEXT: retq 1157; 1158; XOPAVX1-LABEL: constant_funnnel_v4i64: 1159; XOPAVX1: # %bb.0: 1160; XOPAVX1-NEXT: vprotq {{.*}}(%rip), %xmm0, %xmm1 1161; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1162; XOPAVX1-NEXT: vprotq {{.*}}(%rip), %xmm0, %xmm0 1163; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1164; XOPAVX1-NEXT: retq 1165; 1166; XOPAVX2-LABEL: constant_funnnel_v4i64: 1167; XOPAVX2: # %bb.0: 1168; XOPAVX2-NEXT: vprotq {{.*}}(%rip), %xmm0, %xmm1 1169; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 1170; XOPAVX2-NEXT: vprotq {{.*}}(%rip), %xmm0, %xmm0 1171; XOPAVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 1172; XOPAVX2-NEXT: retq 1173 %res = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %x, <4 x i64> %x, <4 x i64> <i64 4, i64 14, i64 50, i64 60>) 1174 ret <4 x i64> %res 1175} 1176 1177define <8 x i32> @constant_funnnel_v8i32(<8 x i32> %x) nounwind { 1178; AVX1-LABEL: constant_funnnel_v8i32: 1179; AVX1: # %bb.0: 1180; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [256,512,1024,2048] 1181; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 1182; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1183; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = 
xmm3[1,1,3,3] 1184; AVX1-NEXT: vpmuludq %xmm2, %xmm4, %xmm2 1185; AVX1-NEXT: vpmuludq %xmm1, %xmm3, %xmm1 1186; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[1,1,3,3] 1187; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7] 1188; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,2,2] 1189; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7] 1190; AVX1-NEXT: vpor %xmm3, %xmm1, %xmm1 1191; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [16,32,64,128] 1192; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[1,1,3,3] 1193; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm0[1,1,3,3] 1194; AVX1-NEXT: vpmuludq %xmm3, %xmm4, %xmm3 1195; AVX1-NEXT: vpmuludq %xmm2, %xmm0, %xmm0 1196; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] 1197; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7] 1198; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[0,0,2,2] 1199; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm3[2,3],xmm0[4,5],xmm3[6,7] 1200; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0 1201; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1202; AVX1-NEXT: retq 1203; 1204; AVX2-LABEL: constant_funnnel_v8i32: 1205; AVX2: # %bb.0: 1206; AVX2-NEXT: vpsrlvd {{.*}}(%rip), %ymm0, %ymm1 1207; AVX2-NEXT: vpsllvd {{.*}}(%rip), %ymm0, %ymm0 1208; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 1209; AVX2-NEXT: retq 1210; 1211; AVX512F-LABEL: constant_funnnel_v8i32: 1212; AVX512F: # %bb.0: 1213; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 1214; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [4,5,6,7,8,9,10,11] 1215; AVX512F-NEXT: vprolvd %zmm1, %zmm0, %zmm0 1216; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1217; AVX512F-NEXT: retq 1218; 1219; AVX512VL-LABEL: constant_funnnel_v8i32: 1220; AVX512VL: # %bb.0: 1221; AVX512VL-NEXT: vprolvd {{.*}}(%rip), %ymm0, %ymm0 1222; AVX512VL-NEXT: retq 1223; 1224; AVX512BW-LABEL: constant_funnnel_v8i32: 1225; AVX512BW: # %bb.0: 1226; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 1227; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm1 = 
[4,5,6,7,8,9,10,11] 1228; AVX512BW-NEXT: vprolvd %zmm1, %zmm0, %zmm0 1229; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1230; AVX512BW-NEXT: retq 1231; 1232; AVX512VLBW-LABEL: constant_funnnel_v8i32: 1233; AVX512VLBW: # %bb.0: 1234; AVX512VLBW-NEXT: vprolvd {{.*}}(%rip), %ymm0, %ymm0 1235; AVX512VLBW-NEXT: retq 1236; 1237; AVX512VBMI2-LABEL: constant_funnnel_v8i32: 1238; AVX512VBMI2: # %bb.0: 1239; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 1240; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm1 = [4,5,6,7,8,9,10,11] 1241; AVX512VBMI2-NEXT: vprolvd %zmm1, %zmm0, %zmm0 1242; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1243; AVX512VBMI2-NEXT: retq 1244; 1245; AVX512VLVBMI2-LABEL: constant_funnnel_v8i32: 1246; AVX512VLVBMI2: # %bb.0: 1247; AVX512VLVBMI2-NEXT: vprolvd {{.*}}(%rip), %ymm0, %ymm0 1248; AVX512VLVBMI2-NEXT: retq 1249; 1250; XOPAVX1-LABEL: constant_funnnel_v8i32: 1251; XOPAVX1: # %bb.0: 1252; XOPAVX1-NEXT: vprotd {{.*}}(%rip), %xmm0, %xmm1 1253; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1254; XOPAVX1-NEXT: vprotd {{.*}}(%rip), %xmm0, %xmm0 1255; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1256; XOPAVX1-NEXT: retq 1257; 1258; XOPAVX2-LABEL: constant_funnnel_v8i32: 1259; XOPAVX2: # %bb.0: 1260; XOPAVX2-NEXT: vprotd {{.*}}(%rip), %xmm0, %xmm1 1261; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 1262; XOPAVX2-NEXT: vprotd {{.*}}(%rip), %xmm0, %xmm0 1263; XOPAVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 1264; XOPAVX2-NEXT: retq 1265 %res = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %x, <8 x i32> %x, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>) 1266 ret <8 x i32> %res 1267} 1268 1269define <16 x i16> @constant_funnnel_v16i16(<16 x i16> %x) nounwind { 1270; AVX1-LABEL: constant_funnnel_v16i16: 1271; AVX1: # %bb.0: 1272; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1273; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [256,512,1024,2048,4096,8192,16384,32768] 1274; AVX1-NEXT: vpmulhuw %xmm2, %xmm1, %xmm3 1275; 
AVX1-NEXT: vpmullw %xmm2, %xmm1, %xmm1 1276; AVX1-NEXT: vpor %xmm3, %xmm1, %xmm1 1277; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128] 1278; AVX1-NEXT: vpmulhuw %xmm2, %xmm0, %xmm3 1279; AVX1-NEXT: vpmullw %xmm2, %xmm0, %xmm0 1280; AVX1-NEXT: vpor %xmm3, %xmm0, %xmm0 1281; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1282; AVX1-NEXT: retq 1283; 1284; AVX2-LABEL: constant_funnnel_v16i16: 1285; AVX2: # %bb.0: 1286; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768] 1287; AVX2-NEXT: vpmulhuw %ymm1, %ymm0, %ymm2 1288; AVX2-NEXT: vpmullw %ymm1, %ymm0, %ymm0 1289; AVX2-NEXT: vpor %ymm2, %ymm0, %ymm0 1290; AVX2-NEXT: retq 1291; 1292; AVX512F-LABEL: constant_funnnel_v16i16: 1293; AVX512F: # %bb.0: 1294; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768] 1295; AVX512F-NEXT: vpmulhuw %ymm1, %ymm0, %ymm2 1296; AVX512F-NEXT: vpmullw %ymm1, %ymm0, %ymm0 1297; AVX512F-NEXT: vpor %ymm2, %ymm0, %ymm0 1298; AVX512F-NEXT: retq 1299; 1300; AVX512VL-LABEL: constant_funnnel_v16i16: 1301; AVX512VL: # %bb.0: 1302; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768] 1303; AVX512VL-NEXT: vpmulhuw %ymm1, %ymm0, %ymm2 1304; AVX512VL-NEXT: vpmullw %ymm1, %ymm0, %ymm0 1305; AVX512VL-NEXT: vpor %ymm2, %ymm0, %ymm0 1306; AVX512VL-NEXT: retq 1307; 1308; AVX512BW-LABEL: constant_funnnel_v16i16: 1309; AVX512BW: # %bb.0: 1310; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 1311; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm1 = [16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1] 1312; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm1 1313; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] 1314; AVX512BW-NEXT: vpsllvw %zmm2, %zmm0, %zmm0 1315; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0 1316; AVX512BW-NEXT: retq 1317; 1318; AVX512VLBW-LABEL: constant_funnnel_v16i16: 1319; AVX512VLBW: # %bb.0: 1320; AVX512VLBW-NEXT: vpsrlvw {{.*}}(%rip), 
%ymm0, %ymm1 1321; AVX512VLBW-NEXT: vpsllvw {{.*}}(%rip), %ymm0, %ymm0 1322; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0 1323; AVX512VLBW-NEXT: retq 1324; 1325; AVX512VBMI2-LABEL: constant_funnnel_v16i16: 1326; AVX512VBMI2: # %bb.0: 1327; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 1328; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] 1329; AVX512VBMI2-NEXT: vpshldvw %zmm1, %zmm0, %zmm0 1330; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1331; AVX512VBMI2-NEXT: retq 1332; 1333; AVX512VLVBMI2-LABEL: constant_funnnel_v16i16: 1334; AVX512VLVBMI2: # %bb.0: 1335; AVX512VLVBMI2-NEXT: vpshldvw {{.*}}(%rip), %ymm0, %ymm0 1336; AVX512VLVBMI2-NEXT: retq 1337; 1338; XOPAVX1-LABEL: constant_funnnel_v16i16: 1339; XOPAVX1: # %bb.0: 1340; XOPAVX1-NEXT: vprotw {{.*}}(%rip), %xmm0, %xmm1 1341; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1342; XOPAVX1-NEXT: vprotw {{.*}}(%rip), %xmm0, %xmm0 1343; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1344; XOPAVX1-NEXT: retq 1345; 1346; XOPAVX2-LABEL: constant_funnnel_v16i16: 1347; XOPAVX2: # %bb.0: 1348; XOPAVX2-NEXT: vprotw {{.*}}(%rip), %xmm0, %xmm1 1349; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 1350; XOPAVX2-NEXT: vprotw {{.*}}(%rip), %xmm0, %xmm0 1351; XOPAVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 1352; XOPAVX2-NEXT: retq 1353 %res = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %x, <16 x i16> %x, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>) 1354 ret <16 x i16> %res 1355} 1356 1357define <32 x i8> @constant_funnnel_v32i8(<32 x i8> %x) nounwind { 1358; AVX1-LABEL: constant_funnnel_v32i8: 1359; AVX1: # %bb.0: 1360; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1361; AVX1-NEXT: vpxor %xmm8, %xmm8, %xmm8 1362; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm1[8],xmm8[8],xmm1[9],xmm8[9],xmm1[10],xmm8[10],xmm1[11],xmm8[11],xmm1[12],xmm8[12],xmm1[13],xmm8[13],xmm1[14],xmm8[14],xmm1[15],xmm8[15] 
1363; AVX1-NEXT: vmovdqa {{.*#+}} xmm9 = [256,128,64,32,16,8,4,2] 1364; AVX1-NEXT: vpmullw %xmm3, %xmm9, %xmm3 1365; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3 1366; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm5 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero 1367; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [256,2,4,8,16,32,64,128] 1368; AVX1-NEXT: vpmullw %xmm6, %xmm5, %xmm7 1369; AVX1-NEXT: vpsrlw $8, %xmm7, %xmm7 1370; AVX1-NEXT: vpackuswb %xmm3, %xmm7, %xmm3 1371; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 1372; AVX1-NEXT: vmovdqa {{.*#+}} xmm7 = [1,128,64,32,16,8,4,2] 1373; AVX1-NEXT: vpmullw %xmm7, %xmm1, %xmm1 1374; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] 1375; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1 1376; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128] 1377; AVX1-NEXT: vpmullw %xmm4, %xmm5, %xmm5 1378; AVX1-NEXT: vpand %xmm2, %xmm5, %xmm5 1379; AVX1-NEXT: vpackuswb %xmm1, %xmm5, %xmm1 1380; AVX1-NEXT: vpor %xmm3, %xmm1, %xmm1 1381; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm0[8],xmm8[8],xmm0[9],xmm8[9],xmm0[10],xmm8[10],xmm0[11],xmm8[11],xmm0[12],xmm8[12],xmm0[13],xmm8[13],xmm0[14],xmm8[14],xmm0[15],xmm8[15] 1382; AVX1-NEXT: vpmullw %xmm3, %xmm9, %xmm3 1383; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3 1384; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm5 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1385; AVX1-NEXT: vpmullw %xmm6, %xmm5, %xmm6 1386; AVX1-NEXT: vpsrlw $8, %xmm6, %xmm6 1387; AVX1-NEXT: vpackuswb %xmm3, %xmm6, %xmm3 1388; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 1389; AVX1-NEXT: vpmullw %xmm7, %xmm0, %xmm0 1390; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 1391; AVX1-NEXT: vpmullw %xmm4, %xmm5, %xmm4 1392; AVX1-NEXT: vpand %xmm2, %xmm4, %xmm2 1393; AVX1-NEXT: vpackuswb %xmm0, %xmm2, %xmm0 1394; AVX1-NEXT: vpor %xmm3, %xmm0, %xmm0 1395; AVX1-NEXT: vinsertf128 
$1, %xmm1, %ymm0, %ymm0 1396; AVX1-NEXT: retq 1397; 1398; AVX2-LABEL: constant_funnnel_v32i8: 1399; AVX2: # %bb.0: 1400; AVX2-NEXT: vpsllw $4, %ymm0, %ymm1 1401; AVX2-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1 1402; AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256] 1403; AVX2-NEXT: # ymm2 = mem[0,1,0,1] 1404; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm1 1405; AVX2-NEXT: vpsllw $2, %ymm1, %ymm3 1406; AVX2-NEXT: vpand {{.*}}(%rip), %ymm3, %ymm3 1407; AVX2-NEXT: vpaddb %ymm2, %ymm2, %ymm2 1408; AVX2-NEXT: vpblendvb %ymm2, %ymm3, %ymm1, %ymm1 1409; AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm3 1410; AVX2-NEXT: vpaddb %ymm2, %ymm2, %ymm2 1411; AVX2-NEXT: vpblendvb %ymm2, %ymm3, %ymm1, %ymm1 1412; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 1413; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8],ymm2[8],ymm0[9],ymm2[9],ymm0[10],ymm2[10],ymm0[11],ymm2[11],ymm0[12],ymm2[12],ymm0[13],ymm2[13],ymm0[14],ymm2[14],ymm0[15],ymm2[15],ymm0[24],ymm2[24],ymm0[25],ymm2[25],ymm0[26],ymm2[26],ymm0[27],ymm2[27],ymm0[28],ymm2[28],ymm0[29],ymm2[29],ymm0[30],ymm2[30],ymm0[31],ymm2[31] 1414; AVX2-NEXT: vpmullw {{.*}}(%rip), %ymm3, %ymm3 1415; AVX2-NEXT: vpsrlw $8, %ymm3, %ymm3 1416; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm2[0],ymm0[1],ymm2[1],ymm0[2],ymm2[2],ymm0[3],ymm2[3],ymm0[4],ymm2[4],ymm0[5],ymm2[5],ymm0[6],ymm2[6],ymm0[7],ymm2[7],ymm0[16],ymm2[16],ymm0[17],ymm2[17],ymm0[18],ymm2[18],ymm0[19],ymm2[19],ymm0[20],ymm2[20],ymm0[21],ymm2[21],ymm0[22],ymm2[22],ymm0[23],ymm2[23] 1417; AVX2-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0 1418; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0 1419; AVX2-NEXT: vpackuswb %ymm3, %ymm0, %ymm0 1420; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0 1421; AVX2-NEXT: retq 1422; 1423; AVX512F-LABEL: constant_funnnel_v32i8: 1424; AVX512F: # %bb.0: 1425; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm1 1426; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1 1427; AVX512F-NEXT: vbroadcasti128 {{.*#+}} ymm2 = 
[8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256] 1428; AVX512F-NEXT: # ymm2 = mem[0,1,0,1] 1429; AVX512F-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm1 1430; AVX512F-NEXT: vpsllw $2, %ymm1, %ymm3 1431; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm3, %ymm3 1432; AVX512F-NEXT: vpaddb %ymm2, %ymm2, %ymm2 1433; AVX512F-NEXT: vpblendvb %ymm2, %ymm3, %ymm1, %ymm1 1434; AVX512F-NEXT: vpaddb %ymm1, %ymm1, %ymm3 1435; AVX512F-NEXT: vpaddb %ymm2, %ymm2, %ymm2 1436; AVX512F-NEXT: vpblendvb %ymm2, %ymm3, %ymm1, %ymm1 1437; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 1438; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8],ymm2[8],ymm0[9],ymm2[9],ymm0[10],ymm2[10],ymm0[11],ymm2[11],ymm0[12],ymm2[12],ymm0[13],ymm2[13],ymm0[14],ymm2[14],ymm0[15],ymm2[15],ymm0[24],ymm2[24],ymm0[25],ymm2[25],ymm0[26],ymm2[26],ymm0[27],ymm2[27],ymm0[28],ymm2[28],ymm0[29],ymm2[29],ymm0[30],ymm2[30],ymm0[31],ymm2[31] 1439; AVX512F-NEXT: vpmullw {{.*}}(%rip), %ymm3, %ymm3 1440; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3 1441; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm2[0],ymm0[1],ymm2[1],ymm0[2],ymm2[2],ymm0[3],ymm2[3],ymm0[4],ymm2[4],ymm0[5],ymm2[5],ymm0[6],ymm2[6],ymm0[7],ymm2[7],ymm0[16],ymm2[16],ymm0[17],ymm2[17],ymm0[18],ymm2[18],ymm0[19],ymm2[19],ymm0[20],ymm2[20],ymm0[21],ymm2[21],ymm0[22],ymm2[22],ymm0[23],ymm2[23] 1442; AVX512F-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0 1443; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0 1444; AVX512F-NEXT: vpackuswb %ymm3, %ymm0, %ymm0 1445; AVX512F-NEXT: vpor %ymm0, %ymm1, %ymm0 1446; AVX512F-NEXT: retq 1447; 1448; AVX512VL-LABEL: constant_funnnel_v32i8: 1449; AVX512VL: # %bb.0: 1450; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm1 1451; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1 1452; AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256] 1453; AVX512VL-NEXT: # ymm2 = mem[0,1,0,1] 1454; AVX512VL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm1 1455; 
AVX512VL-NEXT: vpsllw $2, %ymm1, %ymm3 1456; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm3, %ymm3 1457; AVX512VL-NEXT: vpaddb %ymm2, %ymm2, %ymm2 1458; AVX512VL-NEXT: vpblendvb %ymm2, %ymm3, %ymm1, %ymm1 1459; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm3 1460; AVX512VL-NEXT: vpaddb %ymm2, %ymm2, %ymm2 1461; AVX512VL-NEXT: vpblendvb %ymm2, %ymm3, %ymm1, %ymm1 1462; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2 1463; AVX512VL-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8],ymm2[8],ymm0[9],ymm2[9],ymm0[10],ymm2[10],ymm0[11],ymm2[11],ymm0[12],ymm2[12],ymm0[13],ymm2[13],ymm0[14],ymm2[14],ymm0[15],ymm2[15],ymm0[24],ymm2[24],ymm0[25],ymm2[25],ymm0[26],ymm2[26],ymm0[27],ymm2[27],ymm0[28],ymm2[28],ymm0[29],ymm2[29],ymm0[30],ymm2[30],ymm0[31],ymm2[31] 1464; AVX512VL-NEXT: vpmullw {{.*}}(%rip), %ymm3, %ymm3 1465; AVX512VL-NEXT: vpsrlw $8, %ymm3, %ymm3 1466; AVX512VL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm2[0],ymm0[1],ymm2[1],ymm0[2],ymm2[2],ymm0[3],ymm2[3],ymm0[4],ymm2[4],ymm0[5],ymm2[5],ymm0[6],ymm2[6],ymm0[7],ymm2[7],ymm0[16],ymm2[16],ymm0[17],ymm2[17],ymm0[18],ymm2[18],ymm0[19],ymm2[19],ymm0[20],ymm2[20],ymm0[21],ymm2[21],ymm0[22],ymm2[22],ymm0[23],ymm2[23] 1467; AVX512VL-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0 1468; AVX512VL-NEXT: vpsrlw $8, %ymm0, %ymm0 1469; AVX512VL-NEXT: vpackuswb %ymm3, %ymm0, %ymm0 1470; AVX512VL-NEXT: vpor %ymm0, %ymm1, %ymm0 1471; AVX512VL-NEXT: retq 1472; 1473; AVX512BW-LABEL: constant_funnnel_v32i8: 1474; AVX512BW: # %bb.0: 1475; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero 1476; AVX512BW-NEXT: vpsrlvw {{.*}}(%rip), %zmm0, %zmm1 
1477; AVX512BW-NEXT: vpsllvw {{.*}}(%rip), %zmm0, %zmm0 1478; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 1479; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 1480; AVX512BW-NEXT: retq 1481; 1482; AVX512VLBW-LABEL: constant_funnnel_v32i8: 1483; AVX512VLBW: # %bb.0: 1484; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero 1485; AVX512VLBW-NEXT: vpsrlvw {{.*}}(%rip), %zmm0, %zmm1 1486; AVX512VLBW-NEXT: vpsllvw {{.*}}(%rip), %zmm0, %zmm0 1487; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0 1488; AVX512VLBW-NEXT: vpmovwb %zmm0, %ymm0 1489; AVX512VLBW-NEXT: retq 1490; 1491; AVX512VBMI2-LABEL: constant_funnnel_v32i8: 1492; AVX512VBMI2: # %bb.0: 1493; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero 1494; AVX512VBMI2-NEXT: vpsrlvw {{.*}}(%rip), %zmm0, %zmm1 1495; AVX512VBMI2-NEXT: vpsllvw {{.*}}(%rip), %zmm0, %zmm0 1496; AVX512VBMI2-NEXT: vporq %zmm1, %zmm0, %zmm0 1497; AVX512VBMI2-NEXT: vpmovwb %zmm0, %ymm0 1498; AVX512VBMI2-NEXT: retq 1499; 1500; AVX512VLVBMI2-LABEL: constant_funnnel_v32i8: 1501; AVX512VLVBMI2: # %bb.0: 1502; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} zmm0 = 
ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero 1503; AVX512VLVBMI2-NEXT: vpsrlvw {{.*}}(%rip), %zmm0, %zmm1 1504; AVX512VLVBMI2-NEXT: vpsllvw {{.*}}(%rip), %zmm0, %zmm0 1505; AVX512VLVBMI2-NEXT: vporq %zmm1, %zmm0, %zmm0 1506; AVX512VLVBMI2-NEXT: vpmovwb %zmm0, %ymm0 1507; AVX512VLVBMI2-NEXT: retq 1508; 1509; XOPAVX1-LABEL: constant_funnnel_v32i8: 1510; XOPAVX1: # %bb.0: 1511; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1512; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,6,7,0,7,6,5,4,3,2,1] 1513; XOPAVX1-NEXT: vprotb %xmm2, %xmm1, %xmm1 1514; XOPAVX1-NEXT: vprotb %xmm2, %xmm0, %xmm0 1515; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1516; XOPAVX1-NEXT: retq 1517; 1518; XOPAVX2-LABEL: constant_funnnel_v32i8: 1519; XOPAVX2: # %bb.0: 1520; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1521; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,6,7,0,7,6,5,4,3,2,1] 1522; XOPAVX2-NEXT: vprotb %xmm2, %xmm1, %xmm1 1523; XOPAVX2-NEXT: vprotb %xmm2, %xmm0, %xmm0 1524; XOPAVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 1525; XOPAVX2-NEXT: retq 1526 %res = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %x, <32 x i8> %x, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1>) 1527 ret <32 x i8> %res 1528} 1529 1530; 1531; Uniform Constant Shifts 1532; 1533 1534define <4 x i64> @splatconstant_funnnel_v4i64(<4 x i64> %x) nounwind { 1535; AVX1-LABEL: splatconstant_funnnel_v4i64: 1536; AVX1: # %bb.0: 1537; AVX1-NEXT: vpsrlq $50, %xmm0, %xmm1 1538; 
AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 1539; AVX1-NEXT: vpsrlq $50, %xmm2, %xmm3 1540; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 1541; AVX1-NEXT: vpsllq $14, %xmm0, %xmm0 1542; AVX1-NEXT: vpsllq $14, %xmm2, %xmm2 1543; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1544; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 1545; AVX1-NEXT: retq 1546; 1547; AVX2-LABEL: splatconstant_funnnel_v4i64: 1548; AVX2: # %bb.0: 1549; AVX2-NEXT: vpsrlq $50, %ymm0, %ymm1 1550; AVX2-NEXT: vpsllq $14, %ymm0, %ymm0 1551; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 1552; AVX2-NEXT: retq 1553; 1554; AVX512F-LABEL: splatconstant_funnnel_v4i64: 1555; AVX512F: # %bb.0: 1556; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 1557; AVX512F-NEXT: vprolq $14, %zmm0, %zmm0 1558; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1559; AVX512F-NEXT: retq 1560; 1561; AVX512VL-LABEL: splatconstant_funnnel_v4i64: 1562; AVX512VL: # %bb.0: 1563; AVX512VL-NEXT: vprolq $14, %ymm0, %ymm0 1564; AVX512VL-NEXT: retq 1565; 1566; AVX512BW-LABEL: splatconstant_funnnel_v4i64: 1567; AVX512BW: # %bb.0: 1568; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 1569; AVX512BW-NEXT: vprolq $14, %zmm0, %zmm0 1570; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1571; AVX512BW-NEXT: retq 1572; 1573; AVX512VLBW-LABEL: splatconstant_funnnel_v4i64: 1574; AVX512VLBW: # %bb.0: 1575; AVX512VLBW-NEXT: vprolq $14, %ymm0, %ymm0 1576; AVX512VLBW-NEXT: retq 1577; 1578; AVX512VBMI2-LABEL: splatconstant_funnnel_v4i64: 1579; AVX512VBMI2: # %bb.0: 1580; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 1581; AVX512VBMI2-NEXT: vprolq $14, %zmm0, %zmm0 1582; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1583; AVX512VBMI2-NEXT: retq 1584; 1585; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v4i64: 1586; AVX512VLVBMI2: # %bb.0: 1587; AVX512VLVBMI2-NEXT: vprolq $14, %ymm0, %ymm0 1588; AVX512VLVBMI2-NEXT: retq 1589; 1590; XOPAVX1-LABEL: splatconstant_funnnel_v4i64: 1591; XOPAVX1: # %bb.0: 1592; 
XOPAVX1-NEXT: vprotq $14, %xmm0, %xmm1 1593; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1594; XOPAVX1-NEXT: vprotq $14, %xmm0, %xmm0 1595; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1596; XOPAVX1-NEXT: retq 1597; 1598; XOPAVX2-LABEL: splatconstant_funnnel_v4i64: 1599; XOPAVX2: # %bb.0: 1600; XOPAVX2-NEXT: vprotq $14, %xmm0, %xmm1 1601; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 1602; XOPAVX2-NEXT: vprotq $14, %xmm0, %xmm0 1603; XOPAVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 1604; XOPAVX2-NEXT: retq 1605 %res = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %x, <4 x i64> %x, <4 x i64> <i64 14, i64 14, i64 14, i64 14>) 1606 ret <4 x i64> %res 1607} 1608 1609define <8 x i32> @splatconstant_funnnel_v8i32(<8 x i32> %x) nounwind { 1610; AVX1-LABEL: splatconstant_funnnel_v8i32: 1611; AVX1: # %bb.0: 1612; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1613; AVX1-NEXT: vpsrld $28, %xmm1, %xmm2 1614; AVX1-NEXT: vpslld $4, %xmm1, %xmm1 1615; AVX1-NEXT: vpor %xmm2, %xmm1, %xmm1 1616; AVX1-NEXT: vpsrld $28, %xmm0, %xmm2 1617; AVX1-NEXT: vpslld $4, %xmm0, %xmm0 1618; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0 1619; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1620; AVX1-NEXT: retq 1621; 1622; AVX2-LABEL: splatconstant_funnnel_v8i32: 1623; AVX2: # %bb.0: 1624; AVX2-NEXT: vpsrld $28, %ymm0, %ymm1 1625; AVX2-NEXT: vpslld $4, %ymm0, %ymm0 1626; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 1627; AVX2-NEXT: retq 1628; 1629; AVX512F-LABEL: splatconstant_funnnel_v8i32: 1630; AVX512F: # %bb.0: 1631; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 1632; AVX512F-NEXT: vprold $4, %zmm0, %zmm0 1633; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1634; AVX512F-NEXT: retq 1635; 1636; AVX512VL-LABEL: splatconstant_funnnel_v8i32: 1637; AVX512VL: # %bb.0: 1638; AVX512VL-NEXT: vprold $4, %ymm0, %ymm0 1639; AVX512VL-NEXT: retq 1640; 1641; AVX512BW-LABEL: splatconstant_funnnel_v8i32: 1642; AVX512BW: # %bb.0: 1643; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 1644; AVX512BW-NEXT: 
vprold $4, %zmm0, %zmm0 1645; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1646; AVX512BW-NEXT: retq 1647; 1648; AVX512VLBW-LABEL: splatconstant_funnnel_v8i32: 1649; AVX512VLBW: # %bb.0: 1650; AVX512VLBW-NEXT: vprold $4, %ymm0, %ymm0 1651; AVX512VLBW-NEXT: retq 1652; 1653; AVX512VBMI2-LABEL: splatconstant_funnnel_v8i32: 1654; AVX512VBMI2: # %bb.0: 1655; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 1656; AVX512VBMI2-NEXT: vprold $4, %zmm0, %zmm0 1657; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1658; AVX512VBMI2-NEXT: retq 1659; 1660; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v8i32: 1661; AVX512VLVBMI2: # %bb.0: 1662; AVX512VLVBMI2-NEXT: vprold $4, %ymm0, %ymm0 1663; AVX512VLVBMI2-NEXT: retq 1664; 1665; XOPAVX1-LABEL: splatconstant_funnnel_v8i32: 1666; XOPAVX1: # %bb.0: 1667; XOPAVX1-NEXT: vprotd $4, %xmm0, %xmm1 1668; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1669; XOPAVX1-NEXT: vprotd $4, %xmm0, %xmm0 1670; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1671; XOPAVX1-NEXT: retq 1672; 1673; XOPAVX2-LABEL: splatconstant_funnnel_v8i32: 1674; XOPAVX2: # %bb.0: 1675; XOPAVX2-NEXT: vprotd $4, %xmm0, %xmm1 1676; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 1677; XOPAVX2-NEXT: vprotd $4, %xmm0, %xmm0 1678; XOPAVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 1679; XOPAVX2-NEXT: retq 1680 %res = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %x, <8 x i32> %x, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>) 1681 ret <8 x i32> %res 1682} 1683 1684define <16 x i16> @splatconstant_funnnel_v16i16(<16 x i16> %x) nounwind { 1685; AVX1-LABEL: splatconstant_funnnel_v16i16: 1686; AVX1: # %bb.0: 1687; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1688; AVX1-NEXT: vpsrlw $9, %xmm1, %xmm2 1689; AVX1-NEXT: vpsllw $7, %xmm1, %xmm1 1690; AVX1-NEXT: vpor %xmm2, %xmm1, %xmm1 1691; AVX1-NEXT: vpsrlw $9, %xmm0, %xmm2 1692; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0 1693; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0 1694; AVX1-NEXT: vinsertf128 $1, 
%xmm1, %ymm0, %ymm0 1695; AVX1-NEXT: retq 1696; 1697; AVX2-LABEL: splatconstant_funnnel_v16i16: 1698; AVX2: # %bb.0: 1699; AVX2-NEXT: vpsrlw $9, %ymm0, %ymm1 1700; AVX2-NEXT: vpsllw $7, %ymm0, %ymm0 1701; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 1702; AVX2-NEXT: retq 1703; 1704; AVX512F-LABEL: splatconstant_funnnel_v16i16: 1705; AVX512F: # %bb.0: 1706; AVX512F-NEXT: vpsrlw $9, %ymm0, %ymm1 1707; AVX512F-NEXT: vpsllw $7, %ymm0, %ymm0 1708; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0 1709; AVX512F-NEXT: retq 1710; 1711; AVX512VL-LABEL: splatconstant_funnnel_v16i16: 1712; AVX512VL: # %bb.0: 1713; AVX512VL-NEXT: vpsrlw $9, %ymm0, %ymm1 1714; AVX512VL-NEXT: vpsllw $7, %ymm0, %ymm0 1715; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0 1716; AVX512VL-NEXT: retq 1717; 1718; AVX512BW-LABEL: splatconstant_funnnel_v16i16: 1719; AVX512BW: # %bb.0: 1720; AVX512BW-NEXT: vpsrlw $9, %ymm0, %ymm1 1721; AVX512BW-NEXT: vpsllw $7, %ymm0, %ymm0 1722; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0 1723; AVX512BW-NEXT: retq 1724; 1725; AVX512VLBW-LABEL: splatconstant_funnnel_v16i16: 1726; AVX512VLBW: # %bb.0: 1727; AVX512VLBW-NEXT: vpsrlw $9, %ymm0, %ymm1 1728; AVX512VLBW-NEXT: vpsllw $7, %ymm0, %ymm0 1729; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0 1730; AVX512VLBW-NEXT: retq 1731; 1732; AVX512VBMI2-LABEL: splatconstant_funnnel_v16i16: 1733; AVX512VBMI2: # %bb.0: 1734; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 1735; AVX512VBMI2-NEXT: vpshldw $7, %zmm0, %zmm0, %zmm0 1736; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1737; AVX512VBMI2-NEXT: retq 1738; 1739; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v16i16: 1740; AVX512VLVBMI2: # %bb.0: 1741; AVX512VLVBMI2-NEXT: vpshldw $7, %ymm0, %ymm0, %ymm0 1742; AVX512VLVBMI2-NEXT: retq 1743; 1744; XOPAVX1-LABEL: splatconstant_funnnel_v16i16: 1745; XOPAVX1: # %bb.0: 1746; XOPAVX1-NEXT: vprotw $7, %xmm0, %xmm1 1747; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1748; XOPAVX1-NEXT: vprotw $7, %xmm0, %xmm0 1749; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, 
%ymm1, %ymm0 1750; XOPAVX1-NEXT: retq 1751; 1752; XOPAVX2-LABEL: splatconstant_funnnel_v16i16: 1753; XOPAVX2: # %bb.0: 1754; XOPAVX2-NEXT: vprotw $7, %xmm0, %xmm1 1755; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 1756; XOPAVX2-NEXT: vprotw $7, %xmm0, %xmm0 1757; XOPAVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 1758; XOPAVX2-NEXT: retq 1759 %res = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %x, <16 x i16> %x, <16 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>) 1760 ret <16 x i16> %res 1761} 1762 1763define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x) nounwind { 1764; AVX1-LABEL: splatconstant_funnnel_v32i8: 1765; AVX1: # %bb.0: 1766; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1767; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm2 1768; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] 1769; AVX1-NEXT: vpandn %xmm2, %xmm3, %xmm2 1770; AVX1-NEXT: vpsllw $4, %xmm1, %xmm1 1771; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1 1772; AVX1-NEXT: vpor %xmm2, %xmm1, %xmm1 1773; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm2 1774; AVX1-NEXT: vpandn %xmm2, %xmm3, %xmm2 1775; AVX1-NEXT: vpsllw $4, %xmm0, %xmm0 1776; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0 1777; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0 1778; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1779; AVX1-NEXT: retq 1780; 1781; AVX2-LABEL: splatconstant_funnnel_v32i8: 1782; AVX2: # %bb.0: 1783; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm1 1784; AVX2-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1 1785; AVX2-NEXT: vpsllw $4, %ymm0, %ymm0 1786; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 1787; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 1788; AVX2-NEXT: retq 1789; 1790; AVX512F-LABEL: splatconstant_funnnel_v32i8: 1791; AVX512F: # %bb.0: 1792; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm1 1793; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1 1794; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm0 1795; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 1796; AVX512F-NEXT: vpor %ymm1, 
%ymm0, %ymm0 1797; AVX512F-NEXT: retq 1798; 1799; AVX512VL-LABEL: splatconstant_funnnel_v32i8: 1800; AVX512VL: # %bb.0: 1801; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm1 1802; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0 1803; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm1, %ymm0 1804; AVX512VL-NEXT: retq 1805; 1806; AVX512BW-LABEL: splatconstant_funnnel_v32i8: 1807; AVX512BW: # %bb.0: 1808; AVX512BW-NEXT: vpsrlw $4, %ymm0, %ymm1 1809; AVX512BW-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1 1810; AVX512BW-NEXT: vpsllw $4, %ymm0, %ymm0 1811; AVX512BW-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 1812; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0 1813; AVX512BW-NEXT: retq 1814; 1815; AVX512VLBW-LABEL: splatconstant_funnnel_v32i8: 1816; AVX512VLBW: # %bb.0: 1817; AVX512VLBW-NEXT: vpsllw $4, %ymm0, %ymm1 1818; AVX512VLBW-NEXT: vpsrlw $4, %ymm0, %ymm0 1819; AVX512VLBW-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm1, %ymm0 1820; AVX512VLBW-NEXT: retq 1821; 1822; AVX512VBMI2-LABEL: splatconstant_funnnel_v32i8: 1823; AVX512VBMI2: # %bb.0: 1824; AVX512VBMI2-NEXT: vpsrlw $4, %ymm0, %ymm1 1825; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1 1826; AVX512VBMI2-NEXT: vpsllw $4, %ymm0, %ymm0 1827; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 1828; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0 1829; AVX512VBMI2-NEXT: retq 1830; 1831; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v32i8: 1832; AVX512VLVBMI2: # %bb.0: 1833; AVX512VLVBMI2-NEXT: vpsllw $4, %ymm0, %ymm1 1834; AVX512VLVBMI2-NEXT: vpsrlw $4, %ymm0, %ymm0 1835; AVX512VLVBMI2-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm1, %ymm0 1836; AVX512VLVBMI2-NEXT: retq 1837; 1838; XOPAVX1-LABEL: splatconstant_funnnel_v32i8: 1839; XOPAVX1: # %bb.0: 1840; XOPAVX1-NEXT: vprotb $4, %xmm0, %xmm1 1841; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1842; XOPAVX1-NEXT: vprotb $4, %xmm0, %xmm0 1843; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1844; XOPAVX1-NEXT: retq 1845; 1846; XOPAVX2-LABEL: splatconstant_funnnel_v32i8: 1847; XOPAVX2: # %bb.0: 1848; XOPAVX2-NEXT: 
vprotb $4, %xmm0, %xmm1 1849; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 1850; XOPAVX2-NEXT: vprotb $4, %xmm0, %xmm0 1851; XOPAVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 1852; XOPAVX2-NEXT: retq 1853 %res = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %x, <32 x i8> %x, <32 x i8> <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>) 1854 ret <32 x i8> %res 1855} 1856