; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefix=ALL --check-prefix=XOP --check-prefix=XOPAVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=XOP --check-prefix=XOPAVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512DQ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX512VL --check-prefix=AVX512DQVL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX512VL --check-prefix=AVX512BWVL
;
; 32-bit runs to make sure we do reasonable things for i64 shifts.
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=X32-AVX --check-prefix=X32-AVX1
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=X32-AVX --check-prefix=X32-AVX2

;
; Variable Shifts
;

define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
; AVX1-LABEL: var_shift_v4i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpsllq %xmm2, %xmm3, %xmm4
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
; AVX1-NEXT: vpsllq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm4[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm3
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm3[0,1,2,3],xmm0[4,5,6,7]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: var_shift_v4i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: var_shift_v4i64:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; XOPAVX1-NEXT: vpshlq %xmm2, %xmm3, %xmm2
; XOPAVX1-NEXT: vpshlq %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: var_shift_v4i64:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: var_shift_v4i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: var_shift_v4i64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X32-AVX1-LABEL: var_shift_v4i64:
; X32-AVX1: # %bb.0:
; X32-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; X32-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; X32-AVX1-NEXT: vpsllq %xmm2, %xmm3, %xmm4
; X32-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
; X32-AVX1-NEXT: vpsllq %xmm2, %xmm3, %xmm2
; X32-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm4[0,1,2,3],xmm2[4,5,6,7]
; X32-AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm3
; X32-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; X32-AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0
; X32-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm3[0,1,2,3],xmm0[4,5,6,7]
; X32-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; X32-AVX1-NEXT: retl
;
; X32-AVX2-LABEL: var_shift_v4i64:
; X32-AVX2: # %bb.0:
; X32-AVX2-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
; X32-AVX2-NEXT: retl
  %shift = shl <4 x i64> %a, %b
  ret <4 x i64> %shift
}

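; AVX1 has no per-element i32 shift, so the v8i32 case below lowers to a
; multiply: each shift amount is moved into a float's exponent field
; (vpslld $23 plus the 1.0f bias 1065353216), vcvttps2dq turns that into
; the integer 2^amount, and vpmulld applies it per lane. AVX2 and later
; can use vpsllvd directly.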
define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
; AVX1-LABEL: var_shift_v8i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpslld $23, %xmm2, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [1065353216,1065353216,1065353216,1065353216]
; AVX1-NEXT: vpaddd %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vcvttps2dq %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT: vpmulld %xmm2, %xmm4, %xmm2
; AVX1-NEXT: vpslld $23, %xmm1, %xmm1
; AVX1-NEXT: vpaddd %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vcvttps2dq %xmm1, %xmm1
; AVX1-NEXT: vpmulld %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: var_shift_v8i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: var_shift_v8i32:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; XOPAVX1-NEXT: vpshld %xmm2, %xmm3, %xmm2
; XOPAVX1-NEXT: vpshld %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: var_shift_v8i32:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: var_shift_v8i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: var_shift_v8i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X32-AVX1-LABEL: var_shift_v8i32:
; X32-AVX1: # %bb.0:
; X32-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; X32-AVX1-NEXT: vpslld $23, %xmm2, %xmm2
; X32-AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [1065353216,1065353216,1065353216,1065353216]
; X32-AVX1-NEXT: vpaddd %xmm3, %xmm2, %xmm2
; X32-AVX1-NEXT: vcvttps2dq %xmm2, %xmm2
; X32-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; X32-AVX1-NEXT: vpmulld %xmm2, %xmm4, %xmm2
; X32-AVX1-NEXT: vpslld $23, %xmm1, %xmm1
; X32-AVX1-NEXT: vpaddd %xmm3, %xmm1, %xmm1
; X32-AVX1-NEXT: vcvttps2dq %xmm1, %xmm1
; X32-AVX1-NEXT: vpmulld %xmm1, %xmm0, %xmm0
; X32-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; X32-AVX1-NEXT: retl
;
; X32-AVX2-LABEL: var_shift_v8i32:
; X32-AVX2: # %bb.0:
; X32-AVX2-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
; X32-AVX2-NEXT: retl
  %shift = shl <8 x i32> %a, %b
  ret <8 x i32> %shift
}

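; Variable i16 shifts have no hardware form before AVX512BW (vpsllvw), so
; the elements are widened to i32 first: AVX1 repeats the exponent-bias
; multiply trick per 4-element group and repacks with vpackusdw, while
; AVX2 interleaves with zero and uses vpsllvd twice.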
define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
; AVX1-LABEL: var_shift_v16i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm4 = xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
; AVX1-NEXT: vpslld $23, %xmm4, %xmm4
; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [1065353216,1065353216,1065353216,1065353216]
; AVX1-NEXT: vpaddd %xmm5, %xmm4, %xmm4
; AVX1-NEXT: vcvttps2dq %xmm4, %xmm4
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
; AVX1-NEXT: vpslld $23, %xmm2, %xmm2
; AVX1-NEXT: vpaddd %xmm5, %xmm2, %xmm2
; AVX1-NEXT: vcvttps2dq %xmm2, %xmm2
; AVX1-NEXT: vpackusdw %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT: vpmullw %xmm2, %xmm4, %xmm2
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
; AVX1-NEXT: vpslld $23, %xmm3, %xmm3
; AVX1-NEXT: vpaddd %xmm5, %xmm3, %xmm3
; AVX1-NEXT: vcvttps2dq %xmm3, %xmm3
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; AVX1-NEXT: vpslld $23, %xmm1, %xmm1
; AVX1-NEXT: vpaddd %xmm5, %xmm1, %xmm1
; AVX1-NEXT: vcvttps2dq %xmm1, %xmm1
; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: var_shift_v16i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
; AVX2-NEXT: vpsllvd %ymm3, %ymm4, %ymm3
; AVX2-NEXT: vpsrld $16, %ymm3, %ymm3
; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
; AVX2-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrld $16, %ymm0, %ymm0
; AVX2-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: var_shift_v16i16:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; XOPAVX1-NEXT: vpshlw %xmm2, %xmm3, %xmm2
; XOPAVX1-NEXT: vpshlw %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: var_shift_v16i16:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
; XOPAVX2-NEXT: vpshlw %xmm2, %xmm3, %xmm2
; XOPAVX2-NEXT: vpshlw %xmm1, %xmm0, %xmm0
; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512DQ-LABEL: var_shift_v16i16:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512DQ-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: var_shift_v16i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512BW-NEXT: retq
;
; AVX512DQVL-LABEL: var_shift_v16i16:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512DQVL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512DQVL-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
; AVX512DQVL-NEXT: vpmovdw %zmm0, %ymm0
; AVX512DQVL-NEXT: retq
;
; AVX512BWVL-LABEL: var_shift_v16i16:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vpsllvw %ymm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: retq
;
; X32-AVX1-LABEL: var_shift_v16i16:
; X32-AVX1: # %bb.0:
; X32-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; X32-AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; X32-AVX1-NEXT: vpunpckhwd {{.*#+}} xmm4 = xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
; X32-AVX1-NEXT: vpslld $23, %xmm4, %xmm4
; X32-AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [1065353216,1065353216,1065353216,1065353216]
; X32-AVX1-NEXT: vpaddd %xmm5, %xmm4, %xmm4
; X32-AVX1-NEXT: vcvttps2dq %xmm4, %xmm4
; X32-AVX1-NEXT: vpmovzxwd {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
; X32-AVX1-NEXT: vpslld $23, %xmm2, %xmm2
; X32-AVX1-NEXT: vpaddd %xmm5, %xmm2, %xmm2
; X32-AVX1-NEXT: vcvttps2dq %xmm2, %xmm2
; X32-AVX1-NEXT: vpackusdw %xmm4, %xmm2, %xmm2
; X32-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; X32-AVX1-NEXT: vpmullw %xmm2, %xmm4, %xmm2
; X32-AVX1-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
; X32-AVX1-NEXT: vpslld $23, %xmm3, %xmm3
; X32-AVX1-NEXT: vpaddd %xmm5, %xmm3, %xmm3
; X32-AVX1-NEXT: vcvttps2dq %xmm3, %xmm3
; X32-AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; X32-AVX1-NEXT: vpslld $23, %xmm1, %xmm1
; X32-AVX1-NEXT: vpaddd %xmm5, %xmm1, %xmm1
; X32-AVX1-NEXT: vcvttps2dq %xmm1, %xmm1
; X32-AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1
; X32-AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0
; X32-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; X32-AVX1-NEXT: retl
;
; X32-AVX2-LABEL: var_shift_v16i16:
; X32-AVX2: # %bb.0:
; X32-AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; X32-AVX2-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
; X32-AVX2-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
; X32-AVX2-NEXT: vpsllvd %ymm3, %ymm4, %ymm3
; X32-AVX2-NEXT: vpsrld $16, %ymm3, %ymm3
; X32-AVX2-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
; X32-AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
; X32-AVX2-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
; X32-AVX2-NEXT: vpsrld $16, %ymm0, %ymm0
; X32-AVX2-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
; X32-AVX2-NEXT: retl
  %shift = shl <16 x i16> %a, %b
  ret <16 x i16> %shift
}

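; x86 has no byte-granularity shifts, so v32i8 is assembled from a blend
; ladder: the amounts are pre-shifted into each byte's sign bit
; (vpsllw $5), then shift-by-4, shift-by-2 and shift-by-1 (vpaddb) steps
; are selected per byte with vpblendvb, doubling the mask between steps.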
define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; AVX1-LABEL: var_shift_v32i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpsllw $4, %xmm2, %xmm3
; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5
; AVX1-NEXT: vpsllw $5, %xmm5, %xmm5
; AVX1-NEXT: vpblendvb %xmm5, %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpsllw $2, %xmm2, %xmm3
; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
; AVX1-NEXT: vpand %xmm6, %xmm3, %xmm3
; AVX1-NEXT: vpaddb %xmm5, %xmm5, %xmm5
; AVX1-NEXT: vpblendvb %xmm5, %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpaddb %xmm2, %xmm2, %xmm3
; AVX1-NEXT: vpaddb %xmm5, %xmm5, %xmm5
; AVX1-NEXT: vpblendvb %xmm5, %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpsllw $4, %xmm0, %xmm3
; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vpsllw $5, %xmm1, %xmm1
; AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpsllw $2, %xmm0, %xmm3
; AVX1-NEXT: vpand %xmm6, %xmm3, %xmm3
; AVX1-NEXT: vpaddb %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpaddb %xmm0, %xmm0, %xmm3
; AVX1-NEXT: vpaddb %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: var_shift_v32i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsllw $5, %ymm1, %ymm1
; AVX2-NEXT: vpsllw $4, %ymm0, %ymm2
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpsllw $2, %ymm0, %ymm2
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
; AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpaddb %ymm0, %ymm0, %ymm2
; AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: var_shift_v32i8:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; XOPAVX1-NEXT: vpshlb %xmm2, %xmm3, %xmm2
; XOPAVX1-NEXT: vpshlb %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: var_shift_v32i8:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
; XOPAVX2-NEXT: vpshlb %xmm2, %xmm3, %xmm2
; XOPAVX2-NEXT: vpshlb %xmm1, %xmm0, %xmm0
; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512DQ-LABEL: var_shift_v32i8:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpsllw $5, %ymm1, %ymm1
; AVX512DQ-NEXT: vpsllw $4, %ymm0, %ymm2
; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT: vpsllw $2, %ymm0, %ymm2
; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
; AVX512DQ-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT: vpaddb %ymm0, %ymm0, %ymm2
; AVX512DQ-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: var_shift_v32i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: retq
;
; AVX512DQVL-LABEL: var_shift_v32i8:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vpsllw $5, %ymm1, %ymm1
; AVX512DQVL-NEXT: vpsllw $4, %ymm0, %ymm2
; AVX512DQVL-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
; AVX512DQVL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX512DQVL-NEXT: vpsllw $2, %ymm0, %ymm2
; AVX512DQVL-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
; AVX512DQVL-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX512DQVL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX512DQVL-NEXT: vpaddb %ymm0, %ymm0, %ymm2
; AVX512DQVL-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX512DQVL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX512DQVL-NEXT: retq
;
; AVX512BWVL-LABEL: var_shift_v32i8:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; AVX512BWVL-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BWVL-NEXT: retq
;
; X32-AVX1-LABEL: var_shift_v32i8:
; X32-AVX1: # %bb.0:
; X32-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; X32-AVX1-NEXT: vpsllw $4, %xmm2, %xmm3
; X32-AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; X32-AVX1-NEXT: vpand %xmm4, %xmm3, %xmm3
; X32-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5
; X32-AVX1-NEXT: vpsllw $5, %xmm5, %xmm5
; X32-AVX1-NEXT: vpblendvb %xmm5, %xmm3, %xmm2, %xmm2
; X32-AVX1-NEXT: vpsllw $2, %xmm2, %xmm3
; X32-AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
; X32-AVX1-NEXT: vpand %xmm6, %xmm3, %xmm3
; X32-AVX1-NEXT: vpaddb %xmm5, %xmm5, %xmm5
; X32-AVX1-NEXT: vpblendvb %xmm5, %xmm3, %xmm2, %xmm2
; X32-AVX1-NEXT: vpaddb %xmm2, %xmm2, %xmm3
; X32-AVX1-NEXT: vpaddb %xmm5, %xmm5, %xmm5
; X32-AVX1-NEXT: vpblendvb %xmm5, %xmm3, %xmm2, %xmm2
; X32-AVX1-NEXT: vpsllw $4, %xmm0, %xmm3
; X32-AVX1-NEXT: vpand %xmm4, %xmm3, %xmm3
; X32-AVX1-NEXT: vpsllw $5, %xmm1, %xmm1
; X32-AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
; X32-AVX1-NEXT: vpsllw $2, %xmm0, %xmm3
; X32-AVX1-NEXT: vpand %xmm6, %xmm3, %xmm3
; X32-AVX1-NEXT: vpaddb %xmm1, %xmm1, %xmm1
; X32-AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
; X32-AVX1-NEXT: vpaddb %xmm0, %xmm0, %xmm3
; X32-AVX1-NEXT: vpaddb %xmm1, %xmm1, %xmm1
; X32-AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
; X32-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; X32-AVX1-NEXT: retl
;
; X32-AVX2-LABEL: var_shift_v32i8:
; X32-AVX2: # %bb.0:
; X32-AVX2-NEXT: vpsllw $5, %ymm1, %ymm1
; X32-AVX2-NEXT: vpsllw $4, %ymm0, %ymm2
; X32-AVX2-NEXT: vpand {{\.LCPI.*}}, %ymm2, %ymm2
; X32-AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; X32-AVX2-NEXT: vpsllw $2, %ymm0, %ymm2
; X32-AVX2-NEXT: vpand {{\.LCPI.*}}, %ymm2, %ymm2
; X32-AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; X32-AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; X32-AVX2-NEXT: vpaddb %ymm0, %ymm0, %ymm2
; X32-AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; X32-AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; X32-AVX2-NEXT: retl
  %shift = shl <32 x i8> %a, %b
  ret <32 x i8> %shift
}

;
; Uniform Variable Shifts
;

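; A splatted variable amount can use the legacy shift-by-scalar forms,
; which take the count from the low 64 bits of an xmm register, so only
; the bottom element of %b is needed; sub-64-bit counts are zero-extended
; first (vpmovzxdq / vpmovzxwq below).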
define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
; AVX1-LABEL: splatvar_shift_v4i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpsllq %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splatvar_shift_v4i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_shift_v4i64:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT: vpsllq %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: splatvar_shift_v4i64:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_shift_v4i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsllq %xmm1, %ymm0, %ymm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_shift_v4i64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllq %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X32-AVX1-LABEL: splatvar_shift_v4i64:
; X32-AVX1: # %bb.0:
; X32-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; X32-AVX1-NEXT: vpsllq %xmm1, %xmm2, %xmm2
; X32-AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0
; X32-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; X32-AVX1-NEXT: retl
;
; X32-AVX2-LABEL: splatvar_shift_v4i64:
; X32-AVX2: # %bb.0:
; X32-AVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm0
; X32-AVX2-NEXT: retl
  %splat = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> zeroinitializer
  %shift = shl <4 x i64> %a, %splat
  ret <4 x i64> %shift
}

define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
; AVX1-LABEL: splatvar_shift_v8i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX1-NEXT: vpslld %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splatvar_shift_v8i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX2-NEXT: vpslld %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_shift_v8i32:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; XOPAVX1-NEXT: vpslld %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: splatvar_shift_v8i32:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; XOPAVX2-NEXT: vpslld %xmm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_shift_v8i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX512-NEXT: vpslld %xmm1, %ymm0, %ymm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_shift_v8i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX512VL-NEXT: vpslld %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X32-AVX1-LABEL: splatvar_shift_v8i32:
; X32-AVX1: # %bb.0:
; X32-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; X32-AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; X32-AVX1-NEXT: vpslld %xmm1, %xmm2, %xmm2
; X32-AVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0
; X32-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; X32-AVX1-NEXT: retl
;
; X32-AVX2-LABEL: splatvar_shift_v8i32:
; X32-AVX2: # %bb.0:
; X32-AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; X32-AVX2-NEXT: vpslld %xmm1, %ymm0, %ymm0
; X32-AVX2-NEXT: retl
  %splat = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> zeroinitializer
  %shift = shl <8 x i32> %a, %splat
  ret <8 x i32> %shift
}

define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
; AVX1-LABEL: splatvar_shift_v16i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splatvar_shift_v16i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_shift_v16i16:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; XOPAVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: splatvar_shift_v16i16:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; XOPAVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_shift_v16i16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_shift_v16i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X32-AVX1-LABEL: splatvar_shift_v16i16:
; X32-AVX1: # %bb.0:
; X32-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; X32-AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; X32-AVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2
; X32-AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; X32-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; X32-AVX1-NEXT: retl
;
; X32-AVX2-LABEL: splatvar_shift_v16i16:
; X32-AVX2: # %bb.0:
; X32-AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; X32-AVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; X32-AVX2-NEXT: retl
  %splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
  %shift = shl <16 x i16> %a, %splat
  ret <16 x i16> %shift
}

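; Even a uniform count does not help v32i8 on AVX1/AVX2: the byte is
; broadcast and the vpblendvb ladder is used again. XOP can feed the
; splat straight to vpshlb, and AVX512BW widens to words for vpsllvw.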
define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; AVX1-LABEL: splatvar_shift_v32i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpsllw $4, %xmm2, %xmm3
; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vpsllw $5, %xmm1, %xmm1
; AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpsllw $2, %xmm2, %xmm3
; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
; AVX1-NEXT: vpand %xmm5, %xmm3, %xmm3
; AVX1-NEXT: vpaddb %xmm1, %xmm1, %xmm6
; AVX1-NEXT: vpblendvb %xmm6, %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpaddb %xmm2, %xmm2, %xmm3
; AVX1-NEXT: vpaddb %xmm6, %xmm6, %xmm7
; AVX1-NEXT: vpblendvb %xmm7, %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpsllw $4, %xmm0, %xmm3
; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpsllw $2, %xmm0, %xmm1
; AVX1-NEXT: vpand %xmm5, %xmm1, %xmm1
; AVX1-NEXT: vpblendvb %xmm6, %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpaddb %xmm0, %xmm0, %xmm1
; AVX1-NEXT: vpblendvb %xmm7, %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splatvar_shift_v32i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastb %xmm1, %ymm1
; AVX2-NEXT: vpsllw $4, %ymm0, %ymm2
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
; AVX2-NEXT: vpsllw $5, %ymm1, %ymm1
; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpsllw $2, %ymm0, %ymm2
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
; AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpaddb %ymm0, %ymm0, %ymm2
; AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_shift_v32i8:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT: vpshlb %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT: vpshlb %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: splatvar_shift_v32i8:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpbroadcastb %xmm1, %ymm1
; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
; XOPAVX2-NEXT: vpshlb %xmm3, %xmm2, %xmm2
; XOPAVX2-NEXT: vpshlb %xmm1, %xmm0, %xmm0
; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512DQ-LABEL: splatvar_shift_v32i8:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpbroadcastb %xmm1, %ymm1
; AVX512DQ-NEXT: vpsllw $4, %ymm0, %ymm2
; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
; AVX512DQ-NEXT: vpsllw $5, %ymm1, %ymm1
; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT: vpsllw $2, %ymm0, %ymm2
; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
; AVX512DQ-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT: vpaddb %ymm0, %ymm0, %ymm2
; AVX512DQ-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatvar_shift_v32i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpbroadcastb %xmm1, %ymm1
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: retq
;
; AVX512DQVL-LABEL: splatvar_shift_v32i8:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %ymm1
; AVX512DQVL-NEXT: vpsllw $4, %ymm0, %ymm2
; AVX512DQVL-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
; AVX512DQVL-NEXT: vpsllw $5, %ymm1, %ymm1
; AVX512DQVL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX512DQVL-NEXT: vpsllw $2, %ymm0, %ymm2
; AVX512DQVL-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
; AVX512DQVL-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX512DQVL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX512DQVL-NEXT: vpaddb %ymm0, %ymm0, %ymm2
; AVX512DQVL-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX512DQVL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX512DQVL-NEXT: retq
;
; AVX512BWVL-LABEL: splatvar_shift_v32i8:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %ymm1
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
; AVX512BWVL-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BWVL-NEXT: retq
;
; X32-AVX1-LABEL: splatvar_shift_v32i8:
; X32-AVX1: # %bb.0:
; X32-AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; X32-AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; X32-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; X32-AVX1-NEXT: vpsllw $4, %xmm2, %xmm3
; X32-AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; X32-AVX1-NEXT: vpand %xmm4, %xmm3, %xmm3
; X32-AVX1-NEXT: vpsllw $5, %xmm1, %xmm1
; X32-AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm2, %xmm2
; X32-AVX1-NEXT: vpsllw $2, %xmm2, %xmm3
; X32-AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
; X32-AVX1-NEXT: vpand %xmm5, %xmm3, %xmm3
; X32-AVX1-NEXT: vpaddb %xmm1, %xmm1, %xmm6
; X32-AVX1-NEXT: vpblendvb %xmm6, %xmm3, %xmm2, %xmm2
; X32-AVX1-NEXT: vpaddb %xmm2, %xmm2, %xmm3
; X32-AVX1-NEXT: vpaddb %xmm6, %xmm6, %xmm7
; X32-AVX1-NEXT: vpblendvb %xmm7, %xmm3, %xmm2, %xmm2
; X32-AVX1-NEXT: vpsllw $4, %xmm0, %xmm3
; X32-AVX1-NEXT: vpand %xmm4, %xmm3, %xmm3
; X32-AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
; X32-AVX1-NEXT: vpsllw $2, %xmm0, %xmm1
; X32-AVX1-NEXT: vpand %xmm5, %xmm1, %xmm1
; X32-AVX1-NEXT: vpblendvb %xmm6, %xmm1, %xmm0, %xmm0
; X32-AVX1-NEXT: vpaddb %xmm0, %xmm0, %xmm1
; X32-AVX1-NEXT: vpblendvb %xmm7, %xmm1, %xmm0, %xmm0
; X32-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; X32-AVX1-NEXT: retl
;
; X32-AVX2-LABEL: splatvar_shift_v32i8:
; X32-AVX2: # %bb.0:
; X32-AVX2-NEXT: vpbroadcastb %xmm1, %ymm1
; X32-AVX2-NEXT: vpsllw $4, %ymm0, %ymm2
; X32-AVX2-NEXT: vpand {{\.LCPI.*}}, %ymm2, %ymm2
; X32-AVX2-NEXT: vpsllw $5, %ymm1, %ymm1
; X32-AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; X32-AVX2-NEXT: vpsllw $2, %ymm0, %ymm2
; X32-AVX2-NEXT: vpand {{\.LCPI.*}}, %ymm2, %ymm2
; X32-AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; X32-AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; X32-AVX2-NEXT: vpaddb %ymm0, %ymm0, %ymm2
; X32-AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; X32-AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; X32-AVX2-NEXT: retl
  %splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
  %shift = shl <32 x i8> %a, %splat
  ret <32 x i8> %shift
}

;
; Constant Shifts
;

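; With constant per-element amounts the counts fold into immediates or
; constant-pool vectors: AVX1 shifts each half twice by immediate and
; blends the results, while AVX2/AVX512 load <1,7,31,62> for vpsllvq.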
define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) nounwind {
; AVX1-LABEL: constant_shift_v4i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpsllq $62, %xmm1, %xmm2
; AVX1-NEXT: vpsllq $31, %xmm1, %xmm1
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT: vpsllq $7, %xmm0, %xmm2
; AVX1-NEXT: vpsllq $1, %xmm0, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: constant_shift_v4i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsllvq {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: constant_shift_v4i64:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vpshlq {{.*}}(%rip), %xmm0, %xmm1
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; XOPAVX1-NEXT: vpshlq {{.*}}(%rip), %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: constant_shift_v4i64:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpsllvq {{.*}}(%rip), %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: constant_shift_v4i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsllvq {{.*}}(%rip), %ymm0, %ymm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: constant_shift_v4i64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllvq {{.*}}(%rip), %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X32-AVX1-LABEL: constant_shift_v4i64:
; X32-AVX1: # %bb.0:
; X32-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [31,0,62,0]
; X32-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
; X32-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; X32-AVX1-NEXT: vpsllq %xmm2, %xmm3, %xmm2
; X32-AVX1-NEXT: vpsllq %xmm1, %xmm3, %xmm1
; X32-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; X32-AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [1,0,7,0]
; X32-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[2,3,0,1]
; X32-AVX1-NEXT: vpsllq %xmm3, %xmm0, %xmm3
; X32-AVX1-NEXT: vpsllq %xmm2, %xmm0, %xmm0
; X32-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
; X32-AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X32-AVX1-NEXT: retl
;
; X32-AVX2-LABEL: constant_shift_v4i64:
; X32-AVX2: # %bb.0:
; X32-AVX2-NEXT: vpsllvq {{\.LCPI.*}}, %ymm0, %ymm0
; X32-AVX2-NEXT: retl
  %shift = shl <4 x i64> %a, <i64 1, i64 7, i64 31, i64 62>
  ret <4 x i64> %shift
}

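; A constant i32 shift left is a multiply by a constant power of two, so
; AVX1 lowers each half to vpmulld against a constant-pool vector.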
define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) nounwind {
; AVX1-LABEL: constant_shift_v8i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: constant_shift_v8i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsllvd {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: constant_shift_v8i32:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vpshld {{.*}}(%rip), %xmm0, %xmm1
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; XOPAVX1-NEXT: vpshld {{.*}}(%rip), %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: constant_shift_v8i32:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpsllvd {{.*}}(%rip), %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: constant_shift_v8i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsllvd {{.*}}(%rip), %ymm0, %ymm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: constant_shift_v8i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllvd {{.*}}(%rip), %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X32-AVX1-LABEL: constant_shift_v8i32:
; X32-AVX1: # %bb.0:
; X32-AVX1-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm1
; X32-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; X32-AVX1-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
; X32-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X32-AVX1-NEXT: retl
;
; X32-AVX2-LABEL: constant_shift_v8i32:
; X32-AVX2: # %bb.0:
; X32-AVX2-NEXT: vpsllvd {{\.LCPI.*}}, %ymm0, %ymm0
; X32-AVX2-NEXT: retl
  %shift = shl <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7>
  ret <8 x i32> %shift
}

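; The same holds for i16: vpmullw against constant powers of two
; implements the shifts, and AVX512BW can feed the <0..15> count vector
; to vpsllvw instead.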
define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind {
; AVX1-LABEL: constant_shift_v16i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: constant_shift_v16i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: constant_shift_v16i16:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vpshlw {{.*}}(%rip), %xmm0, %xmm1
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; XOPAVX1-NEXT: vpshlw {{.*}}(%rip), %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: constant_shift_v16i16:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512DQ-LABEL: constant_shift_v16i16:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: constant_shift_v16i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512BW-NEXT: retq
;
; AVX512DQVL-LABEL: constant_shift_v16i16:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0
; AVX512DQVL-NEXT: retq
;
; AVX512BWVL-LABEL: constant_shift_v16i16:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vpsllvw {{.*}}(%rip), %ymm0, %ymm0
; AVX512BWVL-NEXT: retq
;
; X32-AVX1-LABEL: constant_shift_v16i16:
; X32-AVX1: # %bb.0:
; X32-AVX1-NEXT: vpmullw {{\.LCPI.*}}, %xmm0, %xmm1
; X32-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; X32-AVX1-NEXT: vpmullw {{\.LCPI.*}}, %xmm0, %xmm0
; X32-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X32-AVX1-NEXT: retl
;
; X32-AVX2-LABEL: constant_shift_v16i16:
; X32-AVX2: # %bb.0:
; X32-AVX2-NEXT: vpmullw {{\.LCPI.*}}, %ymm0, %ymm0
; X32-AVX2-NEXT: retl
  %shift = shl <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
  ret <16 x i16> %shift
}

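; Constant byte shifts unpack to words, vpmullw against byte powers of
; two, mask the low byte (vpand 255) and repack with vpackuswb; AVX2
; keeps the blend ladder but loads the pre-shifted selector masks as
; constants (8192 = 0x2000 encodes amounts 0 and 1 already shifted by 5).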
define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind {
; AVX1-LABEL: constant_shift_v32i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,128,64,32,16,8,4,2,1]
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; AVX1-NEXT: vpmullw %xmm1, %xmm3, %xmm3
; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [1,2,4,8,16,32,64,128]
; AVX1-NEXT: vpmullw %xmm5, %xmm2, %xmm2
; AVX1-NEXT: vpand %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpackuswb %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; AVX1-NEXT: vpmullw %xmm1, %xmm3, %xmm1
; AVX1-NEXT: vpand %xmm4, %xmm1, %xmm1
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX1-NEXT: vpmullw %xmm5, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: constant_shift_v32i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsllw $4, %ymm0, %ymm1
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsllw $2, %ymm0, %ymm1
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
; AVX2-NEXT: vpaddb %ymm2, %ymm2, %ymm2
; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpaddb %ymm0, %ymm0, %ymm1
; AVX2-NEXT: vpaddb %ymm2, %ymm2, %ymm2
; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: constant_shift_v32i8:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
; XOPAVX1-NEXT: vpshlb %xmm2, %xmm1, %xmm1
; XOPAVX1-NEXT: vpshlb %xmm2, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: constant_shift_v32i8:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
; XOPAVX2-NEXT: vpshlb %xmm2, %xmm1, %xmm1
; XOPAVX2-NEXT: vpshlb %xmm2, %xmm0, %xmm0
; XOPAVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512DQ-LABEL: constant_shift_v32i8:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpsllw $4, %ymm0, %ymm1
; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm2 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vpsllw $2, %ymm0, %ymm1
; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
; AVX512DQ-NEXT: vpaddb %ymm2, %ymm2, %ymm2
; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vpaddb %ymm0, %ymm0, %ymm1
; AVX512DQ-NEXT: vpaddb %ymm2, %ymm2, %ymm2
; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: constant_shift_v32i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; AVX512BW-NEXT: vpsllvw {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: retq
;
; AVX512DQVL-LABEL: constant_shift_v32i8:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vpsllw $4, %ymm0, %ymm1
; AVX512DQVL-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
; AVX512DQVL-NEXT: vmovdqa {{.*#+}} ymm2 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
; AVX512DQVL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX512DQVL-NEXT: vpsllw $2, %ymm0, %ymm1
; AVX512DQVL-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
; AVX512DQVL-NEXT: vpaddb %ymm2, %ymm2, %ymm2
; AVX512DQVL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX512DQVL-NEXT: vpaddb %ymm0, %ymm0, %ymm1
; AVX512DQVL-NEXT: vpaddb %ymm2, %ymm2, %ymm2
; AVX512DQVL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX512DQVL-NEXT: retq
;
; AVX512BWVL-LABEL: constant_shift_v32i8:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; AVX512BWVL-NEXT: vpsllvw {{.*}}(%rip), %zmm0, %zmm0
; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BWVL-NEXT: retq
;
; X32-AVX1-LABEL: constant_shift_v32i8:
; X32-AVX1: # %bb.0:
; X32-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,128,64,32,16,8,4,2,1]
; X32-AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; X32-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; X32-AVX1-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; X32-AVX1-NEXT: vpmullw %xmm1, %xmm3, %xmm3
; X32-AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
; X32-AVX1-NEXT: vpand %xmm4, %xmm3, %xmm3
; X32-AVX1-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
; X32-AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [1,2,4,8,16,32,64,128]
; X32-AVX1-NEXT: vpmullw %xmm5, %xmm2, %xmm2
; X32-AVX1-NEXT: vpand %xmm4, %xmm2, %xmm2
; X32-AVX1-NEXT: vpackuswb %xmm3, %xmm2, %xmm2
; X32-AVX1-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; X32-AVX1-NEXT: vpmullw %xmm1, %xmm3, %xmm1
; X32-AVX1-NEXT: vpand %xmm4, %xmm1, %xmm1
; X32-AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X32-AVX1-NEXT: vpmullw %xmm5, %xmm0, %xmm0
; X32-AVX1-NEXT: vpand %xmm4, %xmm0, %xmm0
; X32-AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; X32-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; X32-AVX1-NEXT: retl
;
; X32-AVX2-LABEL: constant_shift_v32i8:
; X32-AVX2: # %bb.0:
; X32-AVX2-NEXT: vpsllw $4, %ymm0, %ymm1
; X32-AVX2-NEXT: vpand {{\.LCPI.*}}, %ymm1, %ymm1
; X32-AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
; X32-AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; X32-AVX2-NEXT: vpsllw $2, %ymm0, %ymm1
; X32-AVX2-NEXT: vpand {{\.LCPI.*}}, %ymm1, %ymm1
; X32-AVX2-NEXT: vpaddb %ymm2, %ymm2, %ymm2
; X32-AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; X32-AVX2-NEXT: vpaddb %ymm0, %ymm0, %ymm1
; X32-AVX2-NEXT: vpaddb %ymm2, %ymm2, %ymm2
; X32-AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; X32-AVX2-NEXT: retl
  %shift = shl <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
  ret <32 x i8> %shift
}

;
; Uniform Constant Shifts
;

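; A uniform constant amount maps directly onto the immediate shift forms
; (vpsllq/vpslld/vpsllw $imm); AVX1 only has to split and rejoin the
; 128-bit halves.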
define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) nounwind {
; AVX1-LABEL: splatconstant_shift_v4i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vpsllq $7, %xmm0, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpsllq $7, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splatconstant_shift_v4i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsllq $7, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatconstant_shift_v4i64:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vpsllq $7, %xmm0, %xmm1
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; XOPAVX1-NEXT: vpsllq $7, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: splatconstant_shift_v4i64:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpsllq $7, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatconstant_shift_v4i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsllq $7, %ymm0, %ymm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatconstant_shift_v4i64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllq $7, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X32-AVX1-LABEL: splatconstant_shift_v4i64:
; X32-AVX1: # %bb.0:
; X32-AVX1-NEXT: vpsllq $7, %xmm0, %xmm1
; X32-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; X32-AVX1-NEXT: vpsllq $7, %xmm0, %xmm0
; X32-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X32-AVX1-NEXT: retl
;
; X32-AVX2-LABEL: splatconstant_shift_v4i64:
; X32-AVX2: # %bb.0:
; X32-AVX2-NEXT: vpsllq $7, %ymm0, %ymm0
; X32-AVX2-NEXT: retl
  %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
  ret <4 x i64> %shift
}

define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) nounwind {
; AVX1-LABEL: splatconstant_shift_v8i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpslld $5, %xmm0, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpslld $5, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splatconstant_shift_v8i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpslld $5, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatconstant_shift_v8i32:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vpslld $5, %xmm0, %xmm1
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; XOPAVX1-NEXT: vpslld $5, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: splatconstant_shift_v8i32:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpslld $5, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatconstant_shift_v8i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpslld $5, %ymm0, %ymm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatconstant_shift_v8i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpslld $5, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X32-AVX1-LABEL: splatconstant_shift_v8i32:
; X32-AVX1: # %bb.0:
; X32-AVX1-NEXT: vpslld $5, %xmm0, %xmm1
; X32-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; X32-AVX1-NEXT: vpslld $5, %xmm0, %xmm0
; X32-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X32-AVX1-NEXT: retl
;
; X32-AVX2-LABEL: splatconstant_shift_v8i32:
; X32-AVX2: # %bb.0:
; X32-AVX2-NEXT: vpslld $5, %ymm0, %ymm0
; X32-AVX2-NEXT: retl
  %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <8 x i32> %shift
}

define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) nounwind {
; AVX1-LABEL: splatconstant_shift_v16i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpsllw $3, %xmm0, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpsllw $3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splatconstant_shift_v16i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsllw $3, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatconstant_shift_v16i16:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vpsllw $3, %xmm0, %xmm1
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; XOPAVX1-NEXT: vpsllw $3, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: splatconstant_shift_v16i16:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpsllw $3, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatconstant_shift_v16i16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsllw $3, %ymm0, %ymm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatconstant_shift_v16i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllw $3, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X32-AVX1-LABEL: splatconstant_shift_v16i16:
; X32-AVX1: # %bb.0:
; X32-AVX1-NEXT: vpsllw $3, %xmm0, %xmm1
; X32-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; X32-AVX1-NEXT: vpsllw $3, %xmm0, %xmm0
; X32-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X32-AVX1-NEXT: retl
;
; X32-AVX2-LABEL: splatconstant_shift_v16i16:
; X32-AVX2: # %bb.0:
; X32-AVX2-NEXT: vpsllw $3, %ymm0, %ymm0
; X32-AVX2-NEXT: retl
  %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <16 x i16> %shift
}

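; Bytes still lack a native shift, but with a uniform constant the whole
; vector can be shifted as words and the bits that crossed into the next
; byte masked off: vpsllw $3 followed by vpand with 0xF8 (248).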
define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) nounwind {
; AVX1-LABEL: splatconstant_shift_v32i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpsllw $3, %xmm1, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248]
; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpsllw $3, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splatconstant_shift_v32i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsllw $3, %ymm0, %ymm0
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatconstant_shift_v32i8:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
; XOPAVX1-NEXT: vpshlb %xmm2, %xmm1, %xmm1
; XOPAVX1-NEXT: vpshlb %xmm2, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: splatconstant_shift_v32i8:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpsllw $3, %ymm0, %ymm0
; XOPAVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatconstant_shift_v32i8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsllw $3, %ymm0, %ymm0
; AVX512-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatconstant_shift_v32i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllw $3, %ymm0, %ymm0
; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X32-AVX1-LABEL: splatconstant_shift_v32i8:
; X32-AVX1: # %bb.0:
; X32-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X32-AVX1-NEXT: vpsllw $3, %xmm1, %xmm1
; X32-AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248]
; X32-AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
; X32-AVX1-NEXT: vpsllw $3, %xmm0, %xmm0
; X32-AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
; X32-AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X32-AVX1-NEXT: retl
;
; X32-AVX2-LABEL: splatconstant_shift_v32i8:
; X32-AVX2: # %bb.0:
; X32-AVX2-NEXT: vpsllw $3, %ymm0, %ymm0
; X32-AVX2-NEXT: vpand {{\.LCPI.*}}, %ymm0, %ymm0
; X32-AVX2-NEXT: retl
  %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <32 x i8> %shift
}