1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX1 3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX2 4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefix=XOPAVX1 5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefix=XOPAVX2 6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefixes=AVX512,AVX512DQ 7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW 8; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=AVX512VL,AVX512DQVL 9; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX512VL,AVX512BWVL 10; 11; 32-bit runs to make sure we do reasonable things for i64 shifts. 12; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X86-AVX1 13; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X86-AVX2 14 15; 16; Variable Shifts 17; 18 19define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind { 20; AVX1-LABEL: var_shift_v4i64: 21; AVX1: # %bb.0: 22; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 23; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 24; AVX1-NEXT: vpsrlq %xmm2, %xmm3, %xmm4 25; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3] 26; AVX1-NEXT: vpsrlq %xmm2, %xmm3, %xmm2 27; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm4[0,1,2,3],xmm2[4,5,6,7] 28; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm3 29; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 30; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 31; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm3[0,1,2,3],xmm0[4,5,6,7] 32; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 33; AVX1-NEXT: retq 34; 35; AVX2-LABEL: var_shift_v4i64: 36; AVX2: # %bb.0: 37; AVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 38; AVX2-NEXT: retq 39; 40; XOPAVX1-LABEL: var_shift_v4i64: 41; XOPAVX1: # %bb.0: 42; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 43; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 44; XOPAVX1-NEXT: vpsubq %xmm2, %xmm3, %xmm2 45; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 46; XOPAVX1-NEXT: vpshlq %xmm2, %xmm4, %xmm2 47; XOPAVX1-NEXT: vpsubq %xmm1, %xmm3, %xmm1 48; XOPAVX1-NEXT: vpshlq %xmm1, %xmm0, %xmm0 49; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 50; XOPAVX1-NEXT: retq 51; 52; XOPAVX2-LABEL: var_shift_v4i64: 53; XOPAVX2: # %bb.0: 54; XOPAVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 55; XOPAVX2-NEXT: retq 56; 57; AVX512-LABEL: var_shift_v4i64: 58; AVX512: # %bb.0: 59; AVX512-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 60; AVX512-NEXT: retq 61; 62; AVX512VL-LABEL: var_shift_v4i64: 63; AVX512VL: # %bb.0: 64; AVX512VL-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 65; AVX512VL-NEXT: retq 66; 67; X86-AVX1-LABEL: var_shift_v4i64: 68; X86-AVX1: # %bb.0: 69; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 70; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 71; X86-AVX1-NEXT: vpsrlq %xmm2, %xmm3, %xmm4 72; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3] 73; X86-AVX1-NEXT: vpsrlq %xmm2, %xmm3, %xmm2 74; X86-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm4[0,1,2,3],xmm2[4,5,6,7] 75; X86-AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm3 76; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 77; X86-AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 78; X86-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm3[0,1,2,3],xmm0[4,5,6,7] 79; 
X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 80; X86-AVX1-NEXT: retl 81; 82; X86-AVX2-LABEL: var_shift_v4i64: 83; X86-AVX2: # %bb.0: 84; X86-AVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 85; X86-AVX2-NEXT: retl 86 %shift = lshr <4 x i64> %a, %b 87 ret <4 x i64> %shift 88} 89 90define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind { 91; AVX1-LABEL: var_shift_v8i32: 92; AVX1: # %bb.0: 93; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 94; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 95; AVX1-NEXT: vpsrldq {{.*#+}} xmm4 = xmm3[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 96; AVX1-NEXT: vpsrld %xmm4, %xmm2, %xmm4 97; AVX1-NEXT: vpsrlq $32, %xmm3, %xmm5 98; AVX1-NEXT: vpsrld %xmm5, %xmm2, %xmm5 99; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm5[0,1,2,3],xmm4[4,5,6,7] 100; AVX1-NEXT: vpxor %xmm5, %xmm5, %xmm5 101; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm6 = xmm3[2],xmm5[2],xmm3[3],xmm5[3] 102; AVX1-NEXT: vpsrld %xmm6, %xmm2, %xmm6 103; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero 104; AVX1-NEXT: vpsrld %xmm3, %xmm2, %xmm2 105; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm6[4,5,6,7] 106; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm4[2,3],xmm2[4,5],xmm4[6,7] 107; AVX1-NEXT: vpsrldq {{.*#+}} xmm3 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 108; AVX1-NEXT: vpsrld %xmm3, %xmm0, %xmm3 109; AVX1-NEXT: vpsrlq $32, %xmm1, %xmm4 110; AVX1-NEXT: vpsrld %xmm4, %xmm0, %xmm4 111; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm4[0,1,2,3],xmm3[4,5,6,7] 112; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm4 = xmm1[2],xmm5[2],xmm1[3],xmm5[3] 113; AVX1-NEXT: vpsrld %xmm4, %xmm0, %xmm4 114; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero 115; AVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0 116; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm4[4,5,6,7] 117; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm3[2,3],xmm0[4,5],xmm3[6,7] 118; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 119; AVX1-NEXT: retq 120; 121; AVX2-LABEL: var_shift_v8i32: 122; AVX2: # %bb.0: 123; AVX2-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 124; AVX2-NEXT: retq 125; 126; XOPAVX1-LABEL: var_shift_v8i32: 127; XOPAVX1: # %bb.0: 128; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 129; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 130; XOPAVX1-NEXT: vpsubd %xmm2, %xmm3, %xmm2 131; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 132; XOPAVX1-NEXT: vpshld %xmm2, %xmm4, %xmm2 133; XOPAVX1-NEXT: vpsubd %xmm1, %xmm3, %xmm1 134; XOPAVX1-NEXT: vpshld %xmm1, %xmm0, %xmm0 135; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 136; XOPAVX1-NEXT: retq 137; 138; XOPAVX2-LABEL: var_shift_v8i32: 139; XOPAVX2: # %bb.0: 140; XOPAVX2-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 141; XOPAVX2-NEXT: retq 142; 143; AVX512-LABEL: var_shift_v8i32: 144; AVX512: # %bb.0: 145; AVX512-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 146; AVX512-NEXT: retq 147; 148; AVX512VL-LABEL: var_shift_v8i32: 149; AVX512VL: # %bb.0: 150; AVX512VL-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 151; AVX512VL-NEXT: retq 152; 153; X86-AVX1-LABEL: var_shift_v8i32: 154; X86-AVX1: # %bb.0: 155; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 156; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 157; X86-AVX1-NEXT: vpsrldq {{.*#+}} xmm4 = xmm3[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 158; X86-AVX1-NEXT: vpsrld %xmm4, %xmm2, %xmm4 159; X86-AVX1-NEXT: vpsrlq $32, %xmm3, %xmm5 160; X86-AVX1-NEXT: vpsrld %xmm5, %xmm2, %xmm5 161; X86-AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm5[0,1,2,3],xmm4[4,5,6,7] 162; X86-AVX1-NEXT: vpxor %xmm5, %xmm5, %xmm5 163; 
X86-AVX1-NEXT: vpunpckhdq {{.*#+}} xmm6 = xmm3[2],xmm5[2],xmm3[3],xmm5[3] 164; X86-AVX1-NEXT: vpsrld %xmm6, %xmm2, %xmm6 165; X86-AVX1-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero 166; X86-AVX1-NEXT: vpsrld %xmm3, %xmm2, %xmm2 167; X86-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm6[4,5,6,7] 168; X86-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm4[2,3],xmm2[4,5],xmm4[6,7] 169; X86-AVX1-NEXT: vpsrldq {{.*#+}} xmm3 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 170; X86-AVX1-NEXT: vpsrld %xmm3, %xmm0, %xmm3 171; X86-AVX1-NEXT: vpsrlq $32, %xmm1, %xmm4 172; X86-AVX1-NEXT: vpsrld %xmm4, %xmm0, %xmm4 173; X86-AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm4[0,1,2,3],xmm3[4,5,6,7] 174; X86-AVX1-NEXT: vpunpckhdq {{.*#+}} xmm4 = xmm1[2],xmm5[2],xmm1[3],xmm5[3] 175; X86-AVX1-NEXT: vpsrld %xmm4, %xmm0, %xmm4 176; X86-AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero 177; X86-AVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0 178; X86-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm4[4,5,6,7] 179; X86-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm3[2,3],xmm0[4,5],xmm3[6,7] 180; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 181; X86-AVX1-NEXT: retl 182; 183; X86-AVX2-LABEL: var_shift_v8i32: 184; X86-AVX2: # %bb.0: 185; X86-AVX2-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 186; X86-AVX2-NEXT: retl 187 %shift = lshr <8 x i32> %a, %b 188 ret <8 x i32> %shift 189} 190 191define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind { 192; AVX1-LABEL: var_shift_v16i16: 193; AVX1: # %bb.0: 194; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 195; AVX1-NEXT: vpsllw $12, %xmm2, %xmm3 196; AVX1-NEXT: vpsllw $4, %xmm2, %xmm2 197; AVX1-NEXT: vpor %xmm3, %xmm2, %xmm2 198; AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm3 199; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 200; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm5 201; AVX1-NEXT: vpblendvb %xmm2, %xmm5, %xmm4, %xmm2 202; AVX1-NEXT: vpsrlw $4, %xmm2, %xmm4 203; AVX1-NEXT: vpblendvb %xmm3, %xmm4, %xmm2, %xmm2 204; AVX1-NEXT: vpsrlw $2, %xmm2, %xmm4 205; AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3 206; AVX1-NEXT: vpblendvb %xmm3, %xmm4, %xmm2, %xmm2 207; AVX1-NEXT: vpsrlw $1, %xmm2, %xmm4 208; AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3 209; AVX1-NEXT: vpblendvb %xmm3, %xmm4, %xmm2, %xmm2 210; AVX1-NEXT: vpsllw $12, %xmm1, %xmm3 211; AVX1-NEXT: vpsllw $4, %xmm1, %xmm1 212; AVX1-NEXT: vpor %xmm3, %xmm1, %xmm1 213; AVX1-NEXT: vpaddw %xmm1, %xmm1, %xmm3 214; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm4 215; AVX1-NEXT: vpblendvb %xmm1, %xmm4, %xmm0, %xmm0 216; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm1 217; AVX1-NEXT: vpblendvb %xmm3, %xmm1, %xmm0, %xmm0 218; AVX1-NEXT: vpsrlw $2, %xmm0, %xmm1 219; AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3 220; AVX1-NEXT: vpblendvb %xmm3, %xmm1, %xmm0, %xmm0 221; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm1 222; AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3 223; AVX1-NEXT: vpblendvb %xmm3, %xmm1, %xmm0, %xmm0 224; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 225; AVX1-NEXT: retq 226; 227; AVX2-LABEL: var_shift_v16i16: 228; AVX2: # %bb.0: 229; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 230; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15] 231; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15] 232; AVX2-NEXT: vpsrlvd %ymm3, %ymm4, %ymm3 233; AVX2-NEXT: vpsrld $16, %ymm3, %ymm3 234; AVX2-NEXT: vpunpcklwd {{.*#+}} 
ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11] 235; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11] 236; AVX2-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 237; AVX2-NEXT: vpsrld $16, %ymm0, %ymm0 238; AVX2-NEXT: vpackusdw %ymm3, %ymm0, %ymm0 239; AVX2-NEXT: retq 240; 241; XOPAVX1-LABEL: var_shift_v16i16: 242; XOPAVX1: # %bb.0: 243; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 244; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 245; XOPAVX1-NEXT: vpsubw %xmm2, %xmm3, %xmm2 246; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 247; XOPAVX1-NEXT: vpshlw %xmm2, %xmm4, %xmm2 248; XOPAVX1-NEXT: vpsubw %xmm1, %xmm3, %xmm1 249; XOPAVX1-NEXT: vpshlw %xmm1, %xmm0, %xmm0 250; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 251; XOPAVX1-NEXT: retq 252; 253; XOPAVX2-LABEL: var_shift_v16i16: 254; XOPAVX2: # %bb.0: 255; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 256; XOPAVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 257; XOPAVX2-NEXT: vpsubw %xmm2, %xmm3, %xmm2 258; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm4 259; XOPAVX2-NEXT: vpshlw %xmm2, %xmm4, %xmm2 260; XOPAVX2-NEXT: vpsubw %xmm1, %xmm3, %xmm1 261; XOPAVX2-NEXT: vpshlw %xmm1, %xmm0, %xmm0 262; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 263; XOPAVX2-NEXT: retq 264; 265; AVX512DQ-LABEL: var_shift_v16i16: 266; AVX512DQ: # %bb.0: 267; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero 268; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero 269; AVX512DQ-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0 270; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0 271; AVX512DQ-NEXT: retq 272; 273; AVX512BW-LABEL: var_shift_v16i16: 274; AVX512BW: # %bb.0: 275; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 276; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 277; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 278; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 279; AVX512BW-NEXT: retq 280; 281; AVX512DQVL-LABEL: var_shift_v16i16: 282; AVX512DQVL: # %bb.0: 283; AVX512DQVL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero 284; AVX512DQVL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero 285; AVX512DQVL-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0 286; AVX512DQVL-NEXT: vpmovdw %zmm0, %ymm0 287; AVX512DQVL-NEXT: retq 288; 289; AVX512BWVL-LABEL: var_shift_v16i16: 290; AVX512BWVL: # %bb.0: 291; AVX512BWVL-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0 292; AVX512BWVL-NEXT: retq 293; 294; X86-AVX1-LABEL: var_shift_v16i16: 295; X86-AVX1: # %bb.0: 296; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 297; X86-AVX1-NEXT: vpsllw $12, %xmm2, %xmm3 298; X86-AVX1-NEXT: vpsllw $4, %xmm2, %xmm2 299; X86-AVX1-NEXT: vpor %xmm3, %xmm2, 
%xmm2 300; X86-AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm3 301; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 302; X86-AVX1-NEXT: vpsrlw $8, %xmm4, %xmm5 303; X86-AVX1-NEXT: vpblendvb %xmm2, %xmm5, %xmm4, %xmm2 304; X86-AVX1-NEXT: vpsrlw $4, %xmm2, %xmm4 305; X86-AVX1-NEXT: vpblendvb %xmm3, %xmm4, %xmm2, %xmm2 306; X86-AVX1-NEXT: vpsrlw $2, %xmm2, %xmm4 307; X86-AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3 308; X86-AVX1-NEXT: vpblendvb %xmm3, %xmm4, %xmm2, %xmm2 309; X86-AVX1-NEXT: vpsrlw $1, %xmm2, %xmm4 310; X86-AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3 311; X86-AVX1-NEXT: vpblendvb %xmm3, %xmm4, %xmm2, %xmm2 312; X86-AVX1-NEXT: vpsllw $12, %xmm1, %xmm3 313; X86-AVX1-NEXT: vpsllw $4, %xmm1, %xmm1 314; X86-AVX1-NEXT: vpor %xmm3, %xmm1, %xmm1 315; X86-AVX1-NEXT: vpaddw %xmm1, %xmm1, %xmm3 316; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm4 317; X86-AVX1-NEXT: vpblendvb %xmm1, %xmm4, %xmm0, %xmm0 318; X86-AVX1-NEXT: vpsrlw $4, %xmm0, %xmm1 319; X86-AVX1-NEXT: vpblendvb %xmm3, %xmm1, %xmm0, %xmm0 320; X86-AVX1-NEXT: vpsrlw $2, %xmm0, %xmm1 321; X86-AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3 322; X86-AVX1-NEXT: vpblendvb %xmm3, %xmm1, %xmm0, %xmm0 323; X86-AVX1-NEXT: vpsrlw $1, %xmm0, %xmm1 324; X86-AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3 325; X86-AVX1-NEXT: vpblendvb %xmm3, %xmm1, %xmm0, %xmm0 326; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 327; X86-AVX1-NEXT: retl 328; 329; X86-AVX2-LABEL: var_shift_v16i16: 330; X86-AVX2: # %bb.0: 331; X86-AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 332; X86-AVX2-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15] 333; X86-AVX2-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15] 334; X86-AVX2-NEXT: vpsrlvd %ymm3, %ymm4, %ymm3 335; X86-AVX2-NEXT: vpsrld $16, %ymm3, %ymm3 336; X86-AVX2-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11] 337; X86-AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11] 338; X86-AVX2-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 339; X86-AVX2-NEXT: vpsrld $16, %ymm0, %ymm0 340; X86-AVX2-NEXT: vpackusdw %ymm3, %ymm0, %ymm0 341; X86-AVX2-NEXT: retl 342 %shift = lshr <16 x i16> %a, %b 343 ret <16 x i16> %shift 344} 345 346define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind { 347; AVX1-LABEL: var_shift_v32i8: 348; AVX1: # %bb.0: 349; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 350; AVX1-NEXT: vpsrlw $4, %xmm2, %xmm3 351; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 352; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm3 353; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5 354; AVX1-NEXT: vpsllw $5, %xmm5, %xmm5 355; AVX1-NEXT: vpblendvb %xmm5, %xmm3, %xmm2, %xmm2 356; AVX1-NEXT: vpsrlw $2, %xmm2, %xmm3 357; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63] 358; AVX1-NEXT: vpand %xmm6, %xmm3, %xmm3 359; AVX1-NEXT: vpaddb %xmm5, %xmm5, %xmm5 360; AVX1-NEXT: vpblendvb %xmm5, %xmm3, %xmm2, %xmm2 361; AVX1-NEXT: vpsrlw $1, %xmm2, %xmm3 362; AVX1-NEXT: vmovdqa {{.*#+}} xmm7 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] 363; AVX1-NEXT: vpand %xmm7, %xmm3, %xmm3 364; AVX1-NEXT: vpaddb %xmm5, %xmm5, %xmm5 365; AVX1-NEXT: vpblendvb %xmm5, 
%xmm3, %xmm2, %xmm2 366; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm3 367; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm3 368; AVX1-NEXT: vpsllw $5, %xmm1, %xmm1 369; AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0 370; AVX1-NEXT: vpsrlw $2, %xmm0, %xmm3 371; AVX1-NEXT: vpand %xmm6, %xmm3, %xmm3 372; AVX1-NEXT: vpaddb %xmm1, %xmm1, %xmm1 373; AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0 374; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm3 375; AVX1-NEXT: vpand %xmm7, %xmm3, %xmm3 376; AVX1-NEXT: vpaddb %xmm1, %xmm1, %xmm1 377; AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0 378; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 379; AVX1-NEXT: retq 380; 381; AVX2-LABEL: var_shift_v32i8: 382; AVX2: # %bb.0: 383; AVX2-NEXT: vpsllw $5, %ymm1, %ymm1 384; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm2 385; AVX2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 386; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 387; AVX2-NEXT: vpsrlw $2, %ymm0, %ymm2 388; AVX2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 389; AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1 390; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 391; AVX2-NEXT: vpsrlw $1, %ymm0, %ymm2 392; AVX2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 393; AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1 394; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 395; AVX2-NEXT: retq 396; 397; XOPAVX1-LABEL: var_shift_v32i8: 398; XOPAVX1: # %bb.0: 399; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 400; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 401; XOPAVX1-NEXT: vpsubb %xmm2, %xmm3, %xmm2 402; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 403; XOPAVX1-NEXT: vpshlb %xmm2, %xmm4, %xmm2 404; XOPAVX1-NEXT: vpsubb %xmm1, %xmm3, %xmm1 405; XOPAVX1-NEXT: vpshlb %xmm1, %xmm0, %xmm0 406; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 407; XOPAVX1-NEXT: retq 408; 409; XOPAVX2-LABEL: var_shift_v32i8: 410; XOPAVX2: # %bb.0: 411; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 412; XOPAVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 413; XOPAVX2-NEXT: vpsubb %xmm2, %xmm3, %xmm2 414; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm4 415; XOPAVX2-NEXT: vpshlb %xmm2, %xmm4, %xmm2 416; XOPAVX2-NEXT: vpsubb %xmm1, %xmm3, %xmm1 417; XOPAVX2-NEXT: vpshlb %xmm1, %xmm0, %xmm0 418; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 419; XOPAVX2-NEXT: retq 420; 421; AVX512DQ-LABEL: var_shift_v32i8: 422; AVX512DQ: # %bb.0: 423; AVX512DQ-NEXT: vpsllw $5, %ymm1, %ymm1 424; AVX512DQ-NEXT: vpsrlw $4, %ymm0, %ymm2 425; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 426; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 427; AVX512DQ-NEXT: vpsrlw $2, %ymm0, %ymm2 428; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 429; AVX512DQ-NEXT: vpaddb %ymm1, %ymm1, %ymm1 430; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 431; AVX512DQ-NEXT: vpsrlw $1, %ymm0, %ymm2 432; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 433; AVX512DQ-NEXT: vpaddb %ymm1, %ymm1, %ymm1 434; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 435; AVX512DQ-NEXT: retq 436; 437; AVX512BW-LABEL: var_shift_v32i8: 438; AVX512BW: # %bb.0: 439; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero 440; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = 
ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero 441; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 442; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 443; AVX512BW-NEXT: retq 444; 445; AVX512DQVL-LABEL: var_shift_v32i8: 446; AVX512DQVL: # %bb.0: 447; AVX512DQVL-NEXT: vpsllw $5, %ymm1, %ymm1 448; AVX512DQVL-NEXT: vpsrlw $4, %ymm0, %ymm2 449; AVX512DQVL-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 450; AVX512DQVL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 451; AVX512DQVL-NEXT: vpsrlw $2, %ymm0, %ymm2 452; AVX512DQVL-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 453; AVX512DQVL-NEXT: vpaddb %ymm1, %ymm1, %ymm1 454; AVX512DQVL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 455; AVX512DQVL-NEXT: vpsrlw $1, %ymm0, %ymm2 456; AVX512DQVL-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 457; AVX512DQVL-NEXT: vpaddb %ymm1, %ymm1, %ymm1 458; AVX512DQVL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 459; AVX512DQVL-NEXT: retq 460; 461; AVX512BWVL-LABEL: var_shift_v32i8: 462; AVX512BWVL: # %bb.0: 463; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero 464; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero 465; AVX512BWVL-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 466; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm0 467; AVX512BWVL-NEXT: retq 468; 469; X86-AVX1-LABEL: var_shift_v32i8: 470; X86-AVX1: # %bb.0: 471; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 472; X86-AVX1-NEXT: vpsrlw $4, %xmm2, %xmm3 473; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 474; X86-AVX1-NEXT: vpand %xmm4, %xmm3, %xmm3 475; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5 476; X86-AVX1-NEXT: vpsllw $5, %xmm5, %xmm5 477; X86-AVX1-NEXT: vpblendvb %xmm5, %xmm3, %xmm2, %xmm2 478; X86-AVX1-NEXT: vpsrlw $2, %xmm2, %xmm3 479; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63] 480; X86-AVX1-NEXT: vpand %xmm6, %xmm3, %xmm3 481; X86-AVX1-NEXT: vpaddb %xmm5, %xmm5, %xmm5 482; X86-AVX1-NEXT: vpblendvb %xmm5, %xmm3, %xmm2, %xmm2 483; X86-AVX1-NEXT: vpsrlw $1, %xmm2, %xmm3 484; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm7 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] 485; X86-AVX1-NEXT: vpand %xmm7, %xmm3, %xmm3 486; X86-AVX1-NEXT: vpaddb %xmm5, %xmm5, %xmm5 487; X86-AVX1-NEXT: vpblendvb %xmm5, %xmm3, %xmm2, %xmm2 488; X86-AVX1-NEXT: vpsrlw $4, %xmm0, %xmm3 489; X86-AVX1-NEXT: vpand 
%xmm4, %xmm3, %xmm3 490; X86-AVX1-NEXT: vpsllw $5, %xmm1, %xmm1 491; X86-AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0 492; X86-AVX1-NEXT: vpsrlw $2, %xmm0, %xmm3 493; X86-AVX1-NEXT: vpand %xmm6, %xmm3, %xmm3 494; X86-AVX1-NEXT: vpaddb %xmm1, %xmm1, %xmm1 495; X86-AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0 496; X86-AVX1-NEXT: vpsrlw $1, %xmm0, %xmm3 497; X86-AVX1-NEXT: vpand %xmm7, %xmm3, %xmm3 498; X86-AVX1-NEXT: vpaddb %xmm1, %xmm1, %xmm1 499; X86-AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0 500; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 501; X86-AVX1-NEXT: retl 502; 503; X86-AVX2-LABEL: var_shift_v32i8: 504; X86-AVX2: # %bb.0: 505; X86-AVX2-NEXT: vpsllw $5, %ymm1, %ymm1 506; X86-AVX2-NEXT: vpsrlw $4, %ymm0, %ymm2 507; X86-AVX2-NEXT: vpand {{\.LCPI.*}}, %ymm2, %ymm2 508; X86-AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 509; X86-AVX2-NEXT: vpsrlw $2, %ymm0, %ymm2 510; X86-AVX2-NEXT: vpand {{\.LCPI.*}}, %ymm2, %ymm2 511; X86-AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1 512; X86-AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 513; X86-AVX2-NEXT: vpsrlw $1, %ymm0, %ymm2 514; X86-AVX2-NEXT: vpand {{\.LCPI.*}}, %ymm2, %ymm2 515; X86-AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1 516; X86-AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 517; X86-AVX2-NEXT: retl 518 %shift = lshr <32 x i8> %a, %b 519 ret <32 x i8> %shift 520} 521 522; 523; Uniform Variable Shifts 524; 525 526define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind { 527; AVX1-LABEL: splatvar_shift_v4i64: 528; AVX1: # %bb.0: 529; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 530; AVX1-NEXT: vpsrlq %xmm1, %xmm2, %xmm2 531; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 532; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 533; AVX1-NEXT: retq 534; 535; AVX2-LABEL: splatvar_shift_v4i64: 536; AVX2: # %bb.0: 537; AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 538; AVX2-NEXT: retq 539; 540; XOPAVX1-LABEL: splatvar_shift_v4i64: 541; XOPAVX1: # %bb.0: 542; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 543; XOPAVX1-NEXT: vpsrlq %xmm1, %xmm2, %xmm2 544; XOPAVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 545; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 546; XOPAVX1-NEXT: retq 547; 548; XOPAVX2-LABEL: splatvar_shift_v4i64: 549; XOPAVX2: # %bb.0: 550; XOPAVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 551; XOPAVX2-NEXT: retq 552; 553; AVX512-LABEL: splatvar_shift_v4i64: 554; AVX512: # %bb.0: 555; AVX512-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 556; AVX512-NEXT: retq 557; 558; AVX512VL-LABEL: splatvar_shift_v4i64: 559; AVX512VL: # %bb.0: 560; AVX512VL-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 561; AVX512VL-NEXT: retq 562; 563; X86-AVX1-LABEL: splatvar_shift_v4i64: 564; X86-AVX1: # %bb.0: 565; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 566; X86-AVX1-NEXT: vpsrlq %xmm1, %xmm2, %xmm2 567; X86-AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 568; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 569; X86-AVX1-NEXT: retl 570; 571; X86-AVX2-LABEL: splatvar_shift_v4i64: 572; X86-AVX2: # %bb.0: 573; X86-AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 574; X86-AVX2-NEXT: retl 575 %splat = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> zeroinitializer 576 %shift = lshr <4 x i64> %a, %splat 577 ret <4 x i64> %shift 578} 579 580define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind { 581; AVX1-LABEL: splatvar_shift_v8i32: 582; AVX1: # %bb.0: 583; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero 584; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 585; AVX1-NEXT: vpsrld %xmm1, %xmm2, %xmm2 586; AVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0 587; AVX1-NEXT: vinsertf128 $1, %xmm2, 
%ymm0, %ymm0 588; AVX1-NEXT: retq 589; 590; AVX2-LABEL: splatvar_shift_v8i32: 591; AVX2: # %bb.0: 592; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero 593; AVX2-NEXT: vpsrld %xmm1, %ymm0, %ymm0 594; AVX2-NEXT: retq 595; 596; XOPAVX1-LABEL: splatvar_shift_v8i32: 597; XOPAVX1: # %bb.0: 598; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero 599; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 600; XOPAVX1-NEXT: vpsrld %xmm1, %xmm2, %xmm2 601; XOPAVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0 602; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 603; XOPAVX1-NEXT: retq 604; 605; XOPAVX2-LABEL: splatvar_shift_v8i32: 606; XOPAVX2: # %bb.0: 607; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero 608; XOPAVX2-NEXT: vpsrld %xmm1, %ymm0, %ymm0 609; XOPAVX2-NEXT: retq 610; 611; AVX512-LABEL: splatvar_shift_v8i32: 612; AVX512: # %bb.0: 613; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero 614; AVX512-NEXT: vpsrld %xmm1, %ymm0, %ymm0 615; AVX512-NEXT: retq 616; 617; AVX512VL-LABEL: splatvar_shift_v8i32: 618; AVX512VL: # %bb.0: 619; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero 620; AVX512VL-NEXT: vpsrld %xmm1, %ymm0, %ymm0 621; AVX512VL-NEXT: retq 622; 623; X86-AVX1-LABEL: splatvar_shift_v8i32: 624; X86-AVX1: # %bb.0: 625; X86-AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero 626; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 627; X86-AVX1-NEXT: vpsrld %xmm1, %xmm2, %xmm2 628; X86-AVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0 629; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 630; X86-AVX1-NEXT: retl 631; 632; X86-AVX2-LABEL: splatvar_shift_v8i32: 633; X86-AVX2: # %bb.0: 634; X86-AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero 635; X86-AVX2-NEXT: vpsrld %xmm1, %ymm0, %ymm0 636; X86-AVX2-NEXT: retl 637 %splat = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> zeroinitializer 638 %shift = lshr <8 x i32> %a, %splat 639 ret <8 x i32> %shift 640} 641 642define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind { 643; AVX1-LABEL: splatvar_shift_v16i16: 644; AVX1: # %bb.0: 645; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero 646; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 647; AVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2 648; AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 649; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 650; AVX1-NEXT: retq 651; 652; AVX2-LABEL: splatvar_shift_v16i16: 653; AVX2: # %bb.0: 654; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero 655; AVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 656; AVX2-NEXT: retq 657; 658; XOPAVX1-LABEL: splatvar_shift_v16i16: 659; XOPAVX1: # %bb.0: 660; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero 661; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 662; XOPAVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2 663; XOPAVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 664; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 665; XOPAVX1-NEXT: retq 666; 667; XOPAVX2-LABEL: splatvar_shift_v16i16: 668; XOPAVX2: # %bb.0: 669; XOPAVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero 670; XOPAVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 671; XOPAVX2-NEXT: retq 672; 673; AVX512-LABEL: splatvar_shift_v16i16: 674; AVX512: # %bb.0: 675; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero 676; AVX512-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 677; AVX512-NEXT: retq 678; 679; AVX512VL-LABEL: splatvar_shift_v16i16: 680; AVX512VL: # %bb.0: 681; AVX512VL-NEXT: 
vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero 682; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 683; AVX512VL-NEXT: retq 684; 685; X86-AVX1-LABEL: splatvar_shift_v16i16: 686; X86-AVX1: # %bb.0: 687; X86-AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero 688; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 689; X86-AVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2 690; X86-AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 691; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 692; X86-AVX1-NEXT: retl 693; 694; X86-AVX2-LABEL: splatvar_shift_v16i16: 695; X86-AVX2: # %bb.0: 696; X86-AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero 697; X86-AVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 698; X86-AVX2-NEXT: retl 699 %splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer 700 %shift = lshr <16 x i16> %a, %splat 701 ret <16 x i16> %shift 702} 703 704define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind { 705; AVX1-LABEL: splatvar_shift_v32i8: 706; AVX1: # %bb.0: 707; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero 708; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 709; AVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2 710; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3 711; AVX1-NEXT: vpsrlw %xmm1, %xmm3, %xmm3 712; AVX1-NEXT: vpshufb {{.*#+}} xmm3 = xmm3[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] 713; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2 714; AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 715; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0 716; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 717; AVX1-NEXT: retq 718; 719; AVX2-LABEL: splatvar_shift_v32i8: 720; AVX2: # %bb.0: 721; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero 722; AVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 723; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 724; AVX2-NEXT: vpsrlw %xmm1, %xmm2, %xmm1 725; AVX2-NEXT: vpsrlw $8, %xmm1, %xmm1 726; AVX2-NEXT: vpbroadcastb %xmm1, %ymm1 727; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 728; AVX2-NEXT: retq 729; 730; XOPAVX1-LABEL: splatvar_shift_v32i8: 731; XOPAVX1: # %bb.0: 732; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 733; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 734; XOPAVX1-NEXT: vpsubb %xmm1, %xmm2, %xmm1 735; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 736; XOPAVX1-NEXT: vpshlb %xmm1, %xmm2, %xmm2 737; XOPAVX1-NEXT: vpshlb %xmm1, %xmm0, %xmm0 738; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 739; XOPAVX1-NEXT: retq 740; 741; XOPAVX2-LABEL: splatvar_shift_v32i8: 742; XOPAVX2: # %bb.0: 743; XOPAVX2-NEXT: vpbroadcastb %xmm1, %xmm1 744; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 745; XOPAVX2-NEXT: vpsubb %xmm1, %xmm2, %xmm1 746; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm2 747; XOPAVX2-NEXT: vpshlb %xmm1, %xmm2, %xmm2 748; XOPAVX2-NEXT: vpshlb %xmm1, %xmm0, %xmm0 749; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 750; XOPAVX2-NEXT: retq 751; 752; AVX512DQ-LABEL: splatvar_shift_v32i8: 753; AVX512DQ: # %bb.0: 754; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero 755; AVX512DQ-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 756; AVX512DQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 757; AVX512DQ-NEXT: vpsrlw %xmm1, %xmm2, %xmm1 758; AVX512DQ-NEXT: vpsrlw $8, %xmm1, %xmm1 759; AVX512DQ-NEXT: vpbroadcastb %xmm1, %ymm1 760; AVX512DQ-NEXT: vpand %ymm1, %ymm0, %ymm0 761; AVX512DQ-NEXT: retq 762; 763; AVX512BW-LABEL: splatvar_shift_v32i8: 764; AVX512BW: # 
%bb.0: 765; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero 766; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero 767; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 768; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 769; AVX512BW-NEXT: retq 770; 771; AVX512DQVL-LABEL: splatvar_shift_v32i8: 772; AVX512DQVL: # %bb.0: 773; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero 774; AVX512DQVL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 775; AVX512DQVL-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 776; AVX512DQVL-NEXT: vpsrlw %xmm1, %xmm2, %xmm1 777; AVX512DQVL-NEXT: vpsrlw $8, %xmm1, %xmm1 778; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %ymm1 779; AVX512DQVL-NEXT: vpand %ymm1, %ymm0, %ymm0 780; AVX512DQVL-NEXT: retq 781; 782; AVX512BWVL-LABEL: splatvar_shift_v32i8: 783; AVX512BWVL: # %bb.0: 784; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero 785; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero 786; AVX512BWVL-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 787; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm0 788; AVX512BWVL-NEXT: retq 789; 790; X86-AVX1-LABEL: splatvar_shift_v32i8: 791; X86-AVX1: # %bb.0: 792; X86-AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero 793; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 794; X86-AVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2 795; X86-AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3 796; X86-AVX1-NEXT: vpsrlw %xmm1, %xmm3, %xmm3 797; X86-AVX1-NEXT: vpshufb {{.*#+}} xmm3 = xmm3[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] 798; X86-AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2 799; X86-AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 800; X86-AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0 801; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 802; X86-AVX1-NEXT: retl 803; 804; X86-AVX2-LABEL: splatvar_shift_v32i8: 805; X86-AVX2: # %bb.0: 806; X86-AVX2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero 807; X86-AVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 808; X86-AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 809; X86-AVX2-NEXT: vpsrlw %xmm1, %xmm2, %xmm1 810; X86-AVX2-NEXT: vpsrlw $8, %xmm1, %xmm1 811; X86-AVX2-NEXT: vpbroadcastb %xmm1, %ymm1 812; X86-AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 813; X86-AVX2-NEXT: retl 814 %splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer 815 %shift = lshr <32 x i8> %a, %splat 816 ret <32 x i8> %shift 817} 818 819; 820; Constant Shifts 821; 822 823define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) nounwind { 824; 
AVX1-LABEL: constant_shift_v4i64: 825; AVX1: # %bb.0: 826; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 827; AVX1-NEXT: vpsrlq $62, %xmm1, %xmm2 828; AVX1-NEXT: vpsrlq $31, %xmm1, %xmm1 829; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7] 830; AVX1-NEXT: vpsrlq $7, %xmm0, %xmm2 831; AVX1-NEXT: vpsrlq $1, %xmm0, %xmm0 832; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7] 833; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 834; AVX1-NEXT: retq 835; 836; AVX2-LABEL: constant_shift_v4i64: 837; AVX2: # %bb.0: 838; AVX2-NEXT: vpsrlvq {{.*}}(%rip), %ymm0, %ymm0 839; AVX2-NEXT: retq 840; 841; XOPAVX1-LABEL: constant_shift_v4i64: 842; XOPAVX1: # %bb.0: 843; XOPAVX1-NEXT: vpshlq {{.*}}(%rip), %xmm0, %xmm1 844; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 845; XOPAVX1-NEXT: vpshlq {{.*}}(%rip), %xmm0, %xmm0 846; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 847; XOPAVX1-NEXT: retq 848; 849; XOPAVX2-LABEL: constant_shift_v4i64: 850; XOPAVX2: # %bb.0: 851; XOPAVX2-NEXT: vpsrlvq {{.*}}(%rip), %ymm0, %ymm0 852; XOPAVX2-NEXT: retq 853; 854; AVX512-LABEL: constant_shift_v4i64: 855; AVX512: # %bb.0: 856; AVX512-NEXT: vpsrlvq {{.*}}(%rip), %ymm0, %ymm0 857; AVX512-NEXT: retq 858; 859; AVX512VL-LABEL: constant_shift_v4i64: 860; AVX512VL: # %bb.0: 861; AVX512VL-NEXT: vpsrlvq {{.*}}(%rip), %ymm0, %ymm0 862; AVX512VL-NEXT: retq 863; 864; X86-AVX1-LABEL: constant_shift_v4i64: 865; X86-AVX1: # %bb.0: 866; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 867; X86-AVX1-NEXT: vpsrlq $62, %xmm1, %xmm2 868; X86-AVX1-NEXT: vpsrlq $31, %xmm1, %xmm1 869; X86-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7] 870; X86-AVX1-NEXT: vpsrlq $7, %xmm0, %xmm2 871; X86-AVX1-NEXT: vpsrlq $1, %xmm0, %xmm0 872; X86-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7] 873; X86-AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 874; X86-AVX1-NEXT: retl 875; 876; X86-AVX2-LABEL: constant_shift_v4i64: 877; X86-AVX2: # %bb.0: 878; X86-AVX2-NEXT: vpsrlvq {{\.LCPI.*}}, %ymm0, %ymm0 879; X86-AVX2-NEXT: retl 880 %shift = lshr <4 x i64> %a, <i64 1, i64 7, i64 31, i64 62> 881 ret <4 x i64> %shift 882} 883 884define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) nounwind { 885; AVX1-LABEL: constant_shift_v8i32: 886; AVX1: # %bb.0: 887; AVX1-NEXT: vpsrld $7, %xmm0, %xmm1 888; AVX1-NEXT: vpsrld $5, %xmm0, %xmm2 889; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7] 890; AVX1-NEXT: vpsrld $6, %xmm0, %xmm2 891; AVX1-NEXT: vpsrld $4, %xmm0, %xmm3 892; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7] 893; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] 894; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 895; AVX1-NEXT: vpsrld $7, %xmm0, %xmm2 896; AVX1-NEXT: vpsrld $9, %xmm0, %xmm3 897; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7] 898; AVX1-NEXT: vpsrld $8, %xmm0, %xmm0 899; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7] 900; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 901; AVX1-NEXT: retq 902; 903; AVX2-LABEL: constant_shift_v8i32: 904; AVX2: # %bb.0: 905; AVX2-NEXT: vpsrlvd {{.*}}(%rip), %ymm0, %ymm0 906; AVX2-NEXT: retq 907; 908; XOPAVX1-LABEL: constant_shift_v8i32: 909; XOPAVX1: # %bb.0: 910; XOPAVX1-NEXT: vpshld {{.*}}(%rip), %xmm0, %xmm1 911; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 912; XOPAVX1-NEXT: vpshld {{.*}}(%rip), %xmm0, %xmm0 913; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 914; XOPAVX1-NEXT: retq 915; 916; XOPAVX2-LABEL: constant_shift_v8i32: 917; XOPAVX2: # %bb.0: 918; 
XOPAVX2-NEXT: vpsrlvd {{.*}}(%rip), %ymm0, %ymm0 919; XOPAVX2-NEXT: retq 920; 921; AVX512-LABEL: constant_shift_v8i32: 922; AVX512: # %bb.0: 923; AVX512-NEXT: vpsrlvd {{.*}}(%rip), %ymm0, %ymm0 924; AVX512-NEXT: retq 925; 926; AVX512VL-LABEL: constant_shift_v8i32: 927; AVX512VL: # %bb.0: 928; AVX512VL-NEXT: vpsrlvd {{.*}}(%rip), %ymm0, %ymm0 929; AVX512VL-NEXT: retq 930; 931; X86-AVX1-LABEL: constant_shift_v8i32: 932; X86-AVX1: # %bb.0: 933; X86-AVX1-NEXT: vpsrld $7, %xmm0, %xmm1 934; X86-AVX1-NEXT: vpsrld $5, %xmm0, %xmm2 935; X86-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7] 936; X86-AVX1-NEXT: vpsrld $6, %xmm0, %xmm2 937; X86-AVX1-NEXT: vpsrld $4, %xmm0, %xmm3 938; X86-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7] 939; X86-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] 940; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 941; X86-AVX1-NEXT: vpsrld $7, %xmm0, %xmm2 942; X86-AVX1-NEXT: vpsrld $9, %xmm0, %xmm3 943; X86-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7] 944; X86-AVX1-NEXT: vpsrld $8, %xmm0, %xmm0 945; X86-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7] 946; X86-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 947; X86-AVX1-NEXT: retl 948; 949; X86-AVX2-LABEL: constant_shift_v8i32: 950; X86-AVX2: # %bb.0: 951; X86-AVX2-NEXT: vpsrlvd {{\.LCPI.*}}, %ymm0, %ymm0 952; X86-AVX2-NEXT: retl 953 %shift = lshr <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7> 954 ret <8 x i32> %shift 955} 956 957define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind { 958; AVX1-LABEL: constant_shift_v16i16: 959; AVX1: # %bb.0: 960; AVX1-NEXT: vpmulhuw {{.*}}(%rip), %xmm0, %xmm1 961; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5,6,7] 962; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 963; AVX1-NEXT: vpmulhuw {{.*}}(%rip), %xmm0, %xmm0 964; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 965; AVX1-NEXT: retq 966; 967; AVX2-LABEL: constant_shift_v16i16: 968; AVX2: # %bb.0: 969; AVX2-NEXT: vpmulhuw {{.*}}(%rip), %ymm0, %ymm1 970; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7] 971; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] 972; AVX2-NEXT: retq 973; 974; XOPAVX1-LABEL: constant_shift_v16i16: 975; XOPAVX1: # %bb.0: 976; XOPAVX1-NEXT: vpshlw {{.*}}(%rip), %xmm0, %xmm1 977; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 978; XOPAVX1-NEXT: vpshlw {{.*}}(%rip), %xmm0, %xmm0 979; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 980; XOPAVX1-NEXT: retq 981; 982; XOPAVX2-LABEL: constant_shift_v16i16: 983; XOPAVX2: # %bb.0: 984; XOPAVX2-NEXT: vpmulhuw {{.*}}(%rip), %ymm0, %ymm1 985; XOPAVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7] 986; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] 987; XOPAVX2-NEXT: retq 988; 989; AVX512DQ-LABEL: constant_shift_v16i16: 990; AVX512DQ: # %bb.0: 991; AVX512DQ-NEXT: vpmulhuw {{.*}}(%rip), %ymm0, %ymm1 992; AVX512DQ-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7] 993; AVX512DQ-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] 994; AVX512DQ-NEXT: retq 995; 996; AVX512BW-LABEL: constant_shift_v16i16: 997; AVX512BW: # %bb.0: 998; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 999; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] 1000; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 1001; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1002; AVX512BW-NEXT: retq 1003; 1004; AVX512DQVL-LABEL: constant_shift_v16i16: 
1005; AVX512DQVL: # %bb.0: 1006; AVX512DQVL-NEXT: vpmulhuw {{.*}}(%rip), %ymm0, %ymm1 1007; AVX512DQVL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7] 1008; AVX512DQVL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] 1009; AVX512DQVL-NEXT: retq 1010; 1011; AVX512BWVL-LABEL: constant_shift_v16i16: 1012; AVX512BWVL: # %bb.0: 1013; AVX512BWVL-NEXT: vpsrlvw {{.*}}(%rip), %ymm0, %ymm0 1014; AVX512BWVL-NEXT: retq 1015; 1016; X86-AVX1-LABEL: constant_shift_v16i16: 1017; X86-AVX1: # %bb.0: 1018; X86-AVX1-NEXT: vpmulhuw {{\.LCPI.*}}, %xmm0, %xmm1 1019; X86-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5,6,7] 1020; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1021; X86-AVX1-NEXT: vpmulhuw {{\.LCPI.*}}, %xmm0, %xmm0 1022; X86-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1023; X86-AVX1-NEXT: retl 1024; 1025; X86-AVX2-LABEL: constant_shift_v16i16: 1026; X86-AVX2: # %bb.0: 1027; X86-AVX2-NEXT: vpmulhuw {{\.LCPI.*}}, %ymm0, %ymm1 1028; X86-AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7] 1029; X86-AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] 1030; X86-AVX2-NEXT: retl 1031 %shift = lshr <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15> 1032 ret <16 x i16> %shift 1033} 1034 1035define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind { 1036; AVX1-LABEL: constant_shift_v32i8: 1037; AVX1: # %bb.0: 1038; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1039; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 1040; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15] 1041; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [2,4,8,16,32,64,128,256] 1042; AVX1-NEXT: vpmullw %xmm4, %xmm3, %xmm3 1043; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3 1044; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero 1045; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [256,128,64,32,16,8,4,2] 1046; AVX1-NEXT: vpmullw %xmm5, %xmm1, %xmm1 1047; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1 1048; AVX1-NEXT: vpackuswb %xmm3, %xmm1, %xmm1 1049; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm2[8],xmm0[9],xmm2[9],xmm0[10],xmm2[10],xmm0[11],xmm2[11],xmm0[12],xmm2[12],xmm0[13],xmm2[13],xmm0[14],xmm2[14],xmm0[15],xmm2[15] 1050; AVX1-NEXT: vpmullw %xmm4, %xmm2, %xmm2 1051; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2 1052; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1053; AVX1-NEXT: vpmullw %xmm5, %xmm0, %xmm0 1054; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 1055; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 1056; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1057; AVX1-NEXT: retq 1058; 1059; AVX2-LABEL: constant_shift_v32i8: 1060; AVX2: # %bb.0: 1061; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 1062; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] 1063; AVX2-NEXT: vpmullw {{.*}}(%rip), %ymm2, %ymm2 1064; AVX2-NEXT: vpsrlw $8, %ymm2, %ymm2 1065; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = 
ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] 1066; AVX2-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0 1067; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0 1068; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 1069; AVX2-NEXT: retq 1070; 1071; XOPAVX1-LABEL: constant_shift_v32i8: 1072; XOPAVX1: # %bb.0: 1073; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1074; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,255,254,253,252,251,250,249,249,250,251,252,253,254,255,0] 1075; XOPAVX1-NEXT: vpshlb %xmm2, %xmm1, %xmm1 1076; XOPAVX1-NEXT: vpshlb %xmm2, %xmm0, %xmm0 1077; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1078; XOPAVX1-NEXT: retq 1079; 1080; XOPAVX2-LABEL: constant_shift_v32i8: 1081; XOPAVX2: # %bb.0: 1082; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1083; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [0,255,254,253,252,251,250,249,249,250,251,252,253,254,255,0] 1084; XOPAVX2-NEXT: vpshlb %xmm2, %xmm1, %xmm1 1085; XOPAVX2-NEXT: vpshlb %xmm2, %xmm0, %xmm0 1086; XOPAVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 1087; XOPAVX2-NEXT: retq 1088; 1089; AVX512DQ-LABEL: constant_shift_v32i8: 1090; AVX512DQ: # %bb.0: 1091; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 1092; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] 1093; AVX512DQ-NEXT: vpmullw {{.*}}(%rip), %ymm2, %ymm2 1094; AVX512DQ-NEXT: vpsrlw $8, %ymm2, %ymm2 1095; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] 1096; AVX512DQ-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0 1097; AVX512DQ-NEXT: vpsrlw $8, %ymm0, %ymm0 1098; AVX512DQ-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 1099; AVX512DQ-NEXT: retq 1100; 1101; AVX512BW-LABEL: constant_shift_v32i8: 1102; AVX512BW: # %bb.0: 1103; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero 1104; AVX512BW-NEXT: vpsrlvw {{.*}}(%rip), %zmm0, %zmm0 1105; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 1106; AVX512BW-NEXT: retq 1107; 1108; AVX512DQVL-LABEL: constant_shift_v32i8: 1109; AVX512DQVL: # %bb.0: 1110; AVX512DQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 1111; AVX512DQVL-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] 1112; AVX512DQVL-NEXT: vpmullw {{.*}}(%rip), %ymm2, %ymm2 1113; AVX512DQVL-NEXT: vpsrlw $8, %ymm2, %ymm2 1114; 
AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] 1115; AVX512DQVL-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0 1116; AVX512DQVL-NEXT: vpsrlw $8, %ymm0, %ymm0 1117; AVX512DQVL-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 1118; AVX512DQVL-NEXT: retq 1119; 1120; AVX512BWVL-LABEL: constant_shift_v32i8: 1121; AVX512BWVL: # %bb.0: 1122; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero 1123; AVX512BWVL-NEXT: vpsrlvw {{.*}}(%rip), %zmm0, %zmm0 1124; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm0 1125; AVX512BWVL-NEXT: retq 1126; 1127; X86-AVX1-LABEL: constant_shift_v32i8: 1128; X86-AVX1: # %bb.0: 1129; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1130; X86-AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 1131; X86-AVX1-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15] 1132; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [2,4,8,16,32,64,128,256] 1133; X86-AVX1-NEXT: vpmullw %xmm4, %xmm3, %xmm3 1134; X86-AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3 1135; X86-AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero 1136; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [256,128,64,32,16,8,4,2] 1137; X86-AVX1-NEXT: vpmullw %xmm5, %xmm1, %xmm1 1138; X86-AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1 1139; X86-AVX1-NEXT: vpackuswb %xmm3, %xmm1, %xmm1 1140; X86-AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm2[8],xmm0[9],xmm2[9],xmm0[10],xmm2[10],xmm0[11],xmm2[11],xmm0[12],xmm2[12],xmm0[13],xmm2[13],xmm0[14],xmm2[14],xmm0[15],xmm2[15] 1141; X86-AVX1-NEXT: vpmullw %xmm4, %xmm2, %xmm2 1142; X86-AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2 1143; X86-AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1144; X86-AVX1-NEXT: vpmullw %xmm5, %xmm0, %xmm0 1145; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 1146; X86-AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 1147; X86-AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1148; X86-AVX1-NEXT: retl 1149; 1150; X86-AVX2-LABEL: constant_shift_v32i8: 1151; X86-AVX2: # %bb.0: 1152; X86-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 1153; X86-AVX2-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] 1154; X86-AVX2-NEXT: vpmullw {{\.LCPI.*}}, %ymm2, %ymm2 1155; X86-AVX2-NEXT: vpsrlw $8, %ymm2, %ymm2 1156; X86-AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = 
ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] 1157; X86-AVX2-NEXT: vpmullw {{\.LCPI.*}}, %ymm0, %ymm0 1158; X86-AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0 1159; X86-AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 1160; X86-AVX2-NEXT: retl 1161 %shift = lshr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0> 1162 ret <32 x i8> %shift 1163} 1164 1165; 1166; Uniform Constant Shifts 1167; 1168 1169define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) nounwind { 1170; AVX1-LABEL: splatconstant_shift_v4i64: 1171; AVX1: # %bb.0: 1172; AVX1-NEXT: vpsrlq $7, %xmm0, %xmm1 1173; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1174; AVX1-NEXT: vpsrlq $7, %xmm0, %xmm0 1175; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1176; AVX1-NEXT: retq 1177; 1178; AVX2-LABEL: splatconstant_shift_v4i64: 1179; AVX2: # %bb.0: 1180; AVX2-NEXT: vpsrlq $7, %ymm0, %ymm0 1181; AVX2-NEXT: retq 1182; 1183; XOPAVX1-LABEL: splatconstant_shift_v4i64: 1184; XOPAVX1: # %bb.0: 1185; XOPAVX1-NEXT: vpsrlq $7, %xmm0, %xmm1 1186; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1187; XOPAVX1-NEXT: vpsrlq $7, %xmm0, %xmm0 1188; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1189; XOPAVX1-NEXT: retq 1190; 1191; XOPAVX2-LABEL: splatconstant_shift_v4i64: 1192; XOPAVX2: # %bb.0: 1193; XOPAVX2-NEXT: vpsrlq $7, %ymm0, %ymm0 1194; XOPAVX2-NEXT: retq 1195; 1196; AVX512-LABEL: splatconstant_shift_v4i64: 1197; AVX512: # %bb.0: 1198; AVX512-NEXT: vpsrlq $7, %ymm0, %ymm0 1199; AVX512-NEXT: retq 1200; 1201; AVX512VL-LABEL: splatconstant_shift_v4i64: 1202; AVX512VL: # %bb.0: 1203; AVX512VL-NEXT: vpsrlq $7, %ymm0, %ymm0 1204; AVX512VL-NEXT: retq 1205; 1206; X86-AVX1-LABEL: splatconstant_shift_v4i64: 1207; X86-AVX1: # %bb.0: 1208; X86-AVX1-NEXT: vpsrlq $7, %xmm0, %xmm1 1209; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1210; X86-AVX1-NEXT: vpsrlq $7, %xmm0, %xmm0 1211; X86-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1212; X86-AVX1-NEXT: retl 1213; 1214; X86-AVX2-LABEL: splatconstant_shift_v4i64: 1215; X86-AVX2: # %bb.0: 1216; X86-AVX2-NEXT: vpsrlq $7, %ymm0, %ymm0 1217; X86-AVX2-NEXT: retl 1218 %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7> 1219 ret <4 x i64> %shift 1220} 1221 1222define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) nounwind { 1223; AVX1-LABEL: splatconstant_shift_v8i32: 1224; AVX1: # %bb.0: 1225; AVX1-NEXT: vpsrld $5, %xmm0, %xmm1 1226; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1227; AVX1-NEXT: vpsrld $5, %xmm0, %xmm0 1228; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1229; AVX1-NEXT: retq 1230; 1231; AVX2-LABEL: splatconstant_shift_v8i32: 1232; AVX2: # %bb.0: 1233; AVX2-NEXT: vpsrld $5, %ymm0, %ymm0 1234; AVX2-NEXT: retq 1235; 1236; XOPAVX1-LABEL: splatconstant_shift_v8i32: 1237; XOPAVX1: # %bb.0: 1238; XOPAVX1-NEXT: vpsrld $5, %xmm0, %xmm1 1239; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1240; XOPAVX1-NEXT: vpsrld $5, %xmm0, %xmm0 1241; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1242; XOPAVX1-NEXT: retq 1243; 1244; XOPAVX2-LABEL: splatconstant_shift_v8i32: 1245; XOPAVX2: # %bb.0: 1246; XOPAVX2-NEXT: vpsrld $5, %ymm0, %ymm0 1247; XOPAVX2-NEXT: retq 1248; 1249; AVX512-LABEL: splatconstant_shift_v8i32: 1250; AVX512: # %bb.0: 1251; AVX512-NEXT: vpsrld 
define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) nounwind {
; AVX1-LABEL: splatconstant_shift_v8i32:
; AVX1:         # %bb.0:
; AVX1-NEXT:    vpsrld $5, %xmm0, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsrld $5, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: splatconstant_shift_v8i32:
; AVX2:         # %bb.0:
; AVX2-NEXT:    vpsrld $5, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; XOPAVX1-LABEL: splatconstant_shift_v8i32:
; XOPAVX1:         # %bb.0:
; XOPAVX1-NEXT:    vpsrld $5, %xmm0, %xmm1
; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; XOPAVX1-NEXT:    vpsrld $5, %xmm0, %xmm0
; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; XOPAVX1-NEXT:    retq
;
; XOPAVX2-LABEL: splatconstant_shift_v8i32:
; XOPAVX2:         # %bb.0:
; XOPAVX2-NEXT:    vpsrld $5, %ymm0, %ymm0
; XOPAVX2-NEXT:    retq
;
; AVX512-LABEL: splatconstant_shift_v8i32:
; AVX512:         # %bb.0:
; AVX512-NEXT:    vpsrld $5, %ymm0, %ymm0
; AVX512-NEXT:    retq
;
; AVX512VL-LABEL: splatconstant_shift_v8i32:
; AVX512VL:         # %bb.0:
; AVX512VL-NEXT:    vpsrld $5, %ymm0, %ymm0
; AVX512VL-NEXT:    retq
;
; X86-AVX1-LABEL: splatconstant_shift_v8i32:
; X86-AVX1:         # %bb.0:
; X86-AVX1-NEXT:    vpsrld $5, %xmm0, %xmm1
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; X86-AVX1-NEXT:    vpsrld $5, %xmm0, %xmm0
; X86-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: splatconstant_shift_v8i32:
; X86-AVX2:         # %bb.0:
; X86-AVX2-NEXT:    vpsrld $5, %ymm0, %ymm0
; X86-AVX2-NEXT:    retl
  %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <8 x i32> %shift
}

define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) nounwind {
; AVX1-LABEL: splatconstant_shift_v16i16:
; AVX1:         # %bb.0:
; AVX1-NEXT:    vpsrlw $3, %xmm0, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsrlw $3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: splatconstant_shift_v16i16:
; AVX2:         # %bb.0:
; AVX2-NEXT:    vpsrlw $3, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; XOPAVX1-LABEL: splatconstant_shift_v16i16:
; XOPAVX1:         # %bb.0:
; XOPAVX1-NEXT:    vpsrlw $3, %xmm0, %xmm1
; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; XOPAVX1-NEXT:    vpsrlw $3, %xmm0, %xmm0
; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; XOPAVX1-NEXT:    retq
;
; XOPAVX2-LABEL: splatconstant_shift_v16i16:
; XOPAVX2:         # %bb.0:
; XOPAVX2-NEXT:    vpsrlw $3, %ymm0, %ymm0
; XOPAVX2-NEXT:    retq
;
; AVX512-LABEL: splatconstant_shift_v16i16:
; AVX512:         # %bb.0:
; AVX512-NEXT:    vpsrlw $3, %ymm0, %ymm0
; AVX512-NEXT:    retq
;
; AVX512VL-LABEL: splatconstant_shift_v16i16:
; AVX512VL:         # %bb.0:
; AVX512VL-NEXT:    vpsrlw $3, %ymm0, %ymm0
; AVX512VL-NEXT:    retq
;
; X86-AVX1-LABEL: splatconstant_shift_v16i16:
; X86-AVX1:         # %bb.0:
; X86-AVX1-NEXT:    vpsrlw $3, %xmm0, %xmm1
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; X86-AVX1-NEXT:    vpsrlw $3, %xmm0, %xmm0
; X86-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: splatconstant_shift_v16i16:
; X86-AVX2:         # %bb.0:
; X86-AVX2-NEXT:    vpsrlw $3, %ymm0, %ymm0
; X86-AVX2-NEXT:    retl
  %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <16 x i16> %shift
}

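; There is no byte-granularity vpsrlb, so the v32i8 case below shifts with
; the word-sized vpsrlw $3 and then masks every byte with 31 (0xff >> 3) to
; clear the bits shifted in from the neighboring byte. The XOP form instead
; uses vpshlb with a splat of 253 (-3 as an unsigned byte), since vpshlb
; shifts right when given a negative count.
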
define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) nounwind {
; AVX1-LABEL: splatconstant_shift_v32i8:
; AVX1:         # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpsrlw $3, %xmm1, %xmm1
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpsrlw $3, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: splatconstant_shift_v32i8:
; AVX2:         # %bb.0:
; AVX2-NEXT:    vpsrlw $3, %ymm0, %ymm0
; AVX2-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; XOPAVX1-LABEL: splatconstant_shift_v32i8:
; XOPAVX1:         # %bb.0:
; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; XOPAVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253]
; XOPAVX1-NEXT:    vpshlb %xmm2, %xmm1, %xmm1
; XOPAVX1-NEXT:    vpshlb %xmm2, %xmm0, %xmm0
; XOPAVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; XOPAVX1-NEXT:    retq
;
; XOPAVX2-LABEL: splatconstant_shift_v32i8:
; XOPAVX2:         # %bb.0:
; XOPAVX2-NEXT:    vpsrlw $3, %ymm0, %ymm0
; XOPAVX2-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
; XOPAVX2-NEXT:    retq
;
; AVX512-LABEL: splatconstant_shift_v32i8:
; AVX512:         # %bb.0:
; AVX512-NEXT:    vpsrlw $3, %ymm0, %ymm0
; AVX512-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX512-NEXT:    retq
;
; AVX512VL-LABEL: splatconstant_shift_v32i8:
; AVX512VL:         # %bb.0:
; AVX512VL-NEXT:    vpsrlw $3, %ymm0, %ymm0
; AVX512VL-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX512VL-NEXT:    retq
;
; X86-AVX1-LABEL: splatconstant_shift_v32i8:
; X86-AVX1:         # %bb.0:
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT:    vpsrlw $3, %xmm1, %xmm1
; X86-AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
; X86-AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm1
; X86-AVX1-NEXT:    vpsrlw $3, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
; X86-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: splatconstant_shift_v32i8:
; X86-AVX2:         # %bb.0:
; X86-AVX2-NEXT:    vpsrlw $3, %ymm0, %ymm0
; X86-AVX2-NEXT:    vpand {{\.LCPI.*}}, %ymm0, %ymm0
; X86-AVX2-NEXT:    retl
  %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <32 x i8> %shift
}

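; The final test checks that lshr by 24 on i64, truncate to i32, then lshr
; by 12 is combined into a single lshr by 36 (24 + 12) on the wide type,
; followed by a mask with 1048575 (2^20 - 1) to keep the 20 bits that
; survive both shifts.
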
define <4 x i32> @sh_trunc_sh_vec(<4 x i64> %x) {
; AVX1-LABEL: sh_trunc_sh_vec:
; AVX1:         # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpsrlq $36, %xmm1, %xmm1
; AVX1-NEXT:    vpsrlq $36, %xmm0, %xmm0
; AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; AVX1-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: sh_trunc_sh_vec:
; AVX2:         # %bb.0:
; AVX2-NEXT:    vpsrlq $36, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; AVX2-NEXT:    vbroadcastss {{.*#+}} xmm1 = [1048575,1048575,1048575,1048575]
; AVX2-NEXT:    vandps %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; XOPAVX1-LABEL: sh_trunc_sh_vec:
; XOPAVX1:         # %bb.0:
; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; XOPAVX1-NEXT:    vpsrlq $36, %xmm1, %xmm1
; XOPAVX1-NEXT:    vpsrlq $36, %xmm0, %xmm0
; XOPAVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; XOPAVX1-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
; XOPAVX1-NEXT:    vzeroupper
; XOPAVX1-NEXT:    retq
;
; XOPAVX2-LABEL: sh_trunc_sh_vec:
; XOPAVX2:         # %bb.0:
; XOPAVX2-NEXT:    vpsrlq $36, %ymm0, %ymm0
; XOPAVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; XOPAVX2-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; XOPAVX2-NEXT:    vbroadcastss {{.*#+}} xmm1 = [1048575,1048575,1048575,1048575]
; XOPAVX2-NEXT:    vandps %xmm1, %xmm0, %xmm0
; XOPAVX2-NEXT:    vzeroupper
; XOPAVX2-NEXT:    retq
;
; AVX512-LABEL: sh_trunc_sh_vec:
; AVX512:         # %bb.0:
; AVX512-NEXT:    vpsrlq $36, %ymm0, %ymm0
; AVX512-NEXT:    vpmovqd %zmm0, %ymm0
; AVX512-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [1048575,1048575,1048575,1048575]
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
;
; AVX512VL-LABEL: sh_trunc_sh_vec:
; AVX512VL:         # %bb.0:
; AVX512VL-NEXT:    vpsrlq $36, %ymm0, %ymm0
; AVX512VL-NEXT:    vpmovqd %ymm0, %xmm0
; AVX512VL-NEXT:    vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
;
; X86-AVX1-LABEL: sh_trunc_sh_vec:
; X86-AVX1:         # %bb.0:
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT:    vpsrlq $36, %xmm1, %xmm1
; X86-AVX1-NEXT:    vpsrlq $36, %xmm0, %xmm0
; X86-AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X86-AVX1-NEXT:    vandps {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX1-NEXT:    vzeroupper
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: sh_trunc_sh_vec:
; X86-AVX2:         # %bb.0:
; X86-AVX2-NEXT:    vpsrlq $36, %ymm0, %ymm0
; X86-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X86-AVX2-NEXT:    vbroadcastss {{.*#+}} xmm1 = [1048575,1048575,1048575,1048575]
; X86-AVX2-NEXT:    vandps %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vzeroupper
; X86-AVX2-NEXT:    retl
  %s = lshr <4 x i64> %x, <i64 24, i64 24, i64 24, i64 24>
  %t = trunc <4 x i64> %s to <4 x i32>
  %r = lshr <4 x i32> %t, <i32 12, i32 12, i32 12, i32 12>
  ret <4 x i32> %r
}
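
; A minimal IR sketch of the folded form that the checks above demonstrate
; (the function name @sh_trunc_sh_vec_folded is hypothetical and not part of
; this test; it is kept in a comment so llc and FileCheck ignore it):
;
;   define <4 x i32> @sh_trunc_sh_vec_folded(<4 x i64> %x) {
;     %s = lshr <4 x i64> %x, <i64 36, i64 36, i64 36, i64 36>
;     %t = trunc <4 x i64> %s to <4 x i32>
;     %r = and <4 x i32> %t, <i32 1048575, i32 1048575, i32 1048575, i32 1048575>
;     ret <4 x i32> %r
;   }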