; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefix=ALL --check-prefix=XOP --check-prefix=XOPAVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=XOP --check-prefix=XOPAVX2
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW
;
; Variable Shifts
;

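; No AVX/AVX2 instruction performs an arithmetic right shift on i64 elements,
; so the lowering below builds it from the logical shift using the identity
; ashr(x, c) == (lshr(x, c) ^ m) - m with m = lshr(0x8000000000000000, c):
; the xor/sub pair re-creates the shifted-in sign bits. XOP targets instead
; negate the count and use vpshaq, which shifts right for negative counts.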
define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
; AVX1-LABEL: var_shift_v4i64:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
; AVX1-NEXT: vpsrlq %xmm2, %xmm3, %xmm4
; AVX1-NEXT: vpshufd {{.*#+}} xmm5 = xmm2[2,3,0,1]
; AVX1-NEXT: vpsrlq %xmm5, %xmm3, %xmm6
; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm4[0,1,2,3],xmm6[4,5,6,7]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm6
; AVX1-NEXT: vpsrlq %xmm2, %xmm6, %xmm2
; AVX1-NEXT: vpsrlq %xmm5, %xmm6, %xmm5
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm5[4,5,6,7]
; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpsubq %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpsrlq %xmm1, %xmm3, %xmm4
; AVX1-NEXT: vpshufd {{.*#+}} xmm5 = xmm1[2,3,0,1]
; AVX1-NEXT: vpsrlq %xmm5, %xmm3, %xmm3
; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm4[0,1,2,3],xmm3[4,5,6,7]
; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vpsrlq %xmm5, %xmm0, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpsubq %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: var_shift_v4i64:
; AVX2: # BB#0:
; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
; AVX2-NEXT: vpsrlvq %ymm1, %ymm2, %ymm3
; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsubq %ymm3, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: var_shift_v4i64:
; XOPAVX1: # BB#0:
; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; XOPAVX1-NEXT: vpsubq %xmm2, %xmm3, %xmm2
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; XOPAVX1-NEXT: vpshaq %xmm2, %xmm4, %xmm2
; XOPAVX1-NEXT: vpsubq %xmm1, %xmm3, %xmm1
; XOPAVX1-NEXT: vpshaq %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: var_shift_v4i64:
; XOPAVX2: # BB#0:
; XOPAVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
; XOPAVX2-NEXT: vpsrlvq %ymm1, %ymm2, %ymm3
; XOPAVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
; XOPAVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
; XOPAVX2-NEXT: vpsubq %ymm3, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: var_shift_v4i64:
; AVX512: ## BB#0:
; AVX512-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
; AVX512-NEXT: vpsrlvq %ymm1, %ymm2, %ymm3
; AVX512-NEXT: vpxor %ymm2, %ymm0, %ymm0
; AVX512-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpsubq %ymm3, %ymm0, %ymm0
; AVX512-NEXT: retq
  %shift = ashr <4 x i64> %a, %b
  ret <4 x i64> %shift
}

define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
; AVX1-LABEL: var_shift_v8i32:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT: vpsrldq {{.*#+}} xmm4 = xmm3[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: vpsrad %xmm4, %xmm2, %xmm4
; AVX1-NEXT: vpsrlq $32, %xmm3, %xmm5
; AVX1-NEXT: vpsrad %xmm5, %xmm2, %xmm5
; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm5[0,1,2,3],xmm4[4,5,6,7]
; AVX1-NEXT: vpxor %xmm5, %xmm5, %xmm5
; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm6 = xmm3[2],xmm5[2],xmm3[3],xmm5[3]
; AVX1-NEXT: vpsrad %xmm6, %xmm2, %xmm6
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero
; AVX1-NEXT: vpsrad %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm6[4,5,6,7]
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm4[2,3],xmm2[4,5],xmm4[6,7]
; AVX1-NEXT: vpsrldq {{.*#+}} xmm3 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: vpsrad %xmm3, %xmm0, %xmm3
; AVX1-NEXT: vpsrlq $32, %xmm1, %xmm4
; AVX1-NEXT: vpsrad %xmm4, %xmm0, %xmm4
; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm4[0,1,2,3],xmm3[4,5,6,7]
; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm4 = xmm1[2],xmm5[2],xmm1[3],xmm5[3]
; AVX1-NEXT: vpsrad %xmm4, %xmm0, %xmm4
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm4[4,5,6,7]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm3[2,3],xmm0[4,5],xmm3[6,7]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: var_shift_v8i32:
; AVX2: # BB#0:
; AVX2-NEXT: vpsravd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: var_shift_v8i32:
; XOPAVX1: # BB#0:
; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; XOPAVX1-NEXT: vpsubd %xmm2, %xmm3, %xmm2
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; XOPAVX1-NEXT: vpshad %xmm2, %xmm4, %xmm2
; XOPAVX1-NEXT: vpsubd %xmm1, %xmm3, %xmm1
; XOPAVX1-NEXT: vpshad %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: var_shift_v8i32:
; XOPAVX2: # BB#0:
; XOPAVX2-NEXT: vpsravd %ymm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: var_shift_v8i32:
; AVX512: ## BB#0:
; AVX512-NEXT: vpsravd %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
  %shift = ashr <8 x i32> %a, %b
  ret <8 x i32> %shift
}

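; There is no variable-count vpsraw before AVX512BW. The AVX1 lowering shifts
; the count bits up (vpsllw $12 / vpsllw $4 / vpor) so that one count bit at a
; time sits in each word's sign bit, then applies conditional shifts by 8, 4,
; 2 and 1 via vpblendvb, doubling the mask between steps. AVX2 interleaves the
; words into dword lanes for vpsravd and repacks with vpackusdw; AVX512BW has
; a native vpsravw, but only on zmm (hence the kill annotations).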
define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
; AVX1-LABEL: var_shift_v16i16:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpsllw $12, %xmm2, %xmm3
; AVX1-NEXT: vpsllw $4, %xmm2, %xmm2
; AVX1-NEXT: vpor %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT: vpsraw $8, %xmm4, %xmm5
; AVX1-NEXT: vpblendvb %xmm2, %xmm5, %xmm4, %xmm2
; AVX1-NEXT: vpsraw $4, %xmm2, %xmm4
; AVX1-NEXT: vpblendvb %xmm3, %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpsraw $2, %xmm2, %xmm4
; AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpblendvb %xmm3, %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpsraw $1, %xmm2, %xmm4
; AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpblendvb %xmm3, %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpsllw $12, %xmm1, %xmm3
; AVX1-NEXT: vpsllw $4, %xmm1, %xmm1
; AVX1-NEXT: vpor %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpaddw %xmm1, %xmm1, %xmm3
; AVX1-NEXT: vpsraw $8, %xmm0, %xmm4
; AVX1-NEXT: vpblendvb %xmm1, %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vpsraw $4, %xmm0, %xmm1
; AVX1-NEXT: vpblendvb %xmm3, %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsraw $2, %xmm0, %xmm1
; AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpblendvb %xmm3, %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsraw $1, %xmm0, %xmm1
; AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpblendvb %xmm3, %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: var_shift_v16i16:
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %ymm2, %ymm2, %ymm2
; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
; AVX2-NEXT: vpsravd %ymm3, %ymm4, %ymm3
; AVX2-NEXT: vpsrld $16, %ymm3, %ymm3
; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
; AVX2-NEXT: vpsravd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrld $16, %ymm0, %ymm0
; AVX2-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: var_shift_v16i16:
; XOPAVX1: # BB#0:
; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; XOPAVX1-NEXT: vpsubw %xmm2, %xmm3, %xmm2
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; XOPAVX1-NEXT: vpshaw %xmm2, %xmm4, %xmm2
; XOPAVX1-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; XOPAVX1-NEXT: vpshaw %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: var_shift_v16i16:
; XOPAVX2: # BB#0:
; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; XOPAVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
; XOPAVX2-NEXT: vpsubw %xmm2, %xmm3, %xmm2
; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm4
; XOPAVX2-NEXT: vpshaw %xmm2, %xmm4, %xmm2
; XOPAVX2-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; XOPAVX2-NEXT: vpshaw %xmm1, %xmm0, %xmm0
; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: var_shift_v16i16:
; AVX512: ## BB#0:
; AVX512-NEXT: ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; AVX512-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512-NEXT: vpsravw %zmm1, %zmm0, %zmm0
; AVX512-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; AVX512-NEXT: retq
  %shift = ashr <16 x i16> %a, %b
  ret <16 x i16> %shift
}

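; x86 has no byte-granularity shifts at all, so the v32i8 case scales the
; counts with vpsllw $5 to walk their three bits through the sign-bit position
; tested by vpblendvb, unpacks the bytes to words, conditionally shifts by 4,
; 2 and 1, and repacks with vpsrlw $8 + vpackuswb. XOP needs only a negated
; count and one vpshab per 128-bit half.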
define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; AVX1-LABEL: var_shift_v32i8:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpsllw $5, %xmm2, %xmm2
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm0[8],xmm2[8],xmm0[9],xmm2[9],xmm0[10],xmm2[10],xmm0[11],xmm2[11],xmm0[12],xmm2[12],xmm0[13],xmm2[13],xmm0[14],xmm2[14],xmm0[15],xmm2[15]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm5 = xmm0[8],xmm4[8],xmm0[9],xmm4[9],xmm0[10],xmm4[10],xmm0[11],xmm4[11],xmm0[12],xmm4[12],xmm0[13],xmm4[13],xmm0[14],xmm4[14],xmm0[15],xmm4[15]
; AVX1-NEXT: vpsraw $4, %xmm5, %xmm6
; AVX1-NEXT: vpblendvb %xmm3, %xmm6, %xmm5, %xmm5
; AVX1-NEXT: vpsraw $2, %xmm5, %xmm6
; AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpblendvb %xmm3, %xmm6, %xmm5, %xmm5
; AVX1-NEXT: vpsraw $1, %xmm5, %xmm6
; AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpblendvb %xmm3, %xmm6, %xmm5, %xmm3
; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm4 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
; AVX1-NEXT: vpsraw $4, %xmm4, %xmm5
; AVX1-NEXT: vpblendvb %xmm2, %xmm5, %xmm4, %xmm4
; AVX1-NEXT: vpsraw $2, %xmm4, %xmm5
; AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpblendvb %xmm2, %xmm5, %xmm4, %xmm4
; AVX1-NEXT: vpsraw $1, %xmm4, %xmm5
; AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpblendvb %xmm2, %xmm5, %xmm4, %xmm2
; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
; AVX1-NEXT: vpackuswb %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpsllw $5, %xmm1, %xmm1
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm4 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; AVX1-NEXT: vpsraw $4, %xmm4, %xmm5
; AVX1-NEXT: vpblendvb %xmm3, %xmm5, %xmm4, %xmm4
; AVX1-NEXT: vpsraw $2, %xmm4, %xmm5
; AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpblendvb %xmm3, %xmm5, %xmm4, %xmm4
; AVX1-NEXT: vpsraw $1, %xmm4, %xmm5
; AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpblendvb %xmm3, %xmm5, %xmm4, %xmm3
; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX1-NEXT: vpsraw $4, %xmm0, %xmm4
; AVX1-NEXT: vpblendvb %xmm1, %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vpsraw $2, %xmm0, %xmm4
; AVX1-NEXT: vpaddw %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpblendvb %xmm1, %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vpsraw $1, %xmm0, %xmm4
; AVX1-NEXT: vpaddw %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpblendvb %xmm1, %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: var_shift_v32i8:
; AVX2: # BB#0:
; AVX2-NEXT: vpsllw $5, %ymm1, %ymm1
; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
; AVX2-NEXT: vpsraw $4, %ymm3, %ymm4
; AVX2-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
; AVX2-NEXT: vpsraw $2, %ymm3, %ymm4
; AVX2-NEXT: vpaddw %ymm2, %ymm2, %ymm2
; AVX2-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
; AVX2-NEXT: vpsraw $1, %ymm3, %ymm4
; AVX2-NEXT: vpaddw %ymm2, %ymm2, %ymm2
; AVX2-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm2
; AVX2-NEXT: vpsrlw $8, %ymm2, %ymm2
; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
; AVX2-NEXT: vpsraw $4, %ymm0, %ymm3
; AVX2-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX2-NEXT: vpsraw $2, %ymm0, %ymm3
; AVX2-NEXT: vpaddw %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX2-NEXT: vpsraw $1, %ymm0, %ymm3
; AVX2-NEXT: vpaddw %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0
; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: var_shift_v32i8:
; XOPAVX1: # BB#0:
; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; XOPAVX1-NEXT: vpsubb %xmm2, %xmm3, %xmm2
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; XOPAVX1-NEXT: vpshab %xmm2, %xmm4, %xmm2
; XOPAVX1-NEXT: vpsubb %xmm1, %xmm3, %xmm1
; XOPAVX1-NEXT: vpshab %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: var_shift_v32i8:
; XOPAVX2: # BB#0:
; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; XOPAVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
; XOPAVX2-NEXT: vpsubb %xmm2, %xmm3, %xmm2
; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm4
; XOPAVX2-NEXT: vpshab %xmm2, %xmm4, %xmm2
; XOPAVX2-NEXT: vpsubb %xmm1, %xmm3, %xmm1
; XOPAVX2-NEXT: vpshab %xmm1, %xmm0, %xmm0
; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: var_shift_v32i8:
; AVX512: ## BB#0:
; AVX512-NEXT: vpsllw $5, %ymm1, %ymm1
; AVX512-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
; AVX512-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
; AVX512-NEXT: vpsraw $4, %ymm3, %ymm4
; AVX512-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
; AVX512-NEXT: vpsraw $2, %ymm3, %ymm4
; AVX512-NEXT: vpaddw %ymm2, %ymm2, %ymm2
; AVX512-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
; AVX512-NEXT: vpsraw $1, %ymm3, %ymm4
; AVX512-NEXT: vpaddw %ymm2, %ymm2, %ymm2
; AVX512-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm2
; AVX512-NEXT: vpsrlw $8, %ymm2, %ymm2
; AVX512-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
; AVX512-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
; AVX512-NEXT: vpsraw $4, %ymm0, %ymm3
; AVX512-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX512-NEXT: vpsraw $2, %ymm0, %ymm3
; AVX512-NEXT: vpaddw %ymm1, %ymm1, %ymm1
; AVX512-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX512-NEXT: vpsraw $1, %ymm0, %ymm3
; AVX512-NEXT: vpaddw %ymm1, %ymm1, %ymm1
; AVX512-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX512-NEXT: vpsrlw $8, %ymm0, %ymm0
; AVX512-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
; AVX512-NEXT: retq
  %shift = ashr <32 x i8> %a, %b
  ret <32 x i8> %shift
}

;
; Uniform Variable Shifts
;

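; When the shift amount is splatted, only element 0 of %b matters, so it can
; feed the scalar-count (xmm) operand of vpsrlq directly; the sign bits are
; then rebuilt with the same xor/sub mask trick as the variable v4i64 case.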
define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
; AVX1-LABEL: splatvar_shift_v4i64:
; AVX1: # BB#0:
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX1-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpsrlq %xmm1, %xmm3, %xmm3
; AVX1-NEXT: vpxor %xmm2, %xmm3, %xmm3
; AVX1-NEXT: vpsubq %xmm2, %xmm3, %xmm3
; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpsubq %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splatvar_shift_v4i64:
; AVX2: # BB#0:
; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
; AVX2-NEXT: vpsrlq %xmm1, %ymm2, %ymm2
; AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpsubq %ymm2, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_shift_v4i64:
; XOPAVX1: # BB#0:
; XOPAVX1-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0]
; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; XOPAVX1-NEXT: vpsubq %xmm1, %xmm2, %xmm1
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT: vpshaq %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT: vpshaq %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: splatvar_shift_v4i64:
; XOPAVX2: # BB#0:
; XOPAVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
; XOPAVX2-NEXT: vpsrlq %xmm1, %ymm2, %ymm2
; XOPAVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
; XOPAVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
; XOPAVX2-NEXT: vpsubq %ymm2, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_shift_v4i64:
; AVX512: ## BB#0:
; AVX512-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
; AVX512-NEXT: vpsrlq %xmm1, %ymm2, %ymm2
; AVX512-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
; AVX512-NEXT: vpxor %ymm2, %ymm0, %ymm0
; AVX512-NEXT: vpsubq %ymm2, %ymm0, %ymm0
; AVX512-NEXT: retq
  %splat = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> zeroinitializer
  %shift = ashr <4 x i64> %a, %splat
  ret <4 x i64> %shift
}

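; vpsrad reads its count from the low quadword of the xmm operand, so the
; lowering first zeroes everything above element 0 (vpblendw against zero, or
; vmovss on AVX512) before shifting both 128-bit halves with the same count.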
define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
; AVX1-LABEL: splatvar_shift_v8i32:
; AVX1: # BB#0:
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpsrad %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splatvar_shift_v8i32:
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
; AVX2-NEXT: vpsrad %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_shift_v8i32:
; XOPAVX1: # BB#0:
; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; XOPAVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT: vpsrad %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: splatvar_shift_v8i32:
; XOPAVX2: # BB#0:
; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; XOPAVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
; XOPAVX2-NEXT: vpsrad %xmm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_shift_v8i32:
; AVX512: ## BB#0:
; AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2
; AVX512-NEXT: vmovss {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]
; AVX512-NEXT: vpsrad %xmm1, %ymm0, %ymm0
; AVX512-NEXT: retq
  %splat = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> zeroinitializer
  %shift = ashr <8 x i32> %a, %splat
  ret <8 x i32> %shift
}

define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
; AVX1-LABEL: splatvar_shift_v16i16:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vmovd %xmm1, %eax
; AVX1-NEXT: movzwl %ax, %eax
; AVX1-NEXT: vmovd %eax, %xmm1
; AVX1-NEXT: vpsraw %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpsraw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splatvar_shift_v16i16:
; AVX2: # BB#0:
; AVX2-NEXT: vmovd %xmm1, %eax
; AVX2-NEXT: movzwl %ax, %eax
; AVX2-NEXT: vmovd %eax, %xmm1
; AVX2-NEXT: vpsraw %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_shift_v16i16:
; XOPAVX1: # BB#0:
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT: vmovd %xmm1, %eax
; XOPAVX1-NEXT: movzwl %ax, %eax
; XOPAVX1-NEXT: vmovd %eax, %xmm1
; XOPAVX1-NEXT: vpsraw %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT: vpsraw %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: splatvar_shift_v16i16:
; XOPAVX2: # BB#0:
; XOPAVX2-NEXT: vmovd %xmm1, %eax
; XOPAVX2-NEXT: movzwl %ax, %eax
; XOPAVX2-NEXT: vmovd %eax, %xmm1
; XOPAVX2-NEXT: vpsraw %xmm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_shift_v16i16:
; AVX512: ## BB#0:
; AVX512-NEXT: vmovd %xmm1, %eax
; AVX512-NEXT: movzwl %ax, %eax
; AVX512-NEXT: vmovd %eax, %xmm1
; AVX512-NEXT: vpsraw %xmm1, %ymm0, %ymm0
; AVX512-NEXT: retq
  %splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
  %shift = ashr <16 x i16> %a, %splat
  ret <16 x i16> %shift
}

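; The splatted byte count is broadcast (vpshufb with a zero mask on AVX1,
; vpbroadcastb on AVX2) and then run through the same unpack/vpblendvb ladder
; as the fully variable v32i8 case; XOPAVX1 just negates the count once and
; applies vpshab to both halves.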
define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; AVX1-LABEL: splatvar_shift_v32i8:
; AVX1: # BB#0:
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpsllw $5, %xmm1, %xmm1
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm4 = xmm0[8],xmm3[8],xmm0[9],xmm3[9],xmm0[10],xmm3[10],xmm0[11],xmm3[11],xmm0[12],xmm3[12],xmm0[13],xmm3[13],xmm0[14],xmm3[14],xmm0[15],xmm3[15]
; AVX1-NEXT: vpsraw $4, %xmm4, %xmm5
; AVX1-NEXT: vpblendvb %xmm2, %xmm5, %xmm4, %xmm4
; AVX1-NEXT: vpsraw $2, %xmm4, %xmm5
; AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm6
; AVX1-NEXT: vpblendvb %xmm6, %xmm5, %xmm4, %xmm4
; AVX1-NEXT: vpsraw $1, %xmm4, %xmm5
; AVX1-NEXT: vpaddw %xmm6, %xmm6, %xmm9
; AVX1-NEXT: vpblendvb %xmm9, %xmm5, %xmm4, %xmm4
; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm8
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm3 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
; AVX1-NEXT: vpsraw $4, %xmm3, %xmm5
; AVX1-NEXT: vpblendvb %xmm1, %xmm5, %xmm3, %xmm3
; AVX1-NEXT: vpsraw $2, %xmm3, %xmm5
; AVX1-NEXT: vpaddw %xmm1, %xmm1, %xmm4
; AVX1-NEXT: vpblendvb %xmm4, %xmm5, %xmm3, %xmm3
; AVX1-NEXT: vpsraw $1, %xmm3, %xmm5
; AVX1-NEXT: vpaddw %xmm4, %xmm4, %xmm7
; AVX1-NEXT: vpblendvb %xmm7, %xmm5, %xmm3, %xmm3
; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT: vpackuswb %xmm8, %xmm3, %xmm8
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm5 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; AVX1-NEXT: vpsraw $4, %xmm5, %xmm3
; AVX1-NEXT: vpblendvb %xmm2, %xmm3, %xmm5, %xmm2
; AVX1-NEXT: vpsraw $2, %xmm2, %xmm3
; AVX1-NEXT: vpblendvb %xmm6, %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpsraw $1, %xmm2, %xmm3
; AVX1-NEXT: vpblendvb %xmm9, %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX1-NEXT: vpsraw $4, %xmm0, %xmm3
; AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpsraw $2, %xmm0, %xmm1
; AVX1-NEXT: vpblendvb %xmm4, %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsraw $1, %xmm0, %xmm1
; AVX1-NEXT: vpblendvb %xmm7, %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm8, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splatvar_shift_v32i8:
; AVX2: # BB#0:
; AVX2-NEXT: vpbroadcastb %xmm1, %ymm1
; AVX2-NEXT: vpsllw $5, %ymm1, %ymm1
; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
; AVX2-NEXT: vpsraw $4, %ymm3, %ymm4
; AVX2-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
; AVX2-NEXT: vpsraw $2, %ymm3, %ymm4
; AVX2-NEXT: vpaddw %ymm2, %ymm2, %ymm2
; AVX2-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
; AVX2-NEXT: vpsraw $1, %ymm3, %ymm4
; AVX2-NEXT: vpaddw %ymm2, %ymm2, %ymm2
; AVX2-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm2
; AVX2-NEXT: vpsrlw $8, %ymm2, %ymm2
; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
; AVX2-NEXT: vpsraw $4, %ymm0, %ymm3
; AVX2-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX2-NEXT: vpsraw $2, %ymm0, %ymm3
; AVX2-NEXT: vpaddw %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX2-NEXT: vpsraw $1, %ymm0, %ymm3
; AVX2-NEXT: vpaddw %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0
; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_shift_v32i8:
; XOPAVX1: # BB#0:
; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; XOPAVX1-NEXT: vpsubb %xmm1, %xmm2, %xmm1
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT: vpshab %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT: vpshab %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: splatvar_shift_v32i8:
; XOPAVX2: # BB#0:
; XOPAVX2-NEXT: vpbroadcastb %xmm1, %ymm1
; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; XOPAVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
; XOPAVX2-NEXT: vpsubb %xmm2, %xmm3, %xmm2
; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm4
; XOPAVX2-NEXT: vpshab %xmm2, %xmm4, %xmm2
; XOPAVX2-NEXT: vpsubb %xmm1, %xmm3, %xmm1
; XOPAVX2-NEXT: vpshab %xmm1, %xmm0, %xmm0
; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_shift_v32i8:
; AVX512: ## BB#0:
; AVX512-NEXT: vpbroadcastb %xmm1, %ymm1
; AVX512-NEXT: vpsllw $5, %ymm1, %ymm1
; AVX512-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
; AVX512-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
; AVX512-NEXT: vpsraw $4, %ymm3, %ymm4
; AVX512-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
; AVX512-NEXT: vpsraw $2, %ymm3, %ymm4
; AVX512-NEXT: vpaddw %ymm2, %ymm2, %ymm2
; AVX512-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
; AVX512-NEXT: vpsraw $1, %ymm3, %ymm4
; AVX512-NEXT: vpaddw %ymm2, %ymm2, %ymm2
; AVX512-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm2
; AVX512-NEXT: vpsrlw $8, %ymm2, %ymm2
; AVX512-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
; AVX512-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
; AVX512-NEXT: vpsraw $4, %ymm0, %ymm3
; AVX512-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX512-NEXT: vpsraw $2, %ymm0, %ymm3
; AVX512-NEXT: vpaddw %ymm1, %ymm1, %ymm1
; AVX512-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX512-NEXT: vpsraw $1, %ymm0, %ymm3
; AVX512-NEXT: vpaddw %ymm1, %ymm1, %ymm1
; AVX512-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX512-NEXT: vpsrlw $8, %ymm0, %ymm0
; AVX512-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
; AVX512-NEXT: retq
  %splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
  %shift = ashr <32 x i8> %a, %splat
  ret <32 x i8> %shift
}

;
; Constant Shifts
;

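; With constant counts, the shifts and the sign-fixup masks fold into
; immediates and constant-pool loads: AVX1 pairs immediate vpsrlq results per
; half, AVX2/AVX512 use vpsrlvq with a memory operand, and both finish with
; the usual xor/sub to restore the sign bits.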
define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) nounwind {
; AVX1-LABEL: constant_shift_v4i64:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpsrlq $62, %xmm1, %xmm2
; AVX1-NEXT: vpsrlq $31, %xmm1, %xmm1
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [4294967296,2]
; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpsubq %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpsrlq $7, %xmm0, %xmm2
; AVX1-NEXT: vpsrlq $1, %xmm0, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [4611686018427387904,72057594037927936]
; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpsubq %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: constant_shift_v4i64:
; AVX2: # BB#0:
; AVX2-NEXT: vpsrlvq {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [4611686018427387904,72057594037927936,4294967296,2]
; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsubq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: constant_shift_v4i64:
; XOPAVX1: # BB#0:
; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; XOPAVX1-NEXT: vpsubq {{.*}}(%rip), %xmm1, %xmm2
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; XOPAVX1-NEXT: vpshaq %xmm2, %xmm3, %xmm2
; XOPAVX1-NEXT: vpsubq {{.*}}(%rip), %xmm1, %xmm1
; XOPAVX1-NEXT: vpshaq %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: constant_shift_v4i64:
; XOPAVX2: # BB#0:
; XOPAVX2-NEXT: vpsrlvq {{.*}}(%rip), %ymm0, %ymm0
; XOPAVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [4611686018427387904,72057594037927936,4294967296,2]
; XOPAVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
; XOPAVX2-NEXT: vpsubq %ymm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: constant_shift_v4i64:
; AVX512: ## BB#0:
; AVX512-NEXT: vpsrlvq {{.*}}(%rip), %ymm0, %ymm0
; AVX512-NEXT: vmovdqa {{.*#+}} ymm1 = [4611686018427387904,72057594037927936,4294967296,2]
; AVX512-NEXT: vpxor %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpsubq %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
  %shift = ashr <4 x i64> %a, <i64 1, i64 7, i64 31, i64 62>
  ret <4 x i64> %shift
}

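; AVX1 materializes each distinct dword count as an immediate vpsrad and
; blends the four results; AVX2/AVX512 fold the count vector into a vpsravd
; memory operand, and XOP does the same with vpshad.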
define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) nounwind {
; AVX1-LABEL: constant_shift_v8i32:
; AVX1: # BB#0:
; AVX1-NEXT: vpsrad $7, %xmm0, %xmm1
; AVX1-NEXT: vpsrad $5, %xmm0, %xmm2
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT: vpsrad $6, %xmm0, %xmm2
; AVX1-NEXT: vpsrad $4, %xmm0, %xmm3
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpsrad $7, %xmm0, %xmm2
; AVX1-NEXT: vpsrad $9, %xmm0, %xmm3
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT: vpsrad $8, %xmm0, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: constant_shift_v8i32:
; AVX2: # BB#0:
; AVX2-NEXT: vpsravd {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: constant_shift_v8i32:
; XOPAVX1: # BB#0:
; XOPAVX1-NEXT: vpshad {{.*}}(%rip), %xmm0, %xmm1
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; XOPAVX1-NEXT: vpshad {{.*}}(%rip), %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: constant_shift_v8i32:
; XOPAVX2: # BB#0:
; XOPAVX2-NEXT: vpsravd {{.*}}(%rip), %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: constant_shift_v8i32:
; AVX512: ## BB#0:
; AVX512-NEXT: vpsravd {{.*}}(%rip), %ymm0, %ymm0
; AVX512-NEXT: retq
  %shift = ashr <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7>
  ret <8 x i32> %shift
}

define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind {
; AVX1-LABEL: constant_shift_v16i16:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpsraw $8, %xmm1, %xmm1
; AVX1-NEXT: vpsraw $4, %xmm1, %xmm2
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT: vpsraw $2, %xmm1, %xmm2
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; AVX1-NEXT: vpsraw $1, %xmm1, %xmm2
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
; AVX1-NEXT: vpsraw $4, %xmm0, %xmm2
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT: vpsraw $2, %xmm0, %xmm2
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; AVX1-NEXT: vpsraw $1, %xmm0, %xmm2
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: constant_shift_v16i16:
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm2[4],ymm1[4],ymm2[5],ymm1[5],ymm2[6],ymm1[6],ymm2[7],ymm1[7],ymm2[12],ymm1[12],ymm2[13],ymm1[13],ymm2[14],ymm1[14],ymm2[15],ymm1[15]
; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15]
; AVX2-NEXT: vpsravd %ymm3, %ymm4, %ymm3
; AVX2-NEXT: vpsrld $16, %ymm3, %ymm3
; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm2 = ymm2[0],ymm1[0],ymm2[1],ymm1[1],ymm2[2],ymm1[2],ymm2[3],ymm1[3],ymm2[8],ymm1[8],ymm2[9],ymm1[9],ymm2[10],ymm1[10],ymm2[11],ymm1[11]
; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11]
; AVX2-NEXT: vpsravd %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpsrld $16, %ymm0, %ymm0
; AVX2-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: constant_shift_v16i16:
; XOPAVX1: # BB#0:
; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; XOPAVX1-NEXT: vpsubw {{.*}}(%rip), %xmm1, %xmm2
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; XOPAVX1-NEXT: vpshaw %xmm2, %xmm3, %xmm2
; XOPAVX1-NEXT: vpsubw {{.*}}(%rip), %xmm1, %xmm1
; XOPAVX1-NEXT: vpshaw %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: constant_shift_v16i16:
; XOPAVX2: # BB#0:
; XOPAVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; XOPAVX2-NEXT: vpsubw {{.*}}(%rip), %xmm1, %xmm2
; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
; XOPAVX2-NEXT: vpshaw %xmm2, %xmm3, %xmm2
; XOPAVX2-NEXT: vpsubw {{.*}}(%rip), %xmm1, %xmm1
; XOPAVX2-NEXT: vpshaw %xmm1, %xmm0, %xmm0
; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: constant_shift_v16i16:
; AVX512: ## BB#0:
; AVX512-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; AVX512-NEXT: vpsravw %zmm1, %zmm0, %zmm0
; AVX512-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; AVX512-NEXT: retq
  %shift = ashr <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
  ret <16 x i16> %shift
}

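; Constant byte counts still go through the unpack/vpblendvb ladder: the count
; vector is loaded from the constant pool, scaled with vpsllw $5, and the same
; conditional-shift sequence runs. XOP loads the negated counts and uses
; vpshab directly.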
define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind {
; AVX1-LABEL: constant_shift_v32i8:
; AVX1: # BB#0:
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
; AVX1-NEXT: vpsllw $5, %xmm1, %xmm1
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm4 = xmm0[8],xmm3[8],xmm0[9],xmm3[9],xmm0[10],xmm3[10],xmm0[11],xmm3[11],xmm0[12],xmm3[12],xmm0[13],xmm3[13],xmm0[14],xmm3[14],xmm0[15],xmm3[15]
; AVX1-NEXT: vpsraw $4, %xmm4, %xmm5
; AVX1-NEXT: vpblendvb %xmm2, %xmm5, %xmm4, %xmm4
; AVX1-NEXT: vpsraw $2, %xmm4, %xmm5
; AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm6
; AVX1-NEXT: vpblendvb %xmm6, %xmm5, %xmm4, %xmm4
; AVX1-NEXT: vpsraw $1, %xmm4, %xmm5
; AVX1-NEXT: vpaddw %xmm6, %xmm6, %xmm9
; AVX1-NEXT: vpblendvb %xmm9, %xmm5, %xmm4, %xmm4
; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm8
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm3 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
; AVX1-NEXT: vpsraw $4, %xmm3, %xmm5
; AVX1-NEXT: vpblendvb %xmm1, %xmm5, %xmm3, %xmm3
; AVX1-NEXT: vpsraw $2, %xmm3, %xmm5
; AVX1-NEXT: vpaddw %xmm1, %xmm1, %xmm4
; AVX1-NEXT: vpblendvb %xmm4, %xmm5, %xmm3, %xmm3
; AVX1-NEXT: vpsraw $1, %xmm3, %xmm5
; AVX1-NEXT: vpaddw %xmm4, %xmm4, %xmm7
; AVX1-NEXT: vpblendvb %xmm7, %xmm5, %xmm3, %xmm3
; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT: vpackuswb %xmm8, %xmm3, %xmm8
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm5 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; AVX1-NEXT: vpsraw $4, %xmm5, %xmm3
; AVX1-NEXT: vpblendvb %xmm2, %xmm3, %xmm5, %xmm2
; AVX1-NEXT: vpsraw $2, %xmm2, %xmm3
; AVX1-NEXT: vpblendvb %xmm6, %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpsraw $1, %xmm2, %xmm3
; AVX1-NEXT: vpblendvb %xmm9, %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX1-NEXT: vpsraw $4, %xmm0, %xmm3
; AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpsraw $2, %xmm0, %xmm1
; AVX1-NEXT: vpblendvb %xmm4, %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsraw $1, %xmm0, %xmm1
; AVX1-NEXT: vpblendvb %xmm7, %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm8, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: constant_shift_v32i8:
; AVX2: # BB#0:
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0,0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
; AVX2-NEXT: vpsllw $5, %ymm1, %ymm1
; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
; AVX2-NEXT: vpsraw $4, %ymm3, %ymm4
; AVX2-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
; AVX2-NEXT: vpsraw $2, %ymm3, %ymm4
; AVX2-NEXT: vpaddw %ymm2, %ymm2, %ymm2
; AVX2-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
; AVX2-NEXT: vpsraw $1, %ymm3, %ymm4
; AVX2-NEXT: vpaddw %ymm2, %ymm2, %ymm2
; AVX2-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm2
; AVX2-NEXT: vpsrlw $8, %ymm2, %ymm2
; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
; AVX2-NEXT: vpsraw $4, %ymm0, %ymm3
; AVX2-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX2-NEXT: vpsraw $2, %ymm0, %ymm3
; AVX2-NEXT: vpaddw %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX2-NEXT: vpsraw $1, %ymm0, %ymm3
; AVX2-NEXT: vpaddw %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0
; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: constant_shift_v32i8:
; XOPAVX1: # BB#0:
; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; XOPAVX1-NEXT: vpsubb {{.*}}(%rip), %xmm1, %xmm1
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT: vpshab %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT: vpshab %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: constant_shift_v32i8:
; XOPAVX2: # BB#0:
; XOPAVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; XOPAVX2-NEXT: vpsubb {{.*}}(%rip), %xmm1, %xmm1
; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
; XOPAVX2-NEXT: vpshab %xmm1, %xmm2, %xmm2
; XOPAVX2-NEXT: vpshab %xmm1, %xmm0, %xmm0
; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: constant_shift_v32i8:
; AVX512: ## BB#0:
; AVX512-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0,0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
; AVX512-NEXT: vpsllw $5, %ymm1, %ymm1
; AVX512-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
; AVX512-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
; AVX512-NEXT: vpsraw $4, %ymm3, %ymm4
; AVX512-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
; AVX512-NEXT: vpsraw $2, %ymm3, %ymm4
; AVX512-NEXT: vpaddw %ymm2, %ymm2, %ymm2
; AVX512-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
; AVX512-NEXT: vpsraw $1, %ymm3, %ymm4
; AVX512-NEXT: vpaddw %ymm2, %ymm2, %ymm2
; AVX512-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm2
; AVX512-NEXT: vpsrlw $8, %ymm2, %ymm2
; AVX512-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
; AVX512-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
; AVX512-NEXT: vpsraw $4, %ymm0, %ymm3
; AVX512-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX512-NEXT: vpsraw $2, %ymm0, %ymm3
; AVX512-NEXT: vpaddw %ymm1, %ymm1, %ymm1
; AVX512-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX512-NEXT: vpsraw $1, %ymm0, %ymm3
; AVX512-NEXT: vpaddw %ymm1, %ymm1, %ymm1
; AVX512-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX512-NEXT: vpsrlw $8, %ymm0, %ymm0
; AVX512-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
; AVX512-NEXT: retq
  %shift = ashr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
  ret <32 x i8> %shift
}

;
; Uniform Constant Shifts
;

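; A uniform i64 count below 32 can reuse the dword shifter: vpsrad $7 yields
; the correctly sign-extended high dwords and vpsrlq $7 the low dwords, and a
; blend combines them. XOPAVX1 keeps the vpshaq form, while XOPAVX2 falls back
; to vpsrlq plus the xor/sub fixup.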
define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) nounwind {
; AVX1-LABEL: splatconstant_shift_v4i64:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpsrad $7, %xmm1, %xmm2
; AVX1-NEXT: vpsrlq $7, %xmm1, %xmm1
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; AVX1-NEXT: vpsrad $7, %xmm0, %xmm2
; AVX1-NEXT: vpsrlq $7, %xmm0, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splatconstant_shift_v4i64:
; AVX2: # BB#0:
; AVX2-NEXT: vpsrad $7, %ymm0, %ymm1
; AVX2-NEXT: vpsrlq $7, %ymm0, %ymm0
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatconstant_shift_v4i64:
; XOPAVX1: # BB#0:
; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; XOPAVX1-NEXT: vpsubq {{.*}}(%rip), %xmm1, %xmm1
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT: vpshaq %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT: vpshaq %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: splatconstant_shift_v4i64:
; XOPAVX2: # BB#0:
; XOPAVX2-NEXT: vpsrlq $7, %ymm0, %ymm0
; XOPAVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm1
; XOPAVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
; XOPAVX2-NEXT: vpsubq %ymm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatconstant_shift_v4i64:
; AVX512: ## BB#0:
; AVX512-NEXT: vpsrad $7, %ymm0, %ymm1
; AVX512-NEXT: vpsrlq $7, %ymm0, %ymm0
; AVX512-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX512-NEXT: retq
  %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
  ret <4 x i64> %shift
}

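; Uniform dword and word counts map directly onto the immediate forms vpsrad
; and vpsraw; pre-AVX2 targets only pay for splitting and rejoining the two
; 128-bit halves.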
define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) nounwind {
; AVX1-LABEL: splatconstant_shift_v8i32:
; AVX1: # BB#0:
; AVX1-NEXT: vpsrad $5, %xmm0, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpsrad $5, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splatconstant_shift_v8i32:
; AVX2: # BB#0:
; AVX2-NEXT: vpsrad $5, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatconstant_shift_v8i32:
; XOPAVX1: # BB#0:
; XOPAVX1-NEXT: vpsrad $5, %xmm0, %xmm1
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; XOPAVX1-NEXT: vpsrad $5, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: splatconstant_shift_v8i32:
; XOPAVX2: # BB#0:
; XOPAVX2-NEXT: vpsrad $5, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatconstant_shift_v8i32:
; AVX512: ## BB#0:
; AVX512-NEXT: vpsrad $5, %ymm0, %ymm0
; AVX512-NEXT: retq
  %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <8 x i32> %shift
}

define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) nounwind {
; AVX1-LABEL: splatconstant_shift_v16i16:
; AVX1: # BB#0:
; AVX1-NEXT: vpsraw $3, %xmm0, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpsraw $3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splatconstant_shift_v16i16:
; AVX2: # BB#0:
; AVX2-NEXT: vpsraw $3, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatconstant_shift_v16i16:
; XOPAVX1: # BB#0:
; XOPAVX1-NEXT: vpsraw $3, %xmm0, %xmm1
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; XOPAVX1-NEXT: vpsraw $3, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: splatconstant_shift_v16i16:
; XOPAVX2: # BB#0:
; XOPAVX2-NEXT: vpsraw $3, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatconstant_shift_v16i16:
; AVX512: ## BB#0:
; AVX512-NEXT: vpsraw $3, %ymm0, %ymm0
; AVX512-NEXT: retq
  %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <16 x i16> %shift
}

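; For a uniform byte count the bytes are shifted as words with vpsrlw $3, the
; bits pulled in from the neighbouring byte are masked off (vpand with 31),
; and the sign is rebuilt by xor/sub against 16 (0x80 >> 3). XOPAVX1 keeps
; the single negated-count vpshab per half.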
define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) nounwind {
; AVX1-LABEL: splatconstant_shift_v32i8:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpsrlw $3, %xmm1, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpsubb %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpsrlw $3, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpsubb %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splatconstant_shift_v32i8:
; AVX2: # BB#0:
; AVX2-NEXT: vpsrlw $3, %ymm0, %ymm0
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsubb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatconstant_shift_v32i8:
; XOPAVX1: # BB#0:
; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; XOPAVX1-NEXT: vpsubb {{.*}}(%rip), %xmm1, %xmm1
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT: vpshab %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT: vpshab %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: splatconstant_shift_v32i8:
; XOPAVX2: # BB#0:
; XOPAVX2-NEXT: vpsrlw $3, %ymm0, %ymm0
; XOPAVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
; XOPAVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; XOPAVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
; XOPAVX2-NEXT: vpsubb %ymm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatconstant_shift_v32i8:
; AVX512: ## BB#0:
; AVX512-NEXT: vpsrlw $3, %ymm0, %ymm0
; AVX512-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX512-NEXT: vmovdqa {{.*#+}} ymm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX512-NEXT: vpxor %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpsubb %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
  %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <32 x i8> %shift
}