; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX2OR512VL --check-prefix=AVX2 --check-prefix=AVX2-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefix=ALL --check-prefix=AVX2OR512VL --check-prefix=AVX2 --check-prefix=AVX2-FAST
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX2OR512VL --check-prefix=AVX512VL --check-prefix=AVX512VL-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw,+fast-variable-shuffle | FileCheck %s --check-prefix=ALL --check-prefix=AVX2OR512VL --check-prefix=AVX512VL --check-prefix=AVX512VL-FAST

; Each function name spells out its 16-entry shuffle mask; only %a is ever
; selected (all mask indices are below 16), %b is unused by every mask here.

; Splat of element 0 into all sixteen lanes.
define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vpbroadcastw %xmm0, %ymm0
; AVX2OR512VL-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

; Element 0 everywhere except lane 14, which takes element 1.
define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,1,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
; AVX2-SLOW:       # %bb.0:
; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,1,0,4,5,6,7]
; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,1]
; AVX2-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
; AVX2-FAST:       # %bb.0:
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
; AVX2-FAST-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX2-FAST-NEXT:    retq
;
; AVX512VL-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
; AVX512VL-SLOW:       # %bb.0:
; AVX512VL-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,1,0,4,5,6,7]
; AVX512VL-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,1]
; AVX512VL-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX512VL-SLOW-NEXT:    retq
;
; AVX512VL-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
; AVX512VL-FAST:       # %bb.0:
; AVX512VL-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
; AVX512VL-FAST-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX512VL-FAST-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
  ret <16 x i16> %shuffle
}

; Element 0 everywhere except lane 13, which takes element 2.
define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,2,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
; AVX2-SLOW:       # %bb.0:
; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,2,4,5,6,7]
; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0]
; AVX2-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
; AVX2-FAST:       # %bb.0:
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
; AVX2-FAST-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX2-FAST-NEXT:    retq
;
; AVX512VL-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
; AVX512VL-SLOW:       # %bb.0:
; AVX512VL-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,2,4,5,6,7]
; AVX512VL-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0]
; AVX512VL-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX512VL-SLOW-NEXT:    retq
;
; AVX512VL-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
; AVX512VL-FAST:       # %bb.0:
; AVX512VL-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
; AVX512VL-FAST-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX512VL-FAST-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

; Element 0 everywhere except lane 12, which takes element 3.
define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
; AVX2-SLOW:       # %bb.0:
; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,0,4,5,6,7]
; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0]
; AVX2-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
; AVX2-FAST:       # %bb.0:
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
; AVX2-FAST-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX2-FAST-NEXT:    retq
;
; AVX512VL-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
; AVX512VL-SLOW:       # %bb.0:
; AVX512VL-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,0,4,5,6,7]
; AVX512VL-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0]
; AVX512VL-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX512VL-SLOW-NEXT:    retq
;
; AVX512VL-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
; AVX512VL-FAST:       # %bb.0:
; AVX512VL-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
; AVX512VL-FAST-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX512VL-FAST-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

; Element 0 everywhere except lane 11, which takes element 4.
define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,8,9]
; AVX2OR512VL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
; AVX2OR512VL-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

; Element 0 everywhere except lane 10, which takes element 5.
define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,10,11,0,1]
; AVX2OR512VL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
; AVX2OR512VL-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

; Element 0 everywhere except lane 9, which takes element 6.
define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,12,13,0,1,0,1]
; AVX2OR512VL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
; AVX2OR512VL-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

; Element 0 everywhere except lane 8, which takes element 7.
define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,14,15,0,1,0,1,0,1]
; AVX2OR512VL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
; AVX2OR512VL-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

; Element 0 everywhere except lane 7, which takes element 8.
define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,1,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,1]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
; AVX2-SLOW:       # %bb.0:
; AVX2-SLOW-NEXT:    vpbroadcastw %xmm0, %xmm1
; AVX2-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,0,2,3,4,5,6,7,8,8,10,11,12,13,14,15]
; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
; AVX2-SLOW-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-SLOW-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
; AVX2-FAST:       # %bb.0:
; AVX2-FAST-NEXT:    vpbroadcastw %xmm0, %xmm1
; AVX2-FAST-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
; AVX2-FAST-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-FAST-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-FAST-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
; AVX512VL-NEXT:    vpermw %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

; Element 0 everywhere except lane 6, which takes element 9.
define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,3,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,1]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = <0,0,255,255,u,u,u,u,u,u,u,u,u,u,u,u,255,255,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
; AVX2-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0]
; AVX512VL-NEXT:    vpermw %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 9, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

; Element 0 everywhere except lane 5, which takes element 10.
define <16 x i16> @shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,0,1,0,1,10,11,0,1,0,1]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
; AVX2-SLOW:       # %bb.0:
; AVX2-SLOW-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-SLOW-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
; AVX2-SLOW-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
; AVX2-FAST:       # %bb.0:
; AVX2-FAST-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,5,6,7,0,1,2,3]
; AVX2-FAST-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
; AVX2-FAST-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0]
; AVX512VL-NEXT:    vpermw %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 10, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

; Element 0 everywhere except lane 4, which takes element 11.
define <16 x i16> @shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,0,1,14,15,0,1,0,1,0,1]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00:
; AVX2-SLOW:       # %bb.0:
; AVX2-SLOW-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-SLOW-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
; AVX2-SLOW-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00:
; AVX2-FAST:       # %bb.0:
; AVX2-FAST-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,5,6,7,0,1,2,3]
; AVX2-FAST-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
; AVX2-FAST-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0]
; AVX512VL-NEXT:    vpermw %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 11, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

; Element 0 everywhere except lane 3, which takes element 12.
define <16 x i16> @shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1]
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,0]
; AVX512VL-NEXT:    vpermw %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 12, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

; Element 0 everywhere except lane 2, which takes element 13.
define <16 x i16> @shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1]
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0]
; AVX512VL-NEXT:    vpermw %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 13, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

; Element 0 everywhere except lane 1, which takes element 14.
define <16 x i16> @shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1]
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
; AVX512VL-NEXT:    vpermw %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 14, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

; Element 0 everywhere except lane 0, which takes element 15.
define <16 x i16> @shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1]
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    movl $15, %eax
; AVX512VL-NEXT:    vmovd %eax, %xmm1
; AVX512VL-NEXT:    vpermw %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

; Lanes 0-7 take element 0 and lanes 8-15 take element 8.
define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
; AVX2-SLOW:       # %bb.0:
; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,0,2,3,4,5,6,7,8,8,10,11,12,13,14,15]
; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
; AVX2-FAST:       # %bb.0:
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
; AVX2-FAST-NEXT:    retq
;
; AVX512VL-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
; AVX512VL-SLOW:       # %bb.0:
; AVX512VL-SLOW-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,0,2,3,4,5,6,7,8,8,10,11,12,13,14,15]
; AVX512VL-SLOW-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
; AVX512VL-SLOW-NEXT:    retq
;
; AVX512VL-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
; AVX512VL-FAST:       # %bb.0:
; AVX512VL-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
; AVX512VL-FAST-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
  ret <16 x i16> %shuffle
}

; Lanes 0-7 take element 7 and lanes 8-15 take element 15.
define <16 x i16> @shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,7,7,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,2,2,2]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,2,2]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15:
; AVX2-SLOW:       # %bb.0:
; AVX2-SLOW-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,7,6,7,8,9,10,11,15,15,14,15]
; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,2,2,2,6,6,6,6]
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-LABEL: shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15:
; AVX2-FAST:       # %bb.0:
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[14,15,14,15,14,15,14,15,14,15,14,15,14,15,14,15,30,31,30,31,30,31,30,31,30,31,30,31,30,31,30,31]
; AVX2-FAST-NEXT:    retq
;
; AVX512VL-SLOW-LABEL: shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15:
; AVX512VL-SLOW:       # %bb.0:
; AVX512VL-SLOW-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,7,6,7,8,9,10,11,15,15,14,15]
; AVX512VL-SLOW-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,2,2,2,6,6,6,6]
; AVX512VL-SLOW-NEXT:    retq
;
; AVX512VL-FAST-LABEL: shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15:
; AVX512VL-FAST:       # %bb.0:
; AVX512VL-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[14,15,14,15,14,15,14,15,14,15,14,15,14,15,14,15,30,31,30,31,30,31,30,31,30,31,30,31,30,31,30,31]
; AVX512VL-FAST-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
  ret <16 x i16> %shuffle
}

; Each group of four lanes repeats the first element of that group (0, 4, 8, 12).
define <16 x i16> @shuffle_v16i16_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
; AVX2-SLOW:       # %bb.0:
; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
; AVX2-SLOW-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
; AVX2-FAST:       # %bb.0:
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,8,9,8,9,8,9,8,9,16,17,16,17,16,17,16,17,24,25,24,25,24,25,24,25]
; AVX2-FAST-NEXT:    retq
;
; AVX512VL-SLOW-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
; AVX512VL-SLOW:       # %bb.0:
; AVX512VL-SLOW-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
; AVX512VL-SLOW-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
; AVX512VL-SLOW-NEXT:    retq
;
; AVX512VL-FAST-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
; AVX512VL-FAST:       # %bb.0:
; AVX512VL-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,8,9,8,9,8,9,8,9,16,17,16,17,16,17,16,17,24,25,24,25,24,25,24,25]
; AVX512VL-FAST-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12>
  ret <16 x i16> %shuffle
}

; Each group of four lanes repeats the last element of that group (3, 7, 11, 15).
define <16 x i16> @shuffle_v16i16_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[3,3,3,3,4,5,6,7]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,7,7,7]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: shuffle_v16i16_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15:
; AVX2-SLOW:       # %bb.0:
; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[3,3,3,3,4,5,6,7,11,11,11,11,12,13,14,15]
; AVX2-SLOW-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,7,7,7,8,9,10,11,15,15,15,15]
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-LABEL: shuffle_v16i16_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15:
; AVX2-FAST:       # %bb.0:
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[6,7,6,7,6,7,6,7,14,15,14,15,14,15,14,15,22,23,22,23,22,23,22,23,30,31,30,31,30,31,30,31]
; AVX2-FAST-NEXT:    retq
;
; AVX512VL-SLOW-LABEL: shuffle_v16i16_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15:
; AVX512VL-SLOW:       # %bb.0:
; AVX512VL-SLOW-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[3,3,3,3,4,5,6,7,11,11,11,11,12,13,14,15]
; AVX512VL-SLOW-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,7,7,7,8,9,10,11,15,15,15,15]
; AVX512VL-SLOW-NEXT:    retq
;
; AVX512VL-FAST-LABEL: shuffle_v16i16_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15:
; AVX512VL-FAST:       # %bb.0:
; AVX512VL-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[6,7,6,7,6,7,6,7,14,15,14,15,14,15,14,15,22,23,22,23,22,23,22,23,30,31,30,31,30,31,30,31]
; AVX512VL-FAST-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 7, i32 7, i32 7, i32 7, i32 11, i32 11, i32 11, i32 11, i32 15, i32 15, i32 15, i32 15>
  ret <16 x i16> %shuffle
}

; Every even-indexed element is duplicated into the following odd lane.
define <16 x i16> @shuffle_v16i16_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,2,2,4,5,6,7]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,6]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,6]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14:
; AVX2-SLOW:       # %bb.0:
; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,0,2,2,4,5,6,7,8,8,10,10,12,13,14,15]
; AVX2-SLOW-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,6,6,8,9,10,11,12,12,14,14]
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14:
; AVX2-FAST:       # %bb.0:
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5,8,9,8,9,12,13,12,13,16,17,16,17,20,21,20,21,24,25,24,25,28,29,28,29]
; AVX2-FAST-NEXT:    retq
;
; AVX512VL-SLOW-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14:
; AVX512VL-SLOW:       # %bb.0:
; AVX512VL-SLOW-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,0,2,2,4,5,6,7,8,8,10,10,12,13,14,15]
; AVX512VL-SLOW-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,6,6,8,9,10,11,12,12,14,14]
; AVX512VL-SLOW-NEXT:    retq
;
; AVX512VL-FAST-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14:
; AVX512VL-FAST:       # %bb.0:
; AVX512VL-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5,8,9,8,9,12,13,12,13,16,17,16,17,20,21,20,21,24,25,24,25,28,29,28,29]
; AVX512VL-FAST-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
  ret <16 x i16> %shuffle
}

; Every odd-indexed element is duplicated into the preceding even lane.
define <16 x i16> @shuffle_v16i16_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[1,1,3,3,4,5,6,7]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,5,7,7]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[1,1,3,3,4,5,6,7]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,7,7]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: shuffle_v16i16_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15:
; AVX2-SLOW:       # %bb.0:
; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15]
; AVX2-SLOW-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,5,5,7,7,8,9,10,11,13,13,15,15]
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-LABEL: shuffle_v16i16_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15:
; AVX2-FAST:       # %bb.0:
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15,18,19,18,19,22,23,22,23,26,27,26,27,30,31,30,31]
; AVX2-FAST-NEXT:    retq
;
; AVX512VL-SLOW-LABEL: shuffle_v16i16_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15:
; AVX512VL-SLOW:       # %bb.0:
; AVX512VL-SLOW-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15]
; AVX512VL-SLOW-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,5,5,7,7,8,9,10,11,13,13,15,15]
; AVX512VL-SLOW-NEXT:    retq
;
; AVX512VL-FAST-LABEL: shuffle_v16i16_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15:
; AVX512VL-FAST:       # %bb.0:
; AVX512VL-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15,18,19,18,19,22,23,22,23,26,27,26,27,30,31,30,31]
; AVX512VL-FAST-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_01_00_00_00_00_00_00_00_01_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_00_00_00_00_00_00_01_00:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,1,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL:
shuffle_v16i16_00_00_00_00_00_00_01_00_00_00_00_00_00_00_01_00: 680; AVX2-SLOW: # %bb.0: 681; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,1,0,4,5,6,7] 682; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,1] 683; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] 684; AVX2-SLOW-NEXT: retq 685; 686; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_00_00_00_00_00_00_01_00: 687; AVX2-FAST: # %bb.0: 688; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1] 689; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] 690; AVX2-FAST-NEXT: retq 691; 692; AVX512VL-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_00_00_00_00_00_00_01_00: 693; AVX512VL-SLOW: # %bb.0: 694; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,1,0,4,5,6,7] 695; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,1] 696; AVX512VL-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] 697; AVX512VL-SLOW-NEXT: retq 698; 699; AVX512VL-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_00_00_00_00_00_00_01_00: 700; AVX512VL-FAST: # %bb.0: 701; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1] 702; AVX512VL-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] 703; AVX512VL-FAST-NEXT: retq 704 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0> 705 ret <16 x i16> %shuffle 706} 707 708define <16 x i16> @shuffle_v16i16_00_00_00_00_00_02_00_00_00_00_00_00_00_02_00_00(<16 x i16> %a, <16 x i16> %b) { 709; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_00_00_00_00_00_02_00_00: 710; AVX1: # %bb.0: 711; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,2,4,5,6,7] 712; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0] 713; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 714; AVX1-NEXT: retq 715; 716; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_00_00_00_00_00_02_00_00: 717; AVX2-SLOW: # %bb.0: 718; 
AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,2,4,5,6,7] 719; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0] 720; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] 721; AVX2-SLOW-NEXT: retq 722; 723; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_00_00_00_00_00_02_00_00: 724; AVX2-FAST: # %bb.0: 725; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1] 726; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] 727; AVX2-FAST-NEXT: retq 728; 729; AVX512VL-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_00_00_00_00_00_02_00_00: 730; AVX512VL-SLOW: # %bb.0: 731; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,2,4,5,6,7] 732; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0] 733; AVX512VL-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] 734; AVX512VL-SLOW-NEXT: retq 735; 736; AVX512VL-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_00_00_00_00_00_02_00_00: 737; AVX512VL-FAST: # %bb.0: 738; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1] 739; AVX512VL-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] 740; AVX512VL-FAST-NEXT: retq 741 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0> 742 ret <16 x i16> %shuffle 743} 744 745define <16 x i16> @shuffle_v16i16_00_00_00_00_03_00_00_00_00_00_00_00_03_00_00_00(<16 x i16> %a, <16 x i16> %b) { 746; AVX1-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_00_00_00_00_03_00_00_00: 747; AVX1: # %bb.0: 748; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,0,4,5,6,7] 749; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0] 750; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 751; AVX1-NEXT: retq 752; 753; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_00_00_00_00_03_00_00_00: 754; AVX2-SLOW: # %bb.0: 755; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,0,4,5,6,7] 756; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = 
xmm0[0,0,1,0] 757; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] 758; AVX2-SLOW-NEXT: retq 759; 760; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_00_00_00_00_03_00_00_00: 761; AVX2-FAST: # %bb.0: 762; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1] 763; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] 764; AVX2-FAST-NEXT: retq 765; 766; AVX512VL-SLOW-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_00_00_00_00_03_00_00_00: 767; AVX512VL-SLOW: # %bb.0: 768; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,0,4,5,6,7] 769; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0] 770; AVX512VL-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] 771; AVX512VL-SLOW-NEXT: retq 772; 773; AVX512VL-FAST-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_00_00_00_00_03_00_00_00: 774; AVX512VL-FAST: # %bb.0: 775; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1] 776; AVX512VL-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] 777; AVX512VL-FAST-NEXT: retq 778 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0> 779 ret <16 x i16> %shuffle 780} 781 782define <16 x i16> @shuffle_v16i16_00_00_00_04_00_00_00_00_00_00_00_04_00_00_00_00(<16 x i16> %a, <16 x i16> %b) { 783; AVX1-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_00_00_00_04_00_00_00_00: 784; AVX1: # %bb.0: 785; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1] 786; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 787; AVX1-NEXT: retq 788; 789; AVX2OR512VL-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_00_00_00_04_00_00_00_00: 790; AVX2OR512VL: # %bb.0: 791; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1] 792; AVX2OR512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] 793; AVX2OR512VL-NEXT: retq 794 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 
0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0> 795 ret <16 x i16> %shuffle 796} 797 798define <16 x i16> @shuffle_v16i16_00_00_05_00_00_00_00_00_00_00_05_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) { 799; AVX1-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_00_00_05_00_00_00_00_00: 800; AVX1: # %bb.0: 801; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1] 802; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 803; AVX1-NEXT: retq 804; 805; AVX2OR512VL-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_00_00_05_00_00_00_00_00: 806; AVX2OR512VL: # %bb.0: 807; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1] 808; AVX2OR512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] 809; AVX2OR512VL-NEXT: retq 810 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0> 811 ret <16 x i16> %shuffle 812} 813 814define <16 x i16> @shuffle_v16i16_00_06_00_00_00_00_00_00_00_06_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) { 815; AVX1-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_00_06_00_00_00_00_00_00: 816; AVX1: # %bb.0: 817; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1] 818; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 819; AVX1-NEXT: retq 820; 821; AVX2OR512VL-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_00_06_00_00_00_00_00_00: 822; AVX2OR512VL: # %bb.0: 823; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1] 824; AVX2OR512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] 825; AVX2OR512VL-NEXT: retq 826 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 827 ret <16 x i16> %shuffle 828} 829 830define <16 x i16> @shuffle_v16i16_07_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00(<16 
x i16> %a, <16 x i16> %b) { 831; AVX1-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00: 832; AVX1: # %bb.0: 833; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1] 834; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 835; AVX1-NEXT: retq 836; 837; AVX2OR512VL-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00: 838; AVX2OR512VL: # %bb.0: 839; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1] 840; AVX2OR512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] 841; AVX2OR512VL-NEXT: retq 842 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 843 ret <16 x i16> %shuffle 844} 845 846define <16 x i16> @shuffle_v16i16_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31(<16 x i16> %a, <16 x i16> %b) { 847; AVX1-LABEL: shuffle_v16i16_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31: 848; AVX1: # %bb.0: 849; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [65535,0,65535,0,65535,0,65535,0,65535,0,65535,0,65535,0,65535,0] 850; AVX1-NEXT: vandnps %ymm1, %ymm2, %ymm1 851; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0 852; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 853; AVX1-NEXT: retq 854; 855; AVX2OR512VL-LABEL: shuffle_v16i16_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31: 856; AVX2OR512VL: # %bb.0: 857; AVX2OR512VL-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] 858; AVX2OR512VL-NEXT: retq 859 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31> 860 ret <16 x i16> %shuffle 861} 862 863define <16 x i16> @shuffle_v16i16_16_01_18_03_20_05_22_07_24_09_26_11_28_13_30_15(<16 x i16> %a, <16 x i16> %b) { 864; AVX1-LABEL: 
shuffle_v16i16_16_01_18_03_20_05_22_07_24_09_26_11_28_13_30_15: 865; AVX1: # %bb.0: 866; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [65535,0,65535,0,65535,0,65535,0,65535,0,65535,0,65535,0,65535,0] 867; AVX1-NEXT: vandnps %ymm0, %ymm2, %ymm0 868; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1 869; AVX1-NEXT: vorps %ymm0, %ymm1, %ymm0 870; AVX1-NEXT: retq 871; 872; AVX2OR512VL-LABEL: shuffle_v16i16_16_01_18_03_20_05_22_07_24_09_26_11_28_13_30_15: 873; AVX2OR512VL: # %bb.0: 874; AVX2OR512VL-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7],ymm1[8],ymm0[9],ymm1[10],ymm0[11],ymm1[12],ymm0[13],ymm1[14],ymm0[15] 875; AVX2OR512VL-NEXT: retq 876 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 13, i32 30, i32 15> 877 ret <16 x i16> %shuffle 878} 879 880define <16 x i16> @shuffle_v16i16_00_01_18_19_04_05_22_23_08_09_26_27_12_13_30_31(<16 x i16> %a, <16 x i16> %b) { 881; ALL-LABEL: shuffle_v16i16_00_01_18_19_04_05_22_23_08_09_26_27_12_13_30_31: 882; ALL: # %bb.0: 883; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 884; ALL-NEXT: retq 885 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 4, i32 5, i32 22, i32 23, i32 8, i32 9, i32 26, i32 27, i32 12, i32 13, i32 30, i32 31> 886 ret <16 x i16> %shuffle 887} 888 889define <16 x i16> @shuffle_v16i16_16_17_18_19_04_05_06_07_24_25_26_27_12_13_14_15(<16 x i16> %a, <16 x i16> %b) { 890; ALL-LABEL: shuffle_v16i16_16_17_18_19_04_05_06_07_24_25_26_27_12_13_14_15: 891; ALL: # %bb.0: 892; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7] 893; ALL-NEXT: retq 894 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 12, i32 13, i32 14, i32 15> 895 ret <16 x 
i16> %shuffle 896} 897 898define <16 x i16> @shuffle_v16i16_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31(<16 x i16> %a, <16 x i16> %b) { 899; AVX1-LABEL: shuffle_v16i16_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31: 900; AVX1: # %bb.0: 901; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,0] 902; AVX1-NEXT: vandnps %ymm1, %ymm2, %ymm1 903; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0 904; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 905; AVX1-NEXT: retq 906; 907; AVX2-LABEL: shuffle_v16i16_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31: 908; AVX2: # %bb.0: 909; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0] 910; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 911; AVX2-NEXT: retq 912; 913; AVX512VL-LABEL: shuffle_v16i16_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31: 914; AVX512VL: # %bb.0: 915; AVX512VL-NEXT: movw $-32768, %ax # imm = 0x8000 916; AVX512VL-NEXT: kmovd %eax, %k1 917; AVX512VL-NEXT: vmovdqu16 %ymm1, %ymm0 {%k1} 918; AVX512VL-NEXT: retq 919 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 31> 920 ret <16 x i16> %shuffle 921} 922 923define <16 x i16> @shuffle_v16i16_16_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15(<16 x i16> %a, <16 x i16> %b) { 924; AVX1-LABEL: shuffle_v16i16_16_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15: 925; AVX1: # %bb.0: 926; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [0,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535] 927; AVX1-NEXT: vandnps %ymm1, %ymm2, %ymm1 928; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0 929; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 930; AVX1-NEXT: retq 931; 932; AVX2-LABEL: shuffle_v16i16_16_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15: 933; AVX2: # %bb.0: 934; AVX2-NEXT: 
vmovdqa {{.*#+}} ymm2 = [0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] 935; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 936; AVX2-NEXT: retq 937; 938; AVX512VL-LABEL: shuffle_v16i16_16_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15: 939; AVX512VL: # %bb.0: 940; AVX512VL-NEXT: movw $1, %ax 941; AVX512VL-NEXT: kmovd %eax, %k1 942; AVX512VL-NEXT: vmovdqu16 %ymm1, %ymm0 {%k1} 943; AVX512VL-NEXT: retq 944 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 945 ret <16 x i16> %shuffle 946} 947 948define <16 x i16> @shuffle_v16i16_00_17_02_19_04_21_06_23_24_09_26_11_28_13_30_15(<16 x i16> %a, <16 x i16> %b) { 949; AVX1-LABEL: shuffle_v16i16_00_17_02_19_04_21_06_23_24_09_26_11_28_13_30_15: 950; AVX1: # %bb.0: 951; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [65535,0,65535,0,65535,0,65535,0,0,65535,0,65535,0,65535,0,65535] 952; AVX1-NEXT: vandnps %ymm1, %ymm2, %ymm1 953; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0 954; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 955; AVX1-NEXT: retq 956; 957; AVX2-LABEL: shuffle_v16i16_00_17_02_19_04_21_06_23_24_09_26_11_28_13_30_15: 958; AVX2: # %bb.0: 959; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255] 960; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 961; AVX2-NEXT: retq 962; 963; AVX512VL-LABEL: shuffle_v16i16_00_17_02_19_04_21_06_23_24_09_26_11_28_13_30_15: 964; AVX512VL: # %bb.0: 965; AVX512VL-NEXT: movw $21930, %ax # imm = 0x55AA 966; AVX512VL-NEXT: kmovd %eax, %k1 967; AVX512VL-NEXT: vmovdqu16 %ymm1, %ymm0 {%k1} 968; AVX512VL-NEXT: retq 969 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 24, i32 9, i32 26, i32 11, i32 28, i32 13, i32 30, i32 15> 970 ret <16 x i16> %shuffle 
971} 972 973define <16 x i16> @shuffle_v16i16_16_01_18_03_20_05_22_07_08_25_10_27_12_29_14_31(<16 x i16> %a, <16 x i16> %b) { 974; AVX1-LABEL: shuffle_v16i16_16_01_18_03_20_05_22_07_08_25_10_27_12_29_14_31: 975; AVX1: # %bb.0: 976; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [0,65535,0,65535,0,65535,0,65535,65535,0,65535,0,65535,0,65535,0] 977; AVX1-NEXT: vandnps %ymm1, %ymm2, %ymm1 978; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0 979; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 980; AVX1-NEXT: retq 981; 982; AVX2-LABEL: shuffle_v16i16_16_01_18_03_20_05_22_07_08_25_10_27_12_29_14_31: 983; AVX2: # %bb.0: 984; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255,255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0] 985; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 986; AVX2-NEXT: retq 987; 988; AVX512VL-LABEL: shuffle_v16i16_16_01_18_03_20_05_22_07_08_25_10_27_12_29_14_31: 989; AVX512VL: # %bb.0: 990; AVX512VL-NEXT: movw $-21931, %ax # imm = 0xAA55 991; AVX512VL-NEXT: kmovd %eax, %k1 992; AVX512VL-NEXT: vmovdqu16 %ymm1, %ymm0 {%k1} 993; AVX512VL-NEXT: retq 994 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31> 995 ret <16 x i16> %shuffle 996} 997 998define <16 x i16> @shuffle_v16i16_00_01_18_19_20_21_06_07_08_09_26_27_12_13_30_31(<16 x i16> %a, <16 x i16> %b) { 999; ALL-LABEL: shuffle_v16i16_00_01_18_19_20_21_06_07_08_09_26_27_12_13_30_31: 1000; ALL: # %bb.0: 1001; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4],ymm1[5],ymm0[6],ymm1[7] 1002; ALL-NEXT: retq 1003 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 20, i32 21, i32 6, i32 7, i32 8, i32 9, i32 26, i32 27, i32 12, i32 13, i32 30, i32 31> 1004 ret <16 x i16> %shuffle 1005} 1006 1007define <16 x i16> @shuffle_v16i16_00_16_00_16_00_16_00_16_00_16_00_16_00_16_00_16(<16 x i16> %a, <16 x i16> %b) { 1008; 
AVX1-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_00_16_00_16_00_16_00_16: 1009; AVX1: # %bb.0: 1010; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1011; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 1012; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 1013; AVX1-NEXT: retq 1014; 1015; AVX2OR512VL-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_00_16_00_16_00_16_00_16: 1016; AVX2OR512VL: # %bb.0: 1017; AVX2OR512VL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1018; AVX2OR512VL-NEXT: vpbroadcastd %xmm0, %ymm0 1019; AVX2OR512VL-NEXT: retq 1020 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16> 1021 ret <16 x i16> %shuffle 1022} 1023 1024define <16 x i16> @shuffle_v16i16_00_16_00_16_00_16_00_16_08_24_08_24_08_24_08_24(<16 x i16> %a, <16 x i16> %b) { 1025; AVX1-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_08_24_08_24_08_24_08_24: 1026; AVX1: # %bb.0: 1027; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1028; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] 1029; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 1030; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1031; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1032; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 1033; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 1034; AVX1-NEXT: retq 1035; 1036; AVX2-SLOW-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_08_24_08_24_08_24_08_24: 1037; AVX2-SLOW: # %bb.0: 1038; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4] 1039; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm1 = ymm1[0,0,2,3,4,5,6,7,8,8,10,11,12,13,14,15] 1040; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,0,0,0,4,4,4,4] 1041; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = 
ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] 1042; AVX2-SLOW-NEXT: retq 1043; 1044; AVX2-FAST-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_08_24_08_24_08_24_08_24: 1045; AVX2-FAST: # %bb.0: 1046; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17] 1047; AVX2-FAST-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4] 1048; AVX2-FAST-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] 1049; AVX2-FAST-NEXT: retq 1050; 1051; AVX512VL-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_08_24_08_24_08_24_08_24: 1052; AVX512VL: # %bb.0: 1053; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [0,16,0,16,0,16,0,16,8,24,8,24,8,24,8,24] 1054; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0 1055; AVX512VL-NEXT: retq 1056 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 8, i32 24, i32 8, i32 24, i32 8, i32 24, i32 8, i32 24> 1057 ret <16 x i16> %shuffle 1058} 1059 1060define <16 x i16> @shuffle_v16i16_16_16_16_16_04_05_06_07_24_24_24_24_12_13_14_15(<16 x i16> %a, <16 x i16> %b) { 1061; AVX1-LABEL: shuffle_v16i16_16_16_16_16_04_05_06_07_24_24_24_24_12_13_14_15: 1062; AVX1: # %bb.0: 1063; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 1064; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 1065; AVX1-NEXT: vpshuflw {{.*#+}} xmm3 = xmm3[0,0,0,0,4,5,6,7] 1066; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7] 1067; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] 1068; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7] 1069; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1070; AVX1-NEXT: retq 1071; 1072; AVX2-LABEL: shuffle_v16i16_16_16_16_16_04_05_06_07_24_24_24_24_12_13_14_15: 1073; AVX2: # %bb.0: 1074; AVX2-NEXT: vpshuflw 
{{.*#+}} ymm1 = ymm1[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15] 1075; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7] 1076; AVX2-NEXT: retq 1077; 1078; AVX512VL-LABEL: shuffle_v16i16_16_16_16_16_04_05_06_07_24_24_24_24_12_13_14_15: 1079; AVX512VL: # %bb.0: 1080; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,0,0,20,21,22,23,8,8,8,8,28,29,30,31] 1081; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2 1082; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0 1083; AVX512VL-NEXT: retq 1084 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 4, i32 5, i32 6, i32 7, i32 24, i32 24, i32 24, i32 24, i32 12, i32 13, i32 14, i32 15> 1085 ret <16 x i16> %shuffle 1086} 1087 1088define <16 x i16> @shuffle_v16i16_19_18_17_16_07_06_05_04_27_26_25_24_15_14_13_12(<16 x i16> %a, <16 x i16> %b) { 1089; AVX1-LABEL: shuffle_v16i16_19_18_17_16_07_06_05_04_27_26_25_24_15_14_13_12: 1090; AVX1: # %bb.0: 1091; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 1092; AVX1-NEXT: vpshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,7,6,5,4] 1093; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 1094; AVX1-NEXT: vpshuflw {{.*#+}} xmm3 = xmm3[3,2,1,0,4,5,6,7] 1095; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7] 1096; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4] 1097; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7] 1098; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7] 1099; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1100; AVX1-NEXT: retq 1101; 1102; AVX2-SLOW-LABEL: shuffle_v16i16_19_18_17_16_07_06_05_04_27_26_25_24_15_14_13_12: 1103; AVX2-SLOW: # %bb.0: 1104; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7] 1105; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] 1106; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] 1107; AVX2-SLOW-NEXT: retq 1108; 1109; AVX2-FAST-LABEL: 
shuffle_v16i16_19_18_17_16_07_06_05_04_27_26_25_24_15_14_13_12: 1110; AVX2-FAST: # %bb.0: 1111; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7] 1112; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,4,5,2,3,0,1,14,15,12,13,10,11,8,9,22,23,20,21,18,19,16,17,30,31,28,29,26,27,24,25] 1113; AVX2-FAST-NEXT: retq 1114; 1115; AVX512VL-LABEL: shuffle_v16i16_19_18_17_16_07_06_05_04_27_26_25_24_15_14_13_12: 1116; AVX512VL: # %bb.0: 1117; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [3,2,1,0,23,22,21,20,11,10,9,8,31,30,29,28] 1118; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2 1119; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0 1120; AVX512VL-NEXT: retq 1121 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 19, i32 18, i32 17, i32 16, i32 7, i32 6, i32 5, i32 4, i32 27, i32 26, i32 25, i32 24, i32 15, i32 14, i32 13, i32 12> 1122 ret <16 x i16> %shuffle 1123} 1124 1125define <16 x i16> @shuffle_v16i16_19_18_17_16_03_02_01_00_27_26_25_24_11_10_09_08(<16 x i16> %a, <16 x i16> %b) { 1126; AVX1-LABEL: shuffle_v16i16_19_18_17_16_03_02_01_00_27_26_25_24_11_10_09_08: 1127; AVX1: # %bb.0: 1128; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 1129; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 1130; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] 1131; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [12,13,8,9,4,5,0,1,14,15,10,11,6,7,2,3] 1132; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2 1133; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 1134; AVX1-NEXT: vpshufb %xmm3, %xmm0, %xmm0 1135; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1136; AVX1-NEXT: retq 1137; 1138; AVX2-LABEL: shuffle_v16i16_19_18_17_16_03_02_01_00_27_26_25_24_11_10_09_08: 1139; AVX2: # %bb.0: 1140; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] 1141; AVX2-NEXT: vpshuflw {{.*#+}} ymm1 = ymm1[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] 1142; AVX2-NEXT: vpunpcklqdq {{.*#+}} 
ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2] 1143; AVX2-NEXT: retq 1144; 1145; AVX512VL-LABEL: shuffle_v16i16_19_18_17_16_03_02_01_00_27_26_25_24_11_10_09_08: 1146; AVX512VL: # %bb.0: 1147; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [3,2,1,0,19,18,17,16,11,10,9,8,27,26,25,24] 1148; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2 1149; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0 1150; AVX512VL-NEXT: retq 1151 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 19, i32 18, i32 17, i32 16, i32 3, i32 2, i32 1, i32 0, i32 27, i32 26, i32 25, i32 24, i32 11, i32 10, i32 9, i32 8> 1152 ret <16 x i16> %shuffle 1153} 1154 1155define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_01_00_08_08_08_08_08_08_09_08(<16 x i16> %a, <16 x i16> %b) { 1156; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_08_08_08_08_08_09_08: 1157; AVX1: # %bb.0: 1158; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,0,4,5,6,7] 1159; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,1] 1160; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1161; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,1,0,4,5,6,7] 1162; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,1] 1163; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1164; AVX1-NEXT: retq 1165; 1166; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_08_08_08_08_08_09_08: 1167; AVX2-SLOW: # %bb.0: 1168; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,1,0,4,5,6,7,8,8,9,8,12,13,14,15] 1169; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5] 1170; AVX2-SLOW-NEXT: retq 1171; 1172; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_08_08_08_08_08_09_08: 1173; AVX2-FAST: # %bb.0: 1174; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1,16,17,16,17,16,17,16,17,16,17,16,17,18,19,16,17] 1175; AVX2-FAST-NEXT: retq 1176; 1177; AVX512VL-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_08_08_08_08_08_09_08: 1178; AVX512VL-SLOW: # %bb.0: 1179; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = 
ymm0[0,0,1,0,4,5,6,7,8,8,9,8,12,13,14,15] 1180; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5] 1181; AVX512VL-SLOW-NEXT: retq 1182; 1183; AVX512VL-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_08_08_08_08_08_09_08: 1184; AVX512VL-FAST: # %bb.0: 1185; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1,16,17,16,17,16,17,16,17,16,17,16,17,18,19,16,17] 1186; AVX512VL-FAST-NEXT: retq 1187 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 8> 1188 ret <16 x i16> %shuffle 1189} 1190 1191define <16 x i16> @shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_08_08_08_10_08_08(<16 x i16> %a, <16 x i16> %b) { 1192; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_08_08_08_10_08_08: 1193; AVX1: # %bb.0: 1194; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,2,4,5,6,7] 1195; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,0] 1196; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1197; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,2,4,5,6,7] 1198; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0] 1199; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1200; AVX1-NEXT: retq 1201; 1202; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_08_08_08_10_08_08: 1203; AVX2-SLOW: # %bb.0: 1204; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,2,4,5,6,7,8,8,8,10,12,13,14,15] 1205; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,5,4] 1206; AVX2-SLOW-NEXT: retq 1207; 1208; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_08_08_08_10_08_08: 1209; AVX2-FAST: # %bb.0: 1210; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1,16,17,16,17,16,17,16,17,16,17,20,21,16,17,16,17] 1211; AVX2-FAST-NEXT: retq 1212; 1213; AVX512VL-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_08_08_08_10_08_08: 1214; AVX512VL-SLOW: # %bb.0: 1215; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} 
ymm0 = ymm0[0,0,0,2,4,5,6,7,8,8,8,10,12,13,14,15] 1216; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,5,4] 1217; AVX512VL-SLOW-NEXT: retq 1218; 1219; AVX512VL-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_08_08_08_10_08_08: 1220; AVX512VL-FAST: # %bb.0: 1221; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1,16,17,16,17,16,17,16,17,16,17,20,21,16,17,16,17] 1222; AVX512VL-FAST-NEXT: retq 1223 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 10, i32 8, i32 8> 1224 ret <16 x i16> %shuffle 1225} 1226 1227define <16 x i16> @shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_08_11_08_08_08(<16 x i16> %a, <16 x i16> %b) { 1228; AVX1-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_08_11_08_08_08: 1229; AVX1: # %bb.0: 1230; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,3,0,4,5,6,7] 1231; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,0] 1232; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1233; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,0,4,5,6,7] 1234; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0] 1235; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1236; AVX1-NEXT: retq 1237; 1238; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_08_11_08_08_08: 1239; AVX2-SLOW: # %bb.0: 1240; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,3,0,4,5,6,7,8,8,11,8,12,13,14,15] 1241; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,5,4] 1242; AVX2-SLOW-NEXT: retq 1243; 1244; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_08_11_08_08_08: 1245; AVX2-FAST: # %bb.0: 1246; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1,16,17,16,17,16,17,16,17,22,23,16,17,16,17,16,17] 1247; AVX2-FAST-NEXT: retq 1248; 1249; AVX512VL-SLOW-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_08_11_08_08_08: 1250; AVX512VL-SLOW: # %bb.0: 1251; AVX512VL-SLOW-NEXT: vpshuflw 
{{.*#+}} ymm0 = ymm0[0,0,3,0,4,5,6,7,8,8,11,8,12,13,14,15] 1252; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,5,4] 1253; AVX512VL-SLOW-NEXT: retq 1254; 1255; AVX512VL-FAST-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_08_11_08_08_08: 1256; AVX512VL-FAST: # %bb.0: 1257; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1,16,17,16,17,16,17,16,17,22,23,16,17,16,17,16,17] 1258; AVX512VL-FAST-NEXT: retq 1259 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 11, i32 8, i32 8, i32 8> 1260 ret <16 x i16> %shuffle 1261} 1262 1263define <16 x i16> @shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_12_08_08_08_08(<16 x i16> %a, <16 x i16> %b) { 1264; AVX1-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_12_08_08_08_08: 1265; AVX1: # %bb.0: 1266; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1267; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1] 1268; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 1269; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 1270; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1271; AVX1-NEXT: retq 1272; 1273; AVX2OR512VL-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_12_08_08_08_08: 1274; AVX2OR512VL: # %bb.0: 1275; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,24,25,16,17,16,17,16,17,16,17] 1276; AVX2OR512VL-NEXT: retq 1277 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 12, i32 8, i32 8, i32 8, i32 8> 1278 ret <16 x i16> %shuffle 1279} 1280 1281define <16 x i16> @shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_13_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) { 1282; AVX1-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_13_08_08_08_08_08: 1283; AVX1: # %bb.0: 1284; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1285; AVX1-NEXT: vmovdqa 
{{.*#+}} xmm2 = [0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1] 1286; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 1287; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 1288; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1289; AVX1-NEXT: retq 1290; 1291; AVX2OR512VL-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_13_08_08_08_08_08: 1292; AVX2OR512VL: # %bb.0: 1293; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1,16,17,16,17,26,27,16,17,16,17,16,17,16,17,16,17] 1294; AVX2OR512VL-NEXT: retq 1295 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 13, i32 8, i32 8, i32 8, i32 8, i32 8> 1296 ret <16 x i16> %shuffle 1297} 1298 1299define <16 x i16> @shuffle_v16i16_00_06_00_00_00_00_00_00_08_14_08_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) { 1300; AVX1-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_08_14_08_08_08_08_08_08: 1301; AVX1: # %bb.0: 1302; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1303; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1] 1304; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 1305; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 1306; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1307; AVX1-NEXT: retq 1308; 1309; AVX2OR512VL-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_08_14_08_08_08_08_08_08: 1310; AVX2OR512VL: # %bb.0: 1311; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1,16,17,28,29,16,17,16,17,16,17,16,17,16,17,16,17] 1312; AVX2OR512VL-NEXT: retq 1313 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 14, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8> 1314 ret <16 x i16> %shuffle 1315} 1316 1317define <16 x i16> @shuffle_v16i16_07_00_00_00_00_00_00_00_15_08_08_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) { 1318; AVX1-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_15_08_08_08_08_08_08_08: 1319; AVX1: # %bb.0: 1320; AVX1-NEXT: 
vextractf128 $1, %ymm0, %xmm1 1321; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1] 1322; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 1323; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 1324; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1325; AVX1-NEXT: retq 1326; 1327; AVX2OR512VL-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_15_08_08_08_08_08_08_08: 1328; AVX2OR512VL: # %bb.0: 1329; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1,30,31,16,17,16,17,16,17,16,17,16,17,16,17,16,17] 1330; AVX2OR512VL-NEXT: retq 1331 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 15, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8> 1332 ret <16 x i16> %shuffle 1333} 1334 1335define <16 x i16> @shuffle_v16i16_00_16_01_17_02_18_03_19_08_24_09_25_10_26_11_27(<16 x i16> %a, <16 x i16> %b) { 1336; AVX1-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_08_24_09_25_10_26_11_27: 1337; AVX1: # %bb.0: 1338; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1339; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1340; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] 1341; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1342; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1343; AVX1-NEXT: retq 1344; 1345; AVX2OR512VL-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_08_24_09_25_10_26_11_27: 1346; AVX2OR512VL: # %bb.0: 1347; AVX2OR512VL-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] 1348; AVX2OR512VL-NEXT: retq 1349 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27> 1350 ret <16 x i16> %shuffle 1351} 1352 1353define <16 x i16> 
@shuffle_v16i16_04_20_05_21_06_22_07_23_12_28_13_29_14_30_15_31(<16 x i16> %a, <16 x i16> %b) { 1354; AVX1-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_23_12_28_13_29_14_30_15_31: 1355; AVX1: # %bb.0: 1356; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1357; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1358; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 1359; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1360; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1361; AVX1-NEXT: retq 1362; 1363; AVX2OR512VL-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_23_12_28_13_29_14_30_15_31: 1364; AVX2OR512VL: # %bb.0: 1365; AVX2OR512VL-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] 1366; AVX2OR512VL-NEXT: retq 1367 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31> 1368 ret <16 x i16> %shuffle 1369} 1370 1371define <16 x i16> @shuffle_v16i16_00_16_01_17_02_18_03_19_12_28_13_29_14_30_15_31(<16 x i16> %a, <16 x i16> %b) { 1372; AVX1-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_12_28_13_29_14_30_15_31: 1373; AVX1: # %bb.0: 1374; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1375; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1376; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 1377; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1378; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1379; AVX1-NEXT: retq 1380; 1381; AVX2-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_12_28_13_29_14_30_15_31: 1382; AVX2: # %bb.0: 1383; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,0,1,u,u,2,3,u,u,4,5,u,u,6,7,u,u,24,25,u,u,26,27,u,u,28,29,u,u,30,31] 1384; 
AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,u,u,2,3,u,u,4,5,u,u,6,7,u,u,24,25,u,u,26,27,u,u,28,29,u,u,30,31,u,u] 1385; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] 1386; AVX2-NEXT: retq 1387; 1388; AVX512VL-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_12_28_13_29_14_30_15_31: 1389; AVX512VL: # %bb.0: 1390; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [0,16,1,17,2,18,3,19,12,28,13,29,14,30,15,31] 1391; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0 1392; AVX512VL-NEXT: retq 1393 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31> 1394 ret <16 x i16> %shuffle 1395} 1396 1397define <16 x i16> @shuffle_v16i16_04_20_05_21_06_22_07_23_08_24_09_25_10_26_11_27(<16 x i16> %a, <16 x i16> %b) { 1398; AVX1-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_23_08_24_09_25_10_26_11_27: 1399; AVX1: # %bb.0: 1400; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1401; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1402; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] 1403; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1404; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1405; AVX1-NEXT: retq 1406; 1407; AVX2-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_23_08_24_09_25_10_26_11_27: 1408; AVX2: # %bb.0: 1409; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,8,9,u,u,10,11,u,u,12,13,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23] 1410; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,u,u,10,11,u,u,12,13,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23,u,u] 1411; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] 1412; AVX2-NEXT: retq 1413; 
1414; AVX512VL-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_23_08_24_09_25_10_26_11_27: 1415; AVX512VL: # %bb.0: 1416; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [4,20,5,21,6,22,7,23,8,24,9,25,10,26,11,27] 1417; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0 1418; AVX512VL-NEXT: retq 1419 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27> 1420 ret <16 x i16> %shuffle 1421} 1422 1423define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_01_00_08_09_08_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) { 1424; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_09_08_08_08_08_08_08: 1425; AVX1: # %bb.0: 1426; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,0,4,5,6,7] 1427; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,1] 1428; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1429; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,0,0,4,5,6,7] 1430; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,1,1] 1431; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1432; AVX1-NEXT: retq 1433; 1434; AVX2OR512VL-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_09_08_08_08_08_08_08: 1435; AVX2OR512VL: # %bb.0: 1436; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1,16,17,18,19,16,17,16,17,16,17,16,17,16,17,16,17] 1437; AVX2OR512VL-NEXT: retq 1438 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 8, i32 9, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8> 1439 ret <16 x i16> %shuffle 1440} 1441 1442define <16 x i16> @shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_10_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) { 1443; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_10_08_08_08_08_08: 1444; AVX1: # %bb.0: 1445; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,2,4,5,6,7] 1446; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,0] 1447; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1448; 
AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,0,4,5,6,7] 1449; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,0] 1450; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1451; AVX1-NEXT: retq 1452; 1453; AVX2OR512VL-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_10_08_08_08_08_08: 1454; AVX2OR512VL: # %bb.0: 1455; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1,16,17,16,17,20,21,16,17,16,17,16,17,16,17,16,17] 1456; AVX2OR512VL-NEXT: retq 1457 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 8, i32 8, i32 10, i32 8, i32 8, i32 8, i32 8, i32 8> 1458 ret <16 x i16> %shuffle 1459} 1460 1461define <16 x i16> @shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_11_08_08_08_08(<16 x i16> %a, <16 x i16> %b) { 1462; AVX1-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_11_08_08_08_08: 1463; AVX1: # %bb.0: 1464; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,3,0,4,5,6,7] 1465; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,0] 1466; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1467; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,3,4,5,6,7] 1468; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,0] 1469; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1470; AVX1-NEXT: retq 1471; 1472; AVX2OR512VL-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_11_08_08_08_08: 1473; AVX2OR512VL: # %bb.0: 1474; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1,16,17,16,17,16,17,22,23,16,17,16,17,16,17,16,17] 1475; AVX2OR512VL-NEXT: retq 1476 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 11, i32 8, i32 8, i32 8, i32 8> 1477 ret <16 x i16> %shuffle 1478} 1479 1480define <16 x i16> @shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_08_12_08_08_08(<16 x i16> %a, <16 x i16> %b) { 1481; AVX1-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_08_12_08_08_08: 1482; 
AVX1: # %bb.0: 1483; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1] 1484; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1485; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,8,9,0,1,0,1,0,1] 1486; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1487; AVX1-NEXT: retq 1488; 1489; AVX2OR512VL-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_08_12_08_08_08: 1490; AVX2OR512VL: # %bb.0: 1491; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,24,25,16,17,16,17,16,17] 1492; AVX2OR512VL-NEXT: retq 1493 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 12, i32 8, i32 8, i32 8> 1494 ret <16 x i16> %shuffle 1495} 1496 1497define <16 x i16> @shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_08_08_08_13_08_08(<16 x i16> %a, <16 x i16> %b) { 1498; AVX1-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_08_08_08_13_08_08: 1499; AVX1: # %bb.0: 1500; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1] 1501; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1502; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,10,11,0,1,0,1] 1503; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1504; AVX1-NEXT: retq 1505; 1506; AVX2OR512VL-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_08_08_08_13_08_08: 1507; AVX2OR512VL: # %bb.0: 1508; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,26,27,16,17,16,17] 1509; AVX2OR512VL-NEXT: retq 1510 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 13, i32 8, i32 8> 1511 ret <16 x i16> %shuffle 1512} 1513 1514define <16 x i16> @shuffle_v16i16_00_06_00_00_00_00_00_00_08_08_08_08_08_08_14_08(<16 x i16> %a, <16 x i16> %b) { 1515; AVX1-LABEL: 
shuffle_v16i16_00_06_00_00_00_00_00_00_08_08_08_08_08_08_14_08: 1516; AVX1: # %bb.0: 1517; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1] 1518; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1519; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,12,13,0,1] 1520; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1521; AVX1-NEXT: retq 1522; 1523; AVX2OR512VL-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_08_08_08_08_08_08_14_08: 1524; AVX2OR512VL: # %bb.0: 1525; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,28,29,16,17] 1526; AVX2OR512VL-NEXT: retq 1527 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 14, i32 8> 1528 ret <16 x i16> %shuffle 1529} 1530 1531define <16 x i16> @shuffle_v16i16_07_00_00_00_00_00_00_00_08_08_08_08_08_08_08_15(<16 x i16> %a, <16 x i16> %b) { 1532; AVX1-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_08_08_08_08_08_08_08_15: 1533; AVX1: # %bb.0: 1534; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1] 1535; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1536; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,14,15] 1537; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1538; AVX1-NEXT: retq 1539; 1540; AVX2OR512VL-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_08_08_08_08_08_08_08_15: 1541; AVX2OR512VL: # %bb.0: 1542; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,30,31] 1543; AVX2OR512VL-NEXT: retq 1544 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 15> 1545 ret <16 x i16> %shuffle 1546} 1547 1548define <16 x i16> @shuffle_v16i16_00_00_02_02_04_04_06_06_14_14_12_12_10_10_08_08(<16 x i16> 
%a, <16 x i16> %b) { 1549; AVX1-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_14_14_12_12_10_10_08_08: 1550; AVX1: # %bb.0: 1551; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,2,2,4,5,6,7] 1552; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,6] 1553; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1554; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,8,9,8,9,4,5,4,5,0,1,0,1] 1555; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1556; AVX1-NEXT: retq 1557; 1558; AVX2OR512VL-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_14_14_12_12_10_10_08_08: 1559; AVX2OR512VL: # %bb.0: 1560; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5,8,9,8,9,12,13,12,13,28,29,28,29,24,25,24,25,20,21,20,21,16,17,16,17] 1561; AVX2OR512VL-NEXT: retq 1562 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 14, i32 14, i32 12, i32 12, i32 10, i32 10, i32 8, i32 8> 1563 ret <16 x i16> %shuffle 1564} 1565 1566define <16 x i16> @shuffle_v16i16_04_04_04_04_00_00_00_00_08_08_08_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) { 1567; AVX1-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_00_08_08_08_08_12_12_12_12: 1568; AVX1: # %bb.0: 1569; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1] 1570; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1571; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 1572; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] 1573; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1574; AVX1-NEXT: retq 1575; 1576; AVX2OR512VL-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_00_08_08_08_08_12_12_12_12: 1577; AVX2OR512VL: # %bb.0: 1578; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,24,25,24,25,24,25,24,25] 1579; AVX2OR512VL-NEXT: retq 1580 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 
12> 1581 ret <16 x i16> %shuffle 1582} 1583 1584define <16 x i16> @shuffle_v16i16_00_uu_uu_00_00_00_00_00_08_08_uu_uu_08_08_14_08(<16 x i16> %a, <16 x i16> %b) { 1585; AVX1-LABEL: shuffle_v16i16_00_uu_uu_00_00_00_00_00_08_08_uu_uu_08_08_14_08: 1586; AVX1: # %bb.0: 1587; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,2,3,4,5,6,7] 1588; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] 1589; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1590; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,4,5,6,7,0,1,0,1,12,13,0,1] 1591; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1592; AVX1-NEXT: retq 1593; 1594; AVX2OR512VL-LABEL: shuffle_v16i16_00_uu_uu_00_00_00_00_00_08_08_uu_uu_08_08_14_08: 1595; AVX2OR512VL: # %bb.0: 1596; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,u,u,u,u,0,1,0,1,0,1,0,1,0,1,16,17,16,17,u,u,u,u,16,17,16,17,28,29,16,17] 1597; AVX2OR512VL-NEXT: retq 1598 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 undef, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 undef, i32 undef, i32 8, i32 8, i32 14, i32 8> 1599 ret <16 x i16> %shuffle 1600} 1601 1602define <16 x i16> @shuffle_v16i16_07_uu_00_00_00_00_00_00_08_08_uu_uu_08_08_08_15(<16 x i16> %a, <16 x i16> %b) { 1603; AVX1-LABEL: shuffle_v16i16_07_uu_00_00_00_00_00_00_08_08_uu_uu_08_08_08_15: 1604; AVX1: # %bb.0: 1605; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[14,15,2,3,0,1,0,1,0,1,0,1,0,1,0,1] 1606; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1607; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,4,5,6,7,0,1,0,1,0,1,14,15] 1608; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1609; AVX1-NEXT: retq 1610; 1611; AVX2OR512VL-LABEL: shuffle_v16i16_07_uu_00_00_00_00_00_00_08_08_uu_uu_08_08_08_15: 1612; AVX2OR512VL: # %bb.0: 1613; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,u,u,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,u,u,u,u,16,17,16,17,16,17,30,31] 1614; AVX2OR512VL-NEXT: retq 1615 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 undef, i32 0, i32 
0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 undef, i32 undef, i32 8, i32 8, i32 8, i32 15> 1616 ret <16 x i16> %shuffle 1617} 1618 1619define <16 x i16> @shuffle_v16i16_00_uu_uu_02_04_04_uu_06_14_14_uu_12_10_10_08_08(<16 x i16> %a, <16 x i16> %b) { 1620; AVX1-LABEL: shuffle_v16i16_00_uu_uu_02_04_04_uu_06_14_14_uu_12_10_10_08_08: 1621; AVX1: # %bb.0: 1622; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,1,2,2,4,5,6,7] 1623; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,6] 1624; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1625; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,12,13,8,9,4,5,4,5,0,1,0,1] 1626; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1627; AVX1-NEXT: retq 1628; 1629; AVX2OR512VL-LABEL: shuffle_v16i16_00_uu_uu_02_04_04_uu_06_14_14_uu_12_10_10_08_08: 1630; AVX2OR512VL: # %bb.0: 1631; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,u,u,u,u,4,5,8,9,8,9,u,u,12,13,28,29,28,29,u,u,24,25,20,21,20,21,16,17,16,17] 1632; AVX2OR512VL-NEXT: retq 1633 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 undef, i32 undef, i32 2, i32 4, i32 4, i32 undef, i32 6, i32 14, i32 14, i32 undef, i32 12, i32 10, i32 10, i32 8, i32 8> 1634 ret <16 x i16> %shuffle 1635} 1636 1637define <16 x i16> @shuffle_v16i16_04_04_04_04_uu_uu_uu_uu_08_08_08_uu_uu_12_12_12(<16 x i16> %a, <16 x i16> %b) { 1638; AVX1-LABEL: shuffle_v16i16_04_04_04_04_uu_uu_uu_uu_08_08_08_uu_uu_12_12_12: 1639; AVX1: # %bb.0: 1640; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,4,4,6,7] 1641; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,2,3] 1642; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1643; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,3,4,5,6,7] 1644; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] 1645; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1646; AVX1-NEXT: retq 1647; 1648; AVX2OR512VL-LABEL: shuffle_v16i16_04_04_04_04_uu_uu_uu_uu_08_08_08_uu_uu_12_12_12: 1649; AVX2OR512VL: # %bb.0: 1650; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[8,9,8,9,8,9,8,9,u,u,u,u,u,u,u,u,16,17,16,17,16,17,u,u,u,u,24,25,24,25,24,25] 1651; AVX2OR512VL-NEXT: retq 1652 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 8, i32 8, i32 undef, i32 undef, i32 12, i32 12, i32 12> 1653 ret <16 x i16> %shuffle 1654} 1655 1656define <16 x i16> @shuffle_v16i16_00_00_00_00_04_04_04_04_16_16_16_16_20_20_20_20(<16 x i16> %a, <16 x i16> %b) { 1657; AVX1-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_16_16_16_16_20_20_20_20: 1658; AVX1: # %bb.0: 1659; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 1660; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] 1661; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] 1662; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4] 1663; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1664; AVX1-NEXT: retq 1665; 1666; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_16_16_16_16_20_20_20_20: 1667; AVX2-SLOW: # %bb.0: 1668; AVX2-SLOW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 1669; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15] 1670; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12] 1671; AVX2-SLOW-NEXT: retq 1672; 1673; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_16_16_16_16_20_20_20_20: 1674; AVX2-FAST: # %bb.0: 1675; AVX2-FAST-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 1676; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,8,9,8,9,8,9,8,9,16,17,16,17,16,17,16,17,24,25,24,25,24,25,24,25] 1677; AVX2-FAST-NEXT: retq 1678; 1679; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_16_16_16_16_20_20_20_20: 1680; AVX512VL: # %bb.0: 1681; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,0,0,4,4,4,4,16,16,16,16,20,20,20,20] 1682; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0 1683; AVX512VL-NEXT: retq 1684 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, 
i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 16, i32 16, i32 16, i32 16, i32 20, i32 20, i32 20, i32 20> 1685 ret <16 x i16> %shuffle 1686} 1687 1688define <16 x i16> @shuffle_v16i16_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20(<16 x i16> %a, <16 x i16> %b) { 1689; AVX1-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20: 1690; AVX1: # %bb.0: 1691; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1692; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 1693; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] 1694; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] 1695; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4] 1696; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1697; AVX1-NEXT: retq 1698; 1699; AVX2-SLOW-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20: 1700; AVX2-SLOW: # %bb.0: 1701; AVX2-SLOW-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] 1702; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15] 1703; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12] 1704; AVX2-SLOW-NEXT: retq 1705; 1706; AVX2-FAST-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20: 1707; AVX2-FAST: # %bb.0: 1708; AVX2-FAST-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] 1709; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,8,9,8,9,8,9,8,9,16,17,16,17,16,17,16,17,24,25,24,25,24,25,24,25] 1710; AVX2-FAST-NEXT: retq 1711; 1712; AVX512VL-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20: 1713; AVX512VL: # %bb.0: 1714; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [8,8,8,8,12,12,12,12,16,16,16,16,20,20,20,20] 1715; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0 1716; AVX512VL-NEXT: retq 1717 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12, i32 16, i32 16, i32 16, i32 16, i32 20, i32 20, i32 20, i32 20> 1718 ret <16 x i16> %shuffle 
1719} 1720 1721define <16 x i16> @shuffle_v16i16_08_08_08_08_12_12_12_12_24_24_24_24_28_28_28_28(<16 x i16> %a, <16 x i16> %b) { 1722; AVX1-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_24_24_24_24_28_28_28_28: 1723; AVX1: # %bb.0: 1724; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1725; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 1726; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] 1727; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 1728; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] 1729; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4] 1730; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1731; AVX1-NEXT: retq 1732; 1733; AVX2-SLOW-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_24_24_24_24_28_28_28_28: 1734; AVX2-SLOW: # %bb.0: 1735; AVX2-SLOW-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] 1736; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15] 1737; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12] 1738; AVX2-SLOW-NEXT: retq 1739; 1740; AVX2-FAST-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_24_24_24_24_28_28_28_28: 1741; AVX2-FAST: # %bb.0: 1742; AVX2-FAST-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] 1743; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,8,9,8,9,8,9,8,9,16,17,16,17,16,17,16,17,24,25,24,25,24,25,24,25] 1744; AVX2-FAST-NEXT: retq 1745; 1746; AVX512VL-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_24_24_24_24_28_28_28_28: 1747; AVX512VL: # %bb.0: 1748; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [8,8,8,8,12,12,12,12,24,24,24,24,28,28,28,28] 1749; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0 1750; AVX512VL-NEXT: retq 1751 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12, i32 24, i32 24, i32 24, i32 24, i32 28, i32 28, i32 28, i32 28> 1752 ret <16 x i16> %shuffle 1753} 1754 1755define <16 x i16> 
@shuffle_v16i16_00_00_00_00_04_04_04_04_24_24_24_24_28_28_28_28(<16 x i16> %a, <16 x i16> %b) { 1756; AVX1-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_24_24_24_24_28_28_28_28: 1757; AVX1: # %bb.0: 1758; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 1759; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] 1760; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 1761; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] 1762; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4] 1763; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1764; AVX1-NEXT: retq 1765; 1766; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_24_24_24_24_28_28_28_28: 1767; AVX2-SLOW: # %bb.0: 1768; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] 1769; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15] 1770; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12] 1771; AVX2-SLOW-NEXT: retq 1772; 1773; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_24_24_24_24_28_28_28_28: 1774; AVX2-FAST: # %bb.0: 1775; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] 1776; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,8,9,8,9,8,9,8,9,16,17,16,17,16,17,16,17,24,25,24,25,24,25,24,25] 1777; AVX2-FAST-NEXT: retq 1778; 1779; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_24_24_24_24_28_28_28_28: 1780; AVX512VL: # %bb.0: 1781; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,0,0,4,4,4,4,24,24,24,24,28,28,28,28] 1782; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0 1783; AVX512VL-NEXT: retq 1784 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 24, i32 24, i32 24, i32 24, i32 28, i32 28, i32 28, i32 28> 1785 ret <16 x i16> %shuffle 1786} 1787 1788define <16 x i16> @shuffle_v16i16_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23(<16 x i16> %a, <16 x i16> %b) { 1789; AVX1-LABEL: 
shuffle_v16i16_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23: 1790; AVX1: # %bb.0: 1791; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1792; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1793; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1794; AVX1-NEXT: retq 1795; 1796; AVX2-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23: 1797; AVX2: # %bb.0: 1798; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1799; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1800; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 1801; AVX2-NEXT: retq 1802; 1803; AVX512VL-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23: 1804; AVX512VL: # %bb.0: 1805; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23] 1806; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0 1807; AVX512VL-NEXT: retq 1808 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23> 1809 ret <16 x i16> %shuffle 1810} 1811 1812define <16 x i16> @shuffle_v16i16_zz_zz_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_24(<16 x i16> %a) { 1813; AVX1-LABEL: shuffle_v16i16_zz_zz_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_24: 1814; AVX1: # %bb.0: 1815; AVX1-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1] 1816; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1817; AVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1] 1818; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1819; AVX1-NEXT: retq 1820; 1821; AVX2OR512VL-LABEL: shuffle_v16i16_zz_zz_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_24: 1822; AVX2OR512VL: # %bb.0: 1823; 
AVX2OR512VL-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17] 1824; AVX2OR512VL-NEXT: retq 1825 %shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 24> 1826 ret <16 x i16> %shuffle 1827} 1828 1829define <16 x i16> @shuffle_v16i16_17_18_19_20_21_22_23_zz_25_26_27_28_29_30_31_zz(<16 x i16> %a) { 1830; AVX1-LABEL: shuffle_v16i16_17_18_19_20_21_22_23_zz_25_26_27_28_29_30_31_zz: 1831; AVX1: # %bb.0: 1832; AVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero 1833; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1834; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero 1835; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1836; AVX1-NEXT: retq 1837; 1838; AVX2OR512VL-LABEL: shuffle_v16i16_17_18_19_20_21_22_23_zz_25_26_27_28_29_30_31_zz: 1839; AVX2OR512VL: # %bb.0: 1840; AVX2OR512VL-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,ymm0[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero 1841; AVX2OR512VL-NEXT: retq 1842 %shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 0> 1843 ret <16 x i16> %shuffle 1844} 1845 1846define <16 x i16> @shuffle_v16i16_06_07_01_02_07_00_04_05_14_15_09_10_15_08_12_13(<16 x i16> %a) { 1847; AVX1-LABEL: shuffle_v16i16_06_07_01_02_07_00_04_05_14_15_09_10_15_08_12_13: 1848; AVX1: # %bb.0: 1849; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1850; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [12,13,14,15,2,3,4,5,14,15,0,1,8,9,10,11] 1851; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 1852; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 1853; AVX1-NEXT: vinsertf128 $1, 
%xmm1, %ymm0, %ymm0 1854; AVX1-NEXT: retq 1855; 1856; AVX2OR512VL-LABEL: shuffle_v16i16_06_07_01_02_07_00_04_05_14_15_09_10_15_08_12_13: 1857; AVX2OR512VL: # %bb.0: 1858; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[12,13,14,15,2,3,4,5,14,15,0,1,8,9,10,11,28,29,30,31,18,19,20,21,30,31,16,17,24,25,26,27] 1859; AVX2OR512VL-NEXT: retq 1860 %1 = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 6, i32 7, i32 1, i32 2, i32 7, i32 0, i32 4, i32 5, i32 14, i32 15, i32 9, i32 10, i32 15, i32 8, i32 12, i32 13> 1861 ret <16 x i16> %1 1862} 1863 1864; 1865; Shuffle to logical bit shifts 1866; 1867 1868define <16 x i16> @shuffle_v16i16_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14(<16 x i16> %a) { 1869; AVX1-LABEL: shuffle_v16i16_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14: 1870; AVX1: # %bb.0: 1871; AVX1-NEXT: vpslld $16, %xmm0, %xmm1 1872; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1873; AVX1-NEXT: vpslld $16, %xmm0, %xmm0 1874; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1875; AVX1-NEXT: retq 1876; 1877; AVX2OR512VL-LABEL: shuffle_v16i16_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14: 1878; AVX2OR512VL: # %bb.0: 1879; AVX2OR512VL-NEXT: vpslld $16, %ymm0, %ymm0 1880; AVX2OR512VL-NEXT: retq 1881 %shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 16, i32 0, i32 16, i32 2, i32 16, i32 4, i32 16, i32 6, i32 16, i32 8, i32 16, i32 10, i32 16, i32 12, i32 16, i32 14> 1882 ret <16 x i16> %shuffle 1883} 1884 1885define <16 x i16> @shuffle_v16i16_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12(<16 x i16> %a) { 1886; AVX1-LABEL: shuffle_v16i16_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12: 1887; AVX1: # %bb.0: 1888; AVX1-NEXT: vpsllq $48, %xmm0, %xmm1 1889; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1890; AVX1-NEXT: vpsllq $48, %xmm0, %xmm0 1891; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1892; AVX1-NEXT: retq 1893; 1894; AVX2OR512VL-LABEL: shuffle_v16i16_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12: 1895; AVX2OR512VL: # 
%bb.0: 1896; AVX2OR512VL-NEXT: vpsllq $48, %ymm0, %ymm0 1897; AVX2OR512VL-NEXT: retq 1898 %shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 16, i32 16, i32 16, i32 0, i32 16, i32 16, i32 16, i32 4, i32 16, i32 16, i32 16, i32 8, i32 16, i32 16, i32 16, i32 12> 1899 ret <16 x i16> %shuffle 1900} 1901 1902define <16 x i16> @shuffle_v16i16_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz(<16 x i16> %a) { 1903; AVX1-LABEL: shuffle_v16i16_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz: 1904; AVX1: # %bb.0: 1905; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1 1906; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1907; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0 1908; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1909; AVX1-NEXT: retq 1910; 1911; AVX2OR512VL-LABEL: shuffle_v16i16_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz: 1912; AVX2OR512VL: # %bb.0: 1913; AVX2OR512VL-NEXT: vpsrld $16, %ymm0, %ymm0 1914; AVX2OR512VL-NEXT: retq 1915 %shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 1, i32 16, i32 3, i32 16, i32 5, i32 16, i32 7, i32 16, i32 9, i32 16, i32 11, i32 16, i32 13, i32 16, i32 15, i32 16> 1916 ret <16 x i16> %shuffle 1917} 1918 1919define <16 x i16> @shuffle_v16i16_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz(<16 x i16> %a) { 1920; AVX1-LABEL: shuffle_v16i16_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz: 1921; AVX1: # %bb.0: 1922; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 1923; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7] 1924; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7] 1925; AVX1-NEXT: retq 1926; 1927; AVX2OR512VL-LABEL: shuffle_v16i16_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz: 1928; AVX2OR512VL: # %bb.0: 1929; AVX2OR512VL-NEXT: vpsrlq $32, %ymm0, %ymm0 1930; AVX2OR512VL-NEXT: retq 1931 %shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 2, i32 3, i32 16, i32 16, i32 6, i32 7, i32 16, i32 16, i32 10, i32 11, i32 16, i32 16, i32 14, 
i32 15, i32 16, i32 16>
  ret <16 x i16> %shuffle
}

; Mask places %a[0..3] (indices 16-19 of the concatenated operands) in lanes
; 0, 4, 8 and 12 and takes element 0 of the zeroinitializer operand everywhere
; else. The checks below expect this to lower to a word-to-qword zero extend
; (vpmovzxwq).
define <16 x i16> @shuffle_v16i16_16_zz_zz_zz_17_zz_zz_zz_18_zz_zz_zz_19_zz_zz_zz(<16 x i16> %a) {
; AVX1-LABEL: shuffle_v16i16_16_zz_zz_zz_17_zz_zz_zz_18_zz_zz_zz_19_zz_zz_zz:
; AVX1: # %bb.0:
; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2OR512VL-LABEL: shuffle_v16i16_16_zz_zz_zz_17_zz_zz_zz_18_zz_zz_zz_19_zz_zz_zz:
; AVX2OR512VL: # %bb.0:
; AVX2OR512VL-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX2OR512VL-NEXT: retq
  %shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 16, i32 0, i32 17, i32 0, i32 0, i32 0, i32 17, i32 0, i32 0, i32 0, i32 18, i32 0, i32 0, i32 0, i32 19, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

; Mask places %a[0..7] (indices 16-23) in the even lanes and element 0 of the
; zeroinitializer operand in the odd lanes. The checks below expect a
; word-to-dword zero extend (vpmovzxwd).
;
; NOTE(review): the symbol name ends in "_22_zz_22_zz", but lane 14 of the mask
; is index 23, and the expected assembly agrees (xmm0[7] feeds the last dword).
; The name is therefore a typo for "_22_zz_23_zz". It is kept as-is so the
; label directives keep matching; if it is ever renamed, rerun
; utils/update_llc_test_checks.py rather than editing the checks by hand.
define <16 x i16> @shuffle_v16i16_16_zz_17_zz_18_zz_19_zz_20_zz_21_zz_22_zz_22_zz(<16 x i16> %a) {
; AVX1-LABEL: shuffle_v16i16_16_zz_17_zz_18_zz_19_zz_20_zz_21_zz_22_zz_22_zz:
; AVX1: # %bb.0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2OR512VL-LABEL: shuffle_v16i16_16_zz_17_zz_18_zz_19_zz_20_zz_21_zz_22_zz_22_zz:
; AVX2OR512VL: # %bb.0:
; AVX2OR512VL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2OR512VL-NEXT: retq
  %shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 16, i32 0, i32 17, i32 0, i32 18, i32 0, i32 19, i32 0, i32 20, i32 0, i32 21, i32 0, i32 22, i32 0, i32 23, i32 0>
  ret <16 x i16> %shuffle
}

; Mask places %a[12..15] (indices 28-31) in lanes 0, 4, 8 and 12 and element 0
; of the zeroinitializer operand everywhere else.
define <16 x i16> @shuffle_v16i16_28_zz_zz_zz_29_zz_zz_zz_30_zz_zz_zz_31_zz_zz_zz(<16 x i16> %a) {
; AVX1-LABEL: shuffle_v16i16_28_zz_zz_zz_29_zz_zz_zz_30_zz_zz_zz_31_zz_zz_zz:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v16i16_28_zz_zz_zz_29_zz_zz_zz_30_zz_zz_zz_31_zz_zz_zz:
; AVX2: # %bb.0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7],ymm0[8],ymm1[9,10,11],ymm0[12],ymm1[13,14,15]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v16i16_28_zz_zz_zz_29_zz_zz_zz_30_zz_zz_zz_31_zz_zz_zz:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [28,1,2,3,29,5,6,7,30,9,10,11,31,13,14,15]
; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vpermt2w %ymm0, %ymm2, %ymm1
; AVX512VL-NEXT: vmovdqa %ymm1, %ymm0
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 28, i32 0, i32 0, i32 0, i32
29, i32 0, i32 0, i32 0, i32 30, i32 0, i32 0, i32 0, i32 31, i32 0, i32 0, i32 0> 2000 ret <16 x i16> %shuffle 2001} 2002 2003define <16 x i16> @shuffle_v16i16_23_00_01_02_03_04_05_06_31_08_09_10_11_12_13_14(<16 x i16> %a, <16 x i16> %b) { 2004; AVX1-LABEL: shuffle_v16i16_23_00_01_02_03_04_05_06_31_08_09_10_11_12_13_14: 2005; AVX1: # %bb.0: 2006; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 2007; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 2008; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[14,15],xmm3[0,1,2,3,4,5,6,7,8,9,10,11,12,13] 2009; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] 2010; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 2011; AVX1-NEXT: retq 2012; 2013; AVX2OR512VL-LABEL: shuffle_v16i16_23_00_01_02_03_04_05_06_31_08_09_10_11_12_13_14: 2014; AVX2OR512VL: # %bb.0: 2015; AVX2OR512VL-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[14,15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13],ymm1[30,31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29] 2016; AVX2OR512VL-NEXT: retq 2017 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 31, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14> 2018 ret <16 x i16> %shuffle 2019} 2020 2021define <16 x i16> @shuffle_v16i16_01_02_03_04_05_06_07_16_09_10_11_12_13_14_15_24(<16 x i16> %a, <16 x i16> %b) { 2022; AVX1-LABEL: shuffle_v16i16_01_02_03_04_05_06_07_16_09_10_11_12_13_14_15_24: 2023; AVX1: # %bb.0: 2024; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 2025; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 2026; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm3[0,1] 2027; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1] 2028; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 2029; AVX1-NEXT: retq 2030; 2031; AVX2OR512VL-LABEL: shuffle_v16i16_01_02_03_04_05_06_07_16_09_10_11_12_13_14_15_24: 2032; AVX2OR512VL: # %bb.0: 2033; AVX2OR512VL-NEXT: vpalignr {{.*#+}} ymm0 = 
ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0,1],ymm0[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16,17] 2034; AVX2OR512VL-NEXT: retq 2035 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 24> 2036 ret <16 x i16> %shuffle 2037} 2038 2039define <16 x i16> @shuffle_v16i16_17_18_19_20_21_22_23_00_25_26_27_28_29_30_31_8(<16 x i16> %a, <16 x i16> %b) { 2040; AVX1-LABEL: shuffle_v16i16_17_18_19_20_21_22_23_00_25_26_27_28_29_30_31_8: 2041; AVX1: # %bb.0: 2042; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 2043; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 2044; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm3[0,1] 2045; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1] 2046; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 2047; AVX1-NEXT: retq 2048; 2049; AVX2OR512VL-LABEL: shuffle_v16i16_17_18_19_20_21_22_23_00_25_26_27_28_29_30_31_8: 2050; AVX2OR512VL: # %bb.0: 2051; AVX2OR512VL-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1],ymm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17] 2052; AVX2OR512VL-NEXT: retq 2053 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 00, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 8> 2054 ret <16 x i16> %shuffle 2055} 2056 2057define <16 x i16> @shuffle_v16i16_07_16_17_18_19_20_21_22_15_24_25_26_27_28_29_30(<16 x i16> %a, <16 x i16> %b) { 2058; AVX1-LABEL: shuffle_v16i16_07_16_17_18_19_20_21_22_15_24_25_26_27_28_29_30: 2059; AVX1: # %bb.0: 2060; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 2061; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 2062; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[14,15],xmm3[0,1,2,3,4,5,6,7,8,9,10,11,12,13] 2063; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] 2064; 
AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 2065; AVX1-NEXT: retq 2066; 2067; AVX2OR512VL-LABEL: shuffle_v16i16_07_16_17_18_19_20_21_22_15_24_25_26_27_28_29_30: 2068; AVX2OR512VL: # %bb.0: 2069; AVX2OR512VL-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[14,15],ymm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13],ymm0[30,31],ymm1[16,17,18,19,20,21,22,23,24,25,26,27,28,29] 2070; AVX2OR512VL-NEXT: retq 2071 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 15, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30> 2072 ret <16 x i16> %shuffle 2073} 2074 2075define <16 x i16> @shuffle_v16i16_01_02_03_04_05_06_07_00_17_18_19_20_21_22_23_16(<16 x i16> %a, <16 x i16> %b) { 2076; AVX1-LABEL: shuffle_v16i16_01_02_03_04_05_06_07_00_17_18_19_20_21_22_23_16: 2077; AVX1: # %bb.0: 2078; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1] 2079; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1] 2080; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2081; AVX1-NEXT: retq 2082; 2083; AVX2-LABEL: shuffle_v16i16_01_02_03_04_05_06_07_00_17_18_19_20_21_22_23_16: 2084; AVX2: # %bb.0: 2085; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 2086; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,18,19,20,21,22,23,24,25,26,27,28,29,30,31,16,17] 2087; AVX2-NEXT: retq 2088; 2089; AVX512VL-LABEL: shuffle_v16i16_01_02_03_04_05_06_07_00_17_18_19_20_21_22_23_16: 2090; AVX512VL: # %bb.0: 2091; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [1,2,3,4,5,6,7,0,17,18,19,20,21,22,23,16] 2092; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0 2093; AVX512VL-NEXT: retq 2094 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 16> 2095 ret <16 x i16> %shuffle 2096} 2097 2098define <16 x i16> 
@shuffle_v16i16_07_00_01_02_03_04_05_06_23_16_17_18_19_20_21_22(<16 x i16> %a, <16 x i16> %b) { 2099; AVX1-LABEL: shuffle_v16i16_07_00_01_02_03_04_05_06_23_16_17_18_19_20_21_22: 2100; AVX1: # %bb.0: 2101; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13] 2102; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13] 2103; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2104; AVX1-NEXT: retq 2105; 2106; AVX2-LABEL: shuffle_v16i16_07_00_01_02_03_04_05_06_23_16_17_18_19_20_21_22: 2107; AVX2: # %bb.0: 2108; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 2109; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,30,31,16,17,18,19,20,21,22,23,24,25,26,27,28,29] 2110; AVX2-NEXT: retq 2111; 2112; AVX512VL-LABEL: shuffle_v16i16_07_00_01_02_03_04_05_06_23_16_17_18_19_20_21_22: 2113; AVX512VL: # %bb.0: 2114; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [7,0,1,2,3,4,5,6,23,16,17,18,19,20,21,22] 2115; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0 2116; AVX512VL-NEXT: retq 2117 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 23, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22> 2118 ret <16 x i16> %shuffle 2119} 2120 2121define <16 x i16> @shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_11(<16 x i16> %a, <16 x i16> %b) { 2122; AVX1-LABEL: shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_11: 2123; AVX1: # %bb.0: 2124; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2125; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2126; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,0,2,4,5,6,7] 2127; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,4,7] 2128; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] 2129; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2130; AVX1-NEXT: retq 2131; 2132; AVX2-LABEL: shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_11: 
2133; AVX2: # %bb.0: 2134; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] 2135; AVX2-NEXT: vpbroadcastq %xmm1, %xmm1 2136; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5] 2137; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] 2138; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 2139; AVX2-NEXT: retq 2140; 2141; AVX512VL-LABEL: shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_11: 2142; AVX512VL: # %bb.0: 2143; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,0,1,2,3,2,11,8,9,8,9,10,11,10,11] 2144; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0 2145; AVX512VL-NEXT: retq 2146 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 2, i32 3, i32 2, i32 11, i32 8, i32 9, i32 8, i32 9, i32 10, i32 11, i32 10, i32 11> 2147 ret <16 x i16> %shuffle 2148} 2149 2150define <16 x i16> @shuffle_v16i16_06_07_04_05_02_03_00_09_14_15_12_13_10_11_08_09(<16 x i16> %a, <16 x i16> %b) { 2151; AVX1-LABEL: shuffle_v16i16_06_07_04_05_02_03_00_09_14_15_12_13_10_11_08_09: 2152; AVX1: # %bb.0: 2153; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2154; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7] 2155; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0] 2156; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,2,1,0] 2157; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2158; AVX1-NEXT: retq 2159; 2160; AVX2-LABEL: shuffle_v16i16_06_07_04_05_02_03_00_09_14_15_12_13_10_11_08_09: 2161; AVX2: # %bb.0: 2162; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] 2163; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] 2164; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 2165; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 2166; AVX2-NEXT: retq 2167; 2168; AVX512VL-LABEL: 
shuffle_v16i16_06_07_04_05_02_03_00_09_14_15_12_13_10_11_08_09: 2169; AVX512VL: # %bb.0: 2170; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [6,7,4,5,2,3,0,9,14,15,12,13,10,11,8,9] 2171; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0 2172; AVX512VL-NEXT: retq 2173 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 9, i32 14, i32 15, i32 12, i32 13, i32 10, i32 11, i32 8, i32 9> 2174 ret <16 x i16> %shuffle 2175} 2176 2177define <16 x i16> @shuffle_v16i16_04_05_06_07_16_17_18_27_12_13_14_15_24_25_26_27(<16 x i16> %a, <16 x i16> %b) { 2178; AVX1-LABEL: shuffle_v16i16_04_05_06_07_16_17_18_27_12_13_14_15_24_25_26_27: 2179; AVX1: # %bb.0: 2180; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 2181; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 2182; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7] 2183; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3] 2184; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,14,15] 2185; AVX1-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] 2186; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 2187; AVX1-NEXT: retq 2188; 2189; AVX2-LABEL: shuffle_v16i16_04_05_06_07_16_17_18_27_12_13_14_15_24_25_26_27: 2190; AVX2: # %bb.0: 2191; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7] 2192; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] 2193; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] 2194; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 2195; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5] 2196; AVX2-NEXT: retq 2197; 2198; AVX512VL-LABEL: shuffle_v16i16_04_05_06_07_16_17_18_27_12_13_14_15_24_25_26_27: 2199; AVX512VL: # %bb.0: 2200; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [4,5,6,7,16,17,18,27,12,13,14,15,24,25,26,27] 2201; AVX512VL-NEXT: vpermt2w 
%ymm1, %ymm2, %ymm0 2202; AVX512VL-NEXT: retq 2203 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 27, i32 12, i32 13, i32 14, i32 15, i32 24, i32 25, i32 26, i32 27> 2204 ret <16 x i16> %shuffle 2205} 2206 2207define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) { 2208; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08: 2209; AVX1: # %bb.0: 2210; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2211; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2212; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,1,4,5,6,7] 2213; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,1] 2214; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,3,4,5,6,7] 2215; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] 2216; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2217; AVX1-NEXT: retq 2218; 2219; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08: 2220; AVX2-SLOW: # %bb.0: 2221; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] 2222; AVX2-SLOW-NEXT: vpbroadcastw %xmm1, %xmm1 2223; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,2,3,4,5,6,7,8,8,10,11,12,13,14,15] 2224; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4] 2225; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] 2226; AVX2-SLOW-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 2227; AVX2-SLOW-NEXT: retq 2228; 2229; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08: 2230; AVX2-FAST: # %bb.0: 2231; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] 2232; AVX2-FAST-NEXT: vpbroadcastw %xmm1, %xmm1 2233; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17] 2234; AVX2-FAST-NEXT: 
vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] 2235; AVX2-FAST-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 2236; AVX2-FAST-NEXT: retq 2237; 2238; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08: 2239; AVX512VL: # %bb.0: 2240; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,8] 2241; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0 2242; AVX512VL-NEXT: retq 2243 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8> 2244 ret <16 x i16> %shuffle 2245} 2246 2247define <16 x i16> @shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) { 2248; AVX1-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_12: 2249; AVX1: # %bb.0: 2250; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2251; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2 2252; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 2253; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,7] 2254; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7] 2255; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] 2256; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4] 2257; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2258; AVX1-NEXT: retq 2259; 2260; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_12: 2261; AVX2-SLOW: # %bb.0: 2262; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] 2263; AVX2-SLOW-NEXT: vpsllq $48, %xmm1, %xmm1 2264; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15] 2265; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12] 2266; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm2 = 
[255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] 2267; AVX2-SLOW-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 2268; AVX2-SLOW-NEXT: retq 2269; 2270; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_12: 2271; AVX2-FAST: # %bb.0: 2272; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] 2273; AVX2-FAST-NEXT: vpsllq $48, %xmm1, %xmm1 2274; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,8,9,8,9,8,9,8,9,16,17,16,17,16,17,16,17,24,25,24,25,24,25,24,25] 2275; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] 2276; AVX2-FAST-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 2277; AVX2-FAST-NEXT: retq 2278; 2279; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_12: 2280; AVX512VL: # %bb.0: 2281; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,4,4,4,12,8,8,8,8,12,12,12,12] 2282; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0 2283; AVX512VL-NEXT: retq 2284 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 12, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12> 2285 ret <16 x i16> %shuffle 2286} 2287 2288define <16 x i16> @shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_11(<16 x i16> %a, <16 x i16> %b) { 2289; AVX1-LABEL: shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_11: 2290; AVX1: # %bb.0: 2291; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2292; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2293; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2294; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7] 2295; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7] 2296; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 
2297; AVX1-NEXT: retq 2298; 2299; AVX2-LABEL: shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_11: 2300; AVX2: # %bb.0: 2301; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] 2302; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <255,255,255,255,255,255,0,0,u,u,u,u,u,u,u,u,255,255,255,255,255,255,255,255,u,u,u,u,u,u,u,u> 2303; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 2304; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11] 2305; AVX2-NEXT: retq 2306; 2307; AVX512VL-LABEL: shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_11: 2308; AVX512VL: # %bb.0: 2309; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = <u,0,u,1,u,2,u,11,u,8,u,9,u,10,u,11> 2310; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0 2311; AVX512VL-NEXT: retq 2312 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 11, i32 undef, i32 8, i32 undef, i32 9, i32 undef, i32 10, i32 undef, i32 11> 2313 ret <16 x i16> %shuffle 2314} 2315 2316define <16 x i16> @shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_15(<16 x i16> %a, <16 x i16> %b) { 2317; AVX1-LABEL: shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_15: 2318; AVX1: # %bb.0: 2319; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2320; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 2321; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 2322; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7] 2323; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7] 2324; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 2325; AVX1-NEXT: retq 2326; 2327; AVX2-LABEL: shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_15: 2328; AVX2: # %bb.0: 2329; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm1 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15] 2330; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] 2331; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = 
ymm1[0,1,2],ymm0[3],ymm1[4,5,6,7] 2332; AVX2-NEXT: retq 2333; 2334; AVX512VL-LABEL: shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_15: 2335; AVX512VL: # %bb.0: 2336; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = <u,4,u,5,u,6,u,15,u,12,u,13,u,14,u,15> 2337; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0 2338; AVX512VL-NEXT: retq 2339 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 4, i32 undef, i32 5, i32 undef, i32 6, i32 undef, i32 15, i32 undef, i32 12, i32 undef, i32 13, i32 undef, i32 14, i32 undef, i32 15> 2340 ret <16 x i16> %shuffle 2341} 2342 2343define <16 x i16> @shuffle_v16i16_03_01_02_00_06_07_04_13_11_09_10_08_14_15_12_13(<16 x i16> %a, <16 x i16> %b) { 2344; AVX1-LABEL: shuffle_v16i16_03_01_02_00_06_07_04_13_11_09_10_08_14_15_12_13: 2345; AVX1: # %bb.0: 2346; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2347; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7] 2348; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7] 2349; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2] 2350; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[3,1,2,0,4,5,6,7] 2351; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,3,2] 2352; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2353; AVX1-NEXT: retq 2354; 2355; AVX2-SLOW-LABEL: shuffle_v16i16_03_01_02_00_06_07_04_13_11_09_10_08_14_15_12_13: 2356; AVX2-SLOW: # %bb.0: 2357; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] 2358; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] 2359; AVX2-SLOW-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 2360; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,1,2,0,4,5,6,7,11,9,10,8,12,13,14,15] 2361; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,3,2,4,5,7,6] 2362; AVX2-SLOW-NEXT: retq 2363; 2364; AVX2-FAST-LABEL: shuffle_v16i16_03_01_02_00_06_07_04_13_11_09_10_08_14_15_12_13: 2365; AVX2-FAST: # %bb.0: 2366; AVX2-FAST-NEXT: vpermq 
{{.*#+}} ymm1 = ymm0[2,3,0,1] 2367; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] 2368; AVX2-FAST-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 2369; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,2,3,4,5,0,1,12,13,14,15,8,9,10,11,22,23,18,19,20,21,16,17,28,29,30,31,24,25,26,27] 2370; AVX2-FAST-NEXT: retq 2371; 2372; AVX512VL-LABEL: shuffle_v16i16_03_01_02_00_06_07_04_13_11_09_10_08_14_15_12_13: 2373; AVX512VL: # %bb.0: 2374; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [3,1,2,0,6,7,4,13,11,9,10,8,14,15,12,13] 2375; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0 2376; AVX512VL-NEXT: retq 2377 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 1, i32 2, i32 0, i32 6, i32 7, i32 4, i32 13, i32 11, i32 9, i32 10, i32 8, i32 14, i32 15, i32 12, i32 13> 2378 ret <16 x i16> %shuffle 2379} 2380 2381define <16 x i16> @shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08(<16 x i16> %a, <16 x i16> %b) { 2382; AVX1-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08: 2383; AVX1: # %bb.0: 2384; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2385; AVX1-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1] 2386; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,14,15] 2387; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7] 2388; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1] 2389; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2390; AVX1-NEXT: retq 2391; 2392; AVX2-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08: 2393; AVX2: # %bb.0: 2394; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] 2395; AVX2-NEXT: vpbroadcastw %xmm1, %xmm1 2396; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1,24,25,24,25,24,25,24,25,16,17,16,17,16,17,16,17] 2397; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 
= [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] 2398; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 2399; AVX2-NEXT: retq 2400; 2401; AVX512VL-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08: 2402; AVX512VL: # %bb.0: 2403; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [4,4,4,4,0,0,0,8,12,12,12,12,8,8,8,8] 2404; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0 2405; AVX512VL-NEXT: retq 2406 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 0, i32 0, i32 0, i32 8, i32 12, i32 12, i32 12, i32 12, i32 8, i32 8, i32 8, i32 8> 2407 ret <16 x i16> %shuffle 2408} 2409 2410define <16 x i16> @shuffle_v16i16_02_03_00_01_06_07_04_13_10_11_08_09_14_15_12_13(<16 x i16> %a, <16 x i16> %b) { 2411; AVX1-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_13_10_11_08_09_14_15_12_13: 2412; AVX1: # %bb.0: 2413; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2414; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7] 2415; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] 2416; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,0,3,2] 2417; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2418; AVX1-NEXT: retq 2419; 2420; AVX2-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_13_10_11_08_09_14_15_12_13: 2421; AVX2: # %bb.0: 2422; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] 2423; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] 2424; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 2425; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6] 2426; AVX2-NEXT: retq 2427; 2428; AVX512VL-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_13_10_11_08_09_14_15_12_13: 2429; AVX512VL: # %bb.0: 2430; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [2,3,0,1,6,7,4,13,10,11,8,9,14,15,12,13] 2431; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0 2432; AVX512VL-NEXT: retq 2433 %shuffle = 
shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 13, i32 10, i32 11, i32 8, i32 9, i32 14, i32 15, i32 12, i32 13> 2434 ret <16 x i16> %shuffle 2435} 2436 2437define <16 x i16> @shuffle_v16i16_02_03_00_02_06_07_04_13_10_11_08_10_14_15_12_13(<16 x i16> %a, <16 x i16> %b) { 2438; AVX1-LABEL: shuffle_v16i16_02_03_00_02_06_07_04_13_10_11_08_10_14_15_12_13: 2439; AVX1: # %bb.0: 2440; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2441; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7] 2442; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7] 2443; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2] 2444; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[2,3,0,2,4,5,6,7] 2445; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,3,2] 2446; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2447; AVX1-NEXT: retq 2448; 2449; AVX2-SLOW-LABEL: shuffle_v16i16_02_03_00_02_06_07_04_13_10_11_08_10_14_15_12_13: 2450; AVX2-SLOW: # %bb.0: 2451; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] 2452; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm2 = <255,255,u,u,255,255,255,255,255,255,0,0,255,255,255,255,255,255,u,u,255,255,255,255,255,255,255,255,255,255,255,255> 2453; AVX2-SLOW-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 2454; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[2,3,0,2,4,5,6,7,10,11,8,10,12,13,14,15] 2455; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,3,2,4,5,7,6] 2456; AVX2-SLOW-NEXT: retq 2457; 2458; AVX2-FAST-LABEL: shuffle_v16i16_02_03_00_02_06_07_04_13_10_11_08_10_14_15_12_13: 2459; AVX2-FAST: # %bb.0: 2460; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] 2461; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = <255,255,u,u,255,255,255,255,255,255,0,0,255,255,255,255,255,255,u,u,255,255,255,255,255,255,255,255,255,255,255,255> 2462; AVX2-FAST-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 2463; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[4,5,6,7,0,1,4,5,12,13,14,15,8,9,10,11,20,21,22,23,16,17,20,21,28,29,30,31,24,25,26,27] 
2464; AVX2-FAST-NEXT: retq 2465; 2466; AVX512VL-LABEL: shuffle_v16i16_02_03_00_02_06_07_04_13_10_11_08_10_14_15_12_13: 2467; AVX512VL: # %bb.0: 2468; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [2,3,0,2,6,7,4,13,10,11,8,10,14,15,12,13] 2469; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0 2470; AVX512VL-NEXT: retq 2471 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 3, i32 0, i32 2, i32 6, i32 7, i32 4, i32 13, i32 10, i32 11, i32 8, i32 10, i32 14, i32 15, i32 12, i32 13> 2472 ret <16 x i16> %shuffle 2473} 2474 2475define <16 x i16> @shuffle_v16i16_02_03_00_01_06_07_04_15_10_11_08_09_14_15_12_15(<16 x i16> %a, <16 x i16> %b) { 2476; AVX1-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_15_10_11_08_09_14_15_12_15: 2477; AVX1: # %bb.0: 2478; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2479; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] 2480; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7] 2481; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,0,2,3] 2482; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,7,4,7] 2483; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2484; AVX1-NEXT: retq 2485; 2486; AVX2-SLOW-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_15_10_11_08_09_14_15_12_15: 2487; AVX2-SLOW: # %bb.0: 2488; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] 2489; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7] 2490; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,6,7,4,7,8,9,10,11,14,15,12,15] 2491; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] 2492; AVX2-SLOW-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 2493; AVX2-SLOW-NEXT: retq 2494; 2495; AVX2-FAST-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_15_10_11_08_09_14_15_12_15: 2496; AVX2-FAST: # %bb.0: 2497; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[4,5,6,7,0,1,2,3,12,13,14,15,8,9,14,15,20,21,22,23,16,17,18,19,28,29,30,31,24,25,30,31] 2498; AVX2-FAST-NEXT: 
vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] 2499; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] 2500; AVX2-FAST-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 2501; AVX2-FAST-NEXT: retq 2502; 2503; AVX512VL-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_15_10_11_08_09_14_15_12_15: 2504; AVX512VL: # %bb.0: 2505; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [2,3,0,1,6,7,4,15,10,11,8,9,14,15,12,15] 2506; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0 2507; AVX512VL-NEXT: retq 2508 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 15, i32 10, i32 11, i32 8, i32 9, i32 14, i32 15, i32 12, i32 15> 2509 ret <16 x i16> %shuffle 2510} 2511 2512define <16 x i16> @shuffle_v16i16_07_05_06_04_03_01_02_08_15_13_14_12_11_09_10_08(<16 x i16> %a, <16 x i16> %b) { 2513; AVX1-LABEL: shuffle_v16i16_07_05_06_04_03_01_02_08_15_13_14_12_11_09_10_08: 2514; AVX1: # %bb.0: 2515; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2516; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1] 2517; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3 2518; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5,6,7] 2519; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 2520; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 2521; AVX1-NEXT: retq 2522; 2523; AVX2-LABEL: shuffle_v16i16_07_05_06_04_03_01_02_08_15_13_14_12_11_09_10_08: 2524; AVX2: # %bb.0: 2525; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] 2526; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] 2527; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 2528; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1,30,31,26,27,28,29,24,25,22,23,18,19,20,21,16,17] 2529; AVX2-NEXT: retq 2530; 2531; AVX512VL-LABEL: 
shuffle_v16i16_07_05_06_04_03_01_02_08_15_13_14_12_11_09_10_08: 2532; AVX512VL: # %bb.0: 2533; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [7,5,6,4,3,1,2,8,15,13,14,12,11,9,10,8] 2534; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0 2535; AVX512VL-NEXT: retq 2536 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 5, i32 6, i32 4, i32 3, i32 1, i32 2, i32 8, i32 15, i32 13, i32 14, i32 12, i32 11, i32 9, i32 10, i32 8> 2537 ret <16 x i16> %shuffle 2538} 2539 2540define <16 x i16> @shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08(<16 x i16> %a, <16 x i16> %b) { 2541; AVX1-LABEL: shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08: 2542; AVX1: # %bb.0: 2543; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2544; AVX1-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1] 2545; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,2,3] 2546; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7] 2547; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1] 2548; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2549; AVX1-NEXT: retq 2550; 2551; AVX2-LABEL: shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08: 2552; AVX2: # %bb.0: 2553; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] 2554; AVX2-NEXT: vpbroadcastw %xmm1, %xmm1 2555; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1,18,19,16,17,26,27,24,25,26,27,24,25,18,19,16,17] 2556; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] 2557; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 2558; AVX2-NEXT: retq 2559; 2560; AVX512VL-LABEL: shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08: 2561; AVX512VL: # %bb.0: 2562; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [1,0,5,4,5,4,1,8,9,8,13,12,13,12,9,8] 2563; AVX512VL-NEXT: vpermw %ymm0, %ymm1, 
%ymm0 2564; AVX512VL-NEXT: retq 2565 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 0, i32 5, i32 4, i32 5, i32 4, i32 1, i32 8, i32 9, i32 8, i32 13, i32 12, i32 13, i32 12, i32 9, i32 8> 2566 ret <16 x i16> %shuffle 2567} 2568 2569define <16 x i16> @shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08(<16 x i16> %a, <16 x i16> %b) { 2570; AVX1-LABEL: shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08: 2571; AVX1: # %bb.0: 2572; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2573; AVX1-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1] 2574; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,2,3] 2575; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7] 2576; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1] 2577; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2578; AVX1-NEXT: retq 2579; 2580; AVX2-LABEL: shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08: 2581; AVX2: # %bb.0: 2582; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] 2583; AVX2-NEXT: vpbroadcastw %xmm1, %xmm1 2584; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1,26,27,24,25,18,19,16,17,26,27,24,25,18,19,16,17] 2585; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] 2586; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 2587; AVX2-NEXT: retq 2588; 2589; AVX512VL-LABEL: shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08: 2590; AVX512VL: # %bb.0: 2591; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [5,4,1,0,5,4,1,8,13,12,9,8,13,12,9,8] 2592; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0 2593; AVX512VL-NEXT: retq 2594 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 5, i32 4, i32 1, i32 0, i32 5, i32 4, i32 1, i32 8, i32 13, i32 12, i32 9, i32 8, i32 13, i32 12, i32 9, i32 
8> 2595 ret <16 x i16> %shuffle 2596} 2597 2598define <16 x i16> @shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12(<16 x i16> %a, <16 x i16> %b) { 2599; AVX1-LABEL: shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12: 2600; AVX1: # %bb.0: 2601; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2602; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2 2603; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,2,3] 2604; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7] 2605; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9] 2606; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2607; AVX1-NEXT: retq 2608; 2609; AVX2-LABEL: shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12: 2610; AVX2: # %bb.0: 2611; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] 2612; AVX2-NEXT: vpsllq $48, %xmm1, %xmm1 2613; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9,26,27,24,25,18,19,16,17,18,19,16,17,26,27,24,25] 2614; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] 2615; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 2616; AVX2-NEXT: retq 2617; 2618; AVX512VL-LABEL: shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12: 2619; AVX512VL: # %bb.0: 2620; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [5,4,1,0,1,0,5,12,13,12,9,8,9,8,13,12] 2621; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0 2622; AVX512VL-NEXT: retq 2623 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 5, i32 4, i32 1, i32 0, i32 1, i32 0, i32 5, i32 12, i32 13, i32 12, i32 9, i32 8, i32 9, i32 8, i32 13, i32 12> 2624 ret <16 x i16> %shuffle 2625} 2626 2627define <16 x i16> @shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08(<16 x i16> %a, <16 x i16> %b) { 2628; AVX1-LABEL: shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08: 2629; AVX1: # %bb.0: 2630; AVX1-NEXT: vextractf128 
$1, %ymm0, %xmm1 2631; AVX1-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1] 2632; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,2,3] 2633; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7] 2634; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1] 2635; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2636; AVX1-NEXT: retq 2637; 2638; AVX2-LABEL: shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08: 2639; AVX2: # %bb.0: 2640; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] 2641; AVX2-NEXT: vpbroadcastw %xmm1, %xmm1 2642; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1,16,17,24,25,24,25,16,17,16,17,24,25,24,25,16,17] 2643; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] 2644; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 2645; AVX2-NEXT: retq 2646; 2647; AVX512VL-LABEL: shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08: 2648; AVX512VL: # %bb.0: 2649; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,4,4,0,0,4,4,8,8,12,12,8,8,12,12,8] 2650; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0 2651; AVX512VL-NEXT: retq 2652 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 8, i32 8, i32 12, i32 12, i32 8, i32 8, i32 12, i32 12, i32 8> 2653 ret <16 x i16> %shuffle 2654} 2655 2656define <16 x i16> @shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12(<16 x i16> %a, <16 x i16> %b) { 2657; AVX1-LABEL: shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12: 2658; AVX1: # %bb.0: 2659; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2660; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2 2661; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,2,3] 2662; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7] 2663; AVX1-NEXT: 
vpshufb {{.*#+}} xmm1 = xmm1[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9] 2664; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2665; AVX1-NEXT: retq 2666; 2667; AVX2-LABEL: shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12: 2668; AVX2: # %bb.0: 2669; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] 2670; AVX2-NEXT: vpsllq $48, %xmm1, %xmm1 2671; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9,24,25,16,17,16,17,24,25,24,25,16,17,16,17,24,25] 2672; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] 2673; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 2674; AVX2-NEXT: retq 2675; 2676; AVX512VL-LABEL: shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12: 2677; AVX512VL: # %bb.0: 2678; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [4,0,0,4,4,0,0,12,12,8,8,12,12,8,8,12] 2679; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0 2680; AVX512VL-NEXT: retq 2681 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 0, i32 0, i32 4, i32 4, i32 0, i32 0, i32 12, i32 12, i32 8, i32 8, i32 12, i32 12, i32 8, i32 8, i32 12> 2682 ret <16 x i16> %shuffle 2683} 2684 2685define <16 x i16> @shuffle_v16i16_02_06_04_00_05_01_07_11_10_14_12_08_13_09_15_11(<16 x i16> %a, <16 x i16> %b) { 2686; AVX1-LABEL: shuffle_v16i16_02_06_04_00_05_01_07_11_10_14_12_08_13_09_15_11: 2687; AVX1: # %bb.0: 2688; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2689; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7] 2690; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3 2691; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7] 2692; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 2693; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 2694; AVX1-NEXT: retq 2695; 2696; AVX2-LABEL: shuffle_v16i16_02_06_04_00_05_01_07_11_10_14_12_08_13_09_15_11: 2697; AVX2: # %bb.0: 2698; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] 2699; AVX2-NEXT: vmovdqa 
{{.*#+}} ymm2 = [255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] 2700; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 2701; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7,20,21,28,29,24,25,16,17,26,27,18,19,30,31,22,23] 2702; AVX2-NEXT: retq 2703; 2704; AVX512VL-LABEL: shuffle_v16i16_02_06_04_00_05_01_07_11_10_14_12_08_13_09_15_11: 2705; AVX512VL: # %bb.0: 2706; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [2,6,4,0,5,1,7,11,10,14,12,8,13,9,15,11] 2707; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0 2708; AVX512VL-NEXT: retq 2709 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 6, i32 4, i32 0, i32 5, i32 1, i32 7, i32 11, i32 10, i32 14, i32 12, i32 8, i32 13, i32 9, i32 15, i32 11> 2710 ret <16 x i16> %shuffle 2711} 2712 2713define <16 x i16> @shuffle_v16i16_02_00_06_04_05_01_07_11_10_08_14_12_13_09_15_11(<16 x i16> %a, <16 x i16> %b) { 2714; AVX1-LABEL: shuffle_v16i16_02_00_06_04_05_01_07_11_10_08_14_12_13_09_15_11: 2715; AVX1: # %bb.0: 2716; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2717; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7] 2718; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3 2719; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7] 2720; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 2721; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 2722; AVX1-NEXT: retq 2723; 2724; AVX2-LABEL: shuffle_v16i16_02_00_06_04_05_01_07_11_10_08_14_12_13_09_15_11: 2725; AVX2: # %bb.0: 2726; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] 2727; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] 2728; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 2729; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7,20,21,16,17,28,29,24,25,26,27,18,19,30,31,22,23] 2730; AVX2-NEXT: retq 2731; 2732; 
AVX512VL-LABEL: shuffle_v16i16_02_00_06_04_05_01_07_11_10_08_14_12_13_09_15_11: 2733; AVX512VL: # %bb.0: 2734; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [2,0,6,4,5,1,7,11,10,8,14,12,13,9,15,11] 2735; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0 2736; AVX512VL-NEXT: retq 2737 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 0, i32 6, i32 4, i32 5, i32 1, i32 7, i32 11, i32 10, i32 8, i32 14, i32 12, i32 13, i32 9, i32 15, i32 11> 2738 ret <16 x i16> %shuffle 2739} 2740 2741define <16 x i16> @shuffle_v16i16_02_06_04_00_01_03_07_13_10_14_12_08_09_11_15_13(<16 x i16> %a, <16 x i16> %b) { 2742; AVX1-LABEL: shuffle_v16i16_02_06_04_00_01_03_07_13_10_14_12_08_09_11_15_13: 2743; AVX1: # %bb.0: 2744; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2745; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11] 2746; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3 2747; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7] 2748; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 2749; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 2750; AVX1-NEXT: retq 2751; 2752; AVX2-LABEL: shuffle_v16i16_02_06_04_00_01_03_07_13_10_14_12_08_09_11_15_13: 2753; AVX2: # %bb.0: 2754; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] 2755; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] 2756; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 2757; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11,20,21,28,29,24,25,16,17,18,19,22,23,30,31,26,27] 2758; AVX2-NEXT: retq 2759; 2760; AVX512VL-LABEL: shuffle_v16i16_02_06_04_00_01_03_07_13_10_14_12_08_09_11_15_13: 2761; AVX512VL: # %bb.0: 2762; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [2,6,4,0,1,3,7,13,10,14,12,8,9,11,15,13] 2763; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0 2764; AVX512VL-NEXT: retq 2765 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 6, i32 4, i32 
0, i32 1, i32 3, i32 7, i32 13, i32 10, i32 14, i32 12, i32 8, i32 9, i32 11, i32 15, i32 13> 2766 ret <16 x i16> %shuffle 2767} 2768 2769define <16 x i16> @shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11(<16 x i16> %a, <16 x i16> %b) { 2770; AVX1-LABEL: shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11: 2771; AVX1: # %bb.0: 2772; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2773; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7] 2774; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3 2775; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7] 2776; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 2777; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 2778; AVX1-NEXT: retq 2779; 2780; AVX2-SLOW-LABEL: shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11: 2781; AVX2-SLOW: # %bb.0: 2782; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] 2783; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3,4,5,6,7] 2784; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7,28,29,28,29,30,31,26,27,18,19,28,29,24,25,22,23] 2785; AVX2-SLOW-NEXT: retq 2786; 2787; AVX2-FAST-LABEL: shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11: 2788; AVX2-FAST: # %bb.0: 2789; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [0,5,2,3,4,5,6,7] 2790; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0 2791; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7,28,29,28,29,30,31,26,27,18,19,28,29,24,25,22,23] 2792; AVX2-FAST-NEXT: retq 2793; 2794; AVX512VL-LABEL: shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11: 2795; AVX512VL: # %bb.0: 2796; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [6,6,7,5,1,6,4,11,14,14,15,13,9,14,12,11] 2797; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0 2798; AVX512VL-NEXT: retq 2799 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 6, i32 6, i32 7, i32 5, i32 1, i32 6, i32 4, i32 11, i32 14, i32 14, i32 15, i32 13, i32 
9, i32 14, i32 12, i32 11> 2800 ret <16 x i16> %shuffle 2801} 2802 2803define <16 x i16> @shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12(<16 x i16> %a, <16 x i16> %b) { 2804; AVX1-LABEL: shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12: 2805; AVX1: # %bb.0: 2806; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2807; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2 2808; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,14,15] 2809; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7] 2810; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9] 2811; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2812; AVX1-NEXT: retq 2813; 2814; AVX2-LABEL: shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12: 2815; AVX2: # %bb.0: 2816; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] 2817; AVX2-NEXT: vpsllq $48, %xmm1, %xmm1 2818; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9,16,17,16,17,24,25,24,25,24,25,24,25,24,25,24,25] 2819; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] 2820; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 2821; AVX2-NEXT: retq 2822; 2823; AVX512VL-LABEL: shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12: 2824; AVX512VL: # %bb.0: 2825; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,4,4,4,4,4,12,8,8,12,12,12,12,12,12] 2826; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0 2827; AVX512VL-NEXT: retq 2828 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 4, i32 12, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12> 2829 ret <16 x i16> %shuffle 2830} 2831 2832define <16 x i16> @shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) { 2833; AVX1-LABEL: shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12: 2834; AVX1: # %bb.0: 2835; 
AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2836; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2 2837; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,14,15] 2838; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7] 2839; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9] 2840; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2841; AVX1-NEXT: retq 2842; 2843; AVX2-LABEL: shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12: 2844; AVX2: # %bb.0: 2845; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] 2846; AVX2-NEXT: vpsllq $48, %xmm1, %xmm1 2847; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9,24,25,24,25,16,17,16,17,24,25,24,25,24,25,24,25] 2848; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] 2849; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 2850; AVX2-NEXT: retq 2851; 2852; AVX512VL-LABEL: shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12: 2853; AVX512VL: # %bb.0: 2854; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [4,4,0,0,4,4,4,12,12,12,8,8,12,12,12,12] 2855; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0 2856; AVX512VL-NEXT: retq 2857 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 4, i32 12, i32 12, i32 12, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12> 2858 ret <16 x i16> %shuffle 2859} 2860 2861define <16 x i16> @shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) { 2862; AVX1-LABEL: shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12: 2863; AVX1: # %bb.0: 2864; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2865; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2 2866; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,14,15] 2867; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7] 2868; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = 
xmm1[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9] 2869; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2870; AVX1-NEXT: retq 2871; 2872; AVX2-LABEL: shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12: 2873; AVX2: # %bb.0: 2874; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] 2875; AVX2-NEXT: vpsllq $48, %xmm1, %xmm1 2876; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9,16,17,24,25,24,25,16,17,24,25,24,25,24,25,24,25] 2877; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] 2878; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 2879; AVX2-NEXT: retq 2880; 2881; AVX512VL-LABEL: shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12: 2882; AVX512VL: # %bb.0: 2883; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,4,4,0,4,4,4,12,8,12,12,8,12,12,12,12] 2884; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0 2885; AVX512VL-NEXT: retq 2886 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 12, i32 8, i32 12, i32 12, i32 8, i32 12, i32 12, i32 12, i32 12> 2887 ret <16 x i16> %shuffle 2888} 2889 2890define <16 x i16> @shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) { 2891; AVX1-LABEL: shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08: 2892; AVX1: # %bb.0: 2893; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2894; AVX1-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1] 2895; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,14,15] 2896; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7] 2897; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1] 2898; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2899; AVX1-NEXT: retq 2900; 2901; AVX2-LABEL: shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08: 2902; AVX2: # %bb.0: 
2903; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] 2904; AVX2-NEXT: vpbroadcastw %xmm1, %xmm1 2905; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1,16,17,24,25,24,25,16,17,16,17,16,17,16,17,16,17] 2906; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] 2907; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 2908; AVX2-NEXT: retq 2909; 2910; AVX512VL-LABEL: shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08: 2911; AVX512VL: # %bb.0: 2912; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,4,4,0,0,0,0,8,8,12,12,8,8,8,8,8] 2913; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0 2914; AVX512VL-NEXT: retq 2915 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 12, i32 12, i32 8, i32 8, i32 8, i32 8, i32 8> 2916 ret <16 x i16> %shuffle 2917} 2918 2919define <16 x i16> @shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_15(<16 x i16> %a, <16 x i16> %b) { 2920; AVX1-LABEL: shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_15: 2921; AVX1: # %bb.0: 2922; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2923; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 2924; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7] 2925; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7] 2926; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 2927; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,2,0,4,5,6,7] 2928; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2929; AVX1-NEXT: retq 2930; 2931; AVX2-SLOW-LABEL: shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_15: 2932; AVX2-SLOW: # %bb.0: 2933; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] 2934; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7] 2935; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,2,2,0,4,5,6,7,8,10,10,8,12,13,14,15] 2936; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm2 = 
[255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] 2937; AVX2-SLOW-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 2938; AVX2-SLOW-NEXT: retq 2939; 2940; AVX2-FAST-LABEL: shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_15: 2941; AVX2-FAST: # %bb.0: 2942; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,8,9,8,9,0,1,8,9,10,11,12,13,14,15,16,17,24,25,24,25,16,17,24,25,26,27,28,29,30,31] 2943; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] 2944; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] 2945; AVX2-FAST-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 2946; AVX2-FAST-NEXT: retq 2947; 2948; AVX512VL-LABEL: shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_15: 2949; AVX512VL: # %bb.0: 2950; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,4,4,0,4,5,6,15,8,12,12,8,12,13,14,15] 2951; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0 2952; AVX512VL-NEXT: retq 2953 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 5, i32 6, i32 15, i32 8, i32 12, i32 12, i32 8, i32 12, i32 13, i32 14, i32 15> 2954 ret <16 x i16> %shuffle 2955} 2956 2957define <16 x i16> @shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12(<16 x i16> %a, <16 x i16> %b) { 2958; AVX1-LABEL: shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12: 2959; AVX1: # %bb.0: 2960; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2961; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2 2962; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,14,15] 2963; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7] 2964; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9] 2965; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2966; AVX1-NEXT: retq 2967; 2968; AVX2-LABEL: shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12: 
2969; AVX2: # %bb.0: 2970; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] 2971; AVX2-NEXT: vpsllq $48, %xmm1, %xmm1 2972; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9,16,17,18,19,24,25,24,25,24,25,24,25,24,25,24,25] 2973; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <255,255,u,u,255,255,255,255,255,255,255,255,255,255,0,0,255,255,u,u,255,255,255,255,255,255,255,255,255,255,255,255> 2974; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 2975; AVX2-NEXT: retq 2976; 2977; AVX512VL-LABEL: shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12: 2978; AVX512VL: # %bb.0: 2979; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = <0,u,4,4,4,4,4,12,8,u,12,12,12,12,12,12> 2980; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0 2981; AVX512VL-NEXT: retq 2982 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 undef, i32 4, i32 4, i32 4, i32 4, i32 4, i32 12, i32 8, i32 undef, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12> 2983 ret <16 x i16> %shuffle 2984} 2985 2986define <16 x i16> @shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) { 2987; AVX1-LABEL: shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12: 2988; AVX1: # %bb.0: 2989; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2990; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2 2991; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,14,15] 2992; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7] 2993; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9] 2994; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2995; AVX1-NEXT: retq 2996; 2997; AVX2-LABEL: shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12: 2998; AVX2: # %bb.0: 2999; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] 3000; AVX2-NEXT: vpsllq $48, %xmm1, %xmm1 3001; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9,24,25,24,25,24,25,16,17,24,25,24,25,24,25,24,25] 3002; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = 
<255,255,255,255,u,u,255,255,255,255,255,255,255,255,0,0,255,255,255,255,u,u,255,255,255,255,255,255,255,255,255,255> 3003; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 3004; AVX2-NEXT: retq 3005; 3006; AVX512VL-LABEL: shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12: 3007; AVX512VL: # %bb.0: 3008; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = <4,4,u,0,4,4,4,12,12,12,u,8,12,12,12,12> 3009; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0 3010; AVX512VL-NEXT: retq 3011 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 undef, i32 0, i32 4, i32 4, i32 4, i32 12, i32 12, i32 12, i32 undef, i32 8, i32 12, i32 12, i32 12, i32 12> 3012 ret <16 x i16> %shuffle 3013} 3014 3015define <16 x i16> @shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) { 3016; AVX1-LABEL: shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12: 3017; AVX1: # %bb.0: 3018; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 3019; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2 3020; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,14,15] 3021; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7] 3022; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9] 3023; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 3024; AVX1-NEXT: retq 3025; 3026; AVX2-LABEL: shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12: 3027; AVX2: # %bb.0: 3028; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] 3029; AVX2-NEXT: vpsllq $48, %xmm1, %xmm1 3030; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9,16,17,24,25,24,25,16,17,24,25,24,25,24,25,24,25] 3031; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <u,u,255,255,255,255,255,255,255,255,255,255,255,255,0,0,u,u,255,255,255,255,255,255,255,255,255,255,255,255,255,255> 3032; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 3033; AVX2-NEXT: retq 3034; 3035; AVX512VL-LABEL: shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12: 3036; AVX512VL: 
# %bb.0: 3037; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = <u,4,4,0,4,4,4,12,u,12,12,8,12,12,12,12> 3038; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0 3039; AVX512VL-NEXT: retq 3040 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 12, i32 undef, i32 12, i32 12, i32 8, i32 12, i32 12, i32 12, i32 12> 3041 ret <16 x i16> %shuffle 3042} 3043 3044define <16 x i16> @shuffle_v16i16_00_01_02_07_uu_uu_uu_uu_08_09_10_15_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) { 3045; AVX1-LABEL: shuffle_v16i16_00_01_02_07_uu_uu_uu_uu_08_09_10_15_uu_uu_uu_uu: 3046; AVX1: # %bb.0: 3047; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 3048; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15] 3049; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 3050; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 3051; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 3052; AVX1-NEXT: retq 3053; 3054; AVX2OR512VL-LABEL: shuffle_v16i16_00_01_02_07_uu_uu_uu_uu_08_09_10_15_uu_uu_uu_uu: 3055; AVX2OR512VL: # %bb.0: 3056; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15,16,17,18,19,20,21,30,31,20,21,30,31,28,29,30,31] 3057; AVX2OR512VL-NEXT: retq 3058 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 9, i32 10, i32 15, i32 undef, i32 undef, i32 undef, i32 undef> 3059 ret <16 x i16> %shuffle 3060} 3061 3062define <16 x i16> @shuffle_v16i16_uu_uu_uu_uu_04_05_06_11_uu_uu_uu_uu_12_13_14_11(<16 x i16> %a, <16 x i16> %b) { 3063; AVX1-LABEL: shuffle_v16i16_uu_uu_uu_uu_04_05_06_11_uu_uu_uu_uu_12_13_14_11: 3064; AVX1: # %bb.0: 3065; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 3066; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[0,1,0,1] 3067; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7] 3068; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7] 3069; AVX1-NEXT: vinsertf128 $1, %xmm1, 
%ymm0, %ymm0 3070; AVX1-NEXT: retq 3071; 3072; AVX2-LABEL: shuffle_v16i16_uu_uu_uu_uu_04_05_06_11_uu_uu_uu_uu_12_13_14_11: 3073; AVX2: # %bb.0: 3074; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,1,2,3] 3075; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7,28,29,22,23,20,21,22,23,24,25,26,27,28,29,22,23] 3076; AVX2-NEXT: retq 3077; 3078; AVX512VL-LABEL: shuffle_v16i16_uu_uu_uu_uu_04_05_06_11_uu_uu_uu_uu_12_13_14_11: 3079; AVX512VL: # %bb.0: 3080; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = <u,u,u,u,4,5,6,11,u,u,u,u,12,13,14,11> 3081; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0 3082; AVX512VL-NEXT: retq 3083 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 4, i32 5, i32 6, i32 11, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 11> 3084 ret <16 x i16> %shuffle 3085} 3086 3087define <16 x i16> @shuffle_v16i16_04_05_06_03_uu_uu_uu_uu_12_13_14_11_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) { 3088; AVX1-LABEL: shuffle_v16i16_04_05_06_03_uu_uu_uu_uu_12_13_14_11_uu_uu_uu_uu: 3089; AVX1: # %bb.0: 3090; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 3091; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3] 3092; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 3093; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 3094; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 3095; AVX1-NEXT: retq 3096; 3097; AVX2OR512VL-LABEL: shuffle_v16i16_04_05_06_03_uu_uu_uu_uu_12_13_14_11_uu_uu_uu_uu: 3098; AVX2OR512VL: # %bb.0: 3099; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3,24,25,26,27,28,29,22,23,24,25,26,27,16,17,18,19] 3100; AVX2OR512VL-NEXT: retq 3101 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 5, i32 6, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 11, i32 undef, i32 undef, i32 undef, i32 undef> 3102 ret <16 x i16> %shuffle 3103} 3104 3105define <16 x i16> 
@shuffle_v16i16_01_zz_02_zz_04_uu_06_07_08_09_10_11_12_13_14_15(<16 x i16> %a) { 3106; AVX1-LABEL: shuffle_v16i16_01_zz_02_zz_04_uu_06_07_08_09_10_11_12_13_14_15: 3107; AVX1: # %bb.0: 3108; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[1,1,2,3,4,5,6,7] 3109; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 3110; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4,5,6,7] 3111; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] 3112; AVX1-NEXT: retq 3113; 3114; AVX2OR512VL-LABEL: shuffle_v16i16_01_zz_02_zz_04_uu_06_07_08_09_10_11_12_13_14_15: 3115; AVX2OR512VL: # %bb.0: 3116; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[2,3],zero,zero,ymm0[4,5],zero,zero,ymm0[8,9,u,u,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31] 3117; AVX2OR512VL-NEXT: retq 3118 %shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 1, i32 16, i32 2, i32 16, i32 4, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 3119 ret <16 x i16> %shuffle 3120} 3121 3122define <16 x i16> @shuffle_v16i16_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_11(<16 x i16> %a, <16 x i16> %b) { 3123; AVX1-LABEL: shuffle_v16i16_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_11: 3124; AVX1: # %bb.0: 3125; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 3126; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7] 3127; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3 3128; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7] 3129; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 3130; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 3131; AVX1-NEXT: retq 3132; 3133; AVX2-LABEL: shuffle_v16i16_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_11: 3134; AVX2: # %bb.0: 3135; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] 3136; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] 3137; AVX2-NEXT: vpblendvb %ymm2, %ymm0, 
%ymm1, %ymm0 3138; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7,16,17,18,19,20,21,30,31,24,25,26,27,28,29,22,23] 3139; AVX2-NEXT: retq 3140; 3141; AVX512VL-LABEL: shuffle_v16i16_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_11: 3142; AVX512VL: # %bb.0: 3143; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,7,4,5,6,11,8,9,10,15,12,13,14,11] 3144; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0 3145; AVX512VL-NEXT: retq 3146 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6, i32 11, i32 8, i32 9, i32 10, i32 15, i32 12, i32 13, i32 14, i32 11> 3147 ret <16 x i16> %shuffle 3148} 3149 3150define <16 x i16> @shuffle_v16i16_04_05_06_03_00_01_02_15_12_13_14_11_08_09_10_15(<16 x i16> %a, <16 x i16> %b) { 3151; AVX1-LABEL: shuffle_v16i16_04_05_06_03_00_01_02_15_12_13_14_11_08_09_10_15: 3152; AVX1: # %bb.0: 3153; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 3154; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,2,3] 3155; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7] 3156; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15] 3157; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 3158; AVX1-NEXT: retq 3159; 3160; AVX2-LABEL: shuffle_v16i16_04_05_06_03_00_01_02_15_12_13_14_11_08_09_10_15: 3161; AVX2: # %bb.0: 3162; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15,24,25,26,27,28,29,22,23,16,17,18,19,20,21,30,31] 3163; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] 3164; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] 3165; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 3166; AVX2-NEXT: retq 3167; 3168; AVX512VL-LABEL: shuffle_v16i16_04_05_06_03_00_01_02_15_12_13_14_11_08_09_10_15: 3169; AVX512VL: # %bb.0: 3170; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [4,5,6,3,0,1,2,15,12,13,14,11,8,9,10,15] 
3171; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0 3172; AVX512VL-NEXT: retq 3173 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 5, i32 6, i32 3, i32 0, i32 1, i32 2, i32 15, i32 12, i32 13, i32 14, i32 11, i32 8, i32 9, i32 10, i32 15> 3174 ret <16 x i16> %shuffle 3175} 3176 3177define <16 x i16> @shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13(<16 x i16> %a, <16 x i16> %b) { 3178; AVX1-LABEL: shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13: 3179; AVX1: # %bb.0: 3180; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 3181; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11] 3182; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3 3183; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7] 3184; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 3185; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 3186; AVX1-NEXT: retq 3187; 3188; AVX2-SLOW-LABEL: shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13: 3189; AVX2-SLOW: # %bb.0: 3190; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] 3191; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3,4,5,6,7] 3192; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11,22,23,30,31,18,19,16,17,20,21,30,31,22,23,26,27] 3193; AVX2-SLOW-NEXT: retq 3194; 3195; AVX2-FAST-LABEL: shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13: 3196; AVX2-FAST: # %bb.0: 3197; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,6,3,4,5,6,7] 3198; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0 3199; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11,22,23,30,31,18,19,16,17,20,21,30,31,22,23,26,27] 3200; AVX2-FAST-NEXT: retq 3201; 3202; AVX512VL-LABEL: shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13: 3203; AVX512VL: # %bb.0: 3204; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [3,7,1,0,2,7,3,13,11,15,9,8,10,15,11,13] 3205; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0 3206; AVX512VL-NEXT: retq 3207 
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 7, i32 1, i32 0, i32 2, i32 7, i32 3, i32 13, i32 11, i32 15, i32 9, i32 8, i32 10, i32 15, i32 11, i32 13> 3208 ret <16 x i16> %shuffle 3209} 3210 3211define <16 x i16> @shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_27(<16 x i16> %a, <16 x i16> %b) { 3212; AVX1-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_27: 3213; AVX1: # %bb.0: 3214; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 3215; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 3216; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] 3217; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 3218; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,8,9,14,15,14,15,8,9,12,13,14,15] 3219; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3220; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 3221; AVX1-NEXT: retq 3222; 3223; AVX2-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_27: 3224; AVX2: # %bb.0: 3225; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm1[2,3,0,1] 3226; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = <255,255,255,255,255,255,0,0,u,u,u,u,u,u,u,u,255,255,255,255,255,255,255,255,u,u,u,u,u,u,u,u> 3227; AVX2-NEXT: vpblendvb %ymm3, %ymm1, %ymm2, %ymm1 3228; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] 3229; AVX2-NEXT: retq 3230; 3231; AVX512VL-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_27: 3232; AVX512VL: # %bb.0: 3233; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [0,16,1,17,2,18,3,27,8,24,9,25,10,26,11,27] 3234; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0 3235; AVX512VL-NEXT: retq 3236 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 27, i32 8, 
i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27> 3237 ret <16 x i16> %shuffle 3238} 3239 3240define <16 x i16> @shuffle_v16i16_00_20_01_21_02_22_03_31_08_28_09_29_10_30_11_31(<16 x i16> %a, <16 x i16> %b) { 3241; AVX1-LABEL: shuffle_v16i16_00_20_01_21_02_22_03_31_08_28_09_29_10_30_11_31: 3242; AVX1: # %bb.0: 3243; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 3244; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 3245; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[2,3,0,1] 3246; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3] 3247; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7] 3248; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,8,9,14,15,14,15,8,9,12,13,14,15] 3249; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3250; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 3251; AVX1-NEXT: retq 3252; 3253; AVX2-LABEL: shuffle_v16i16_00_20_01_21_02_22_03_31_08_28_09_29_10_30_11_31: 3254; AVX2: # %bb.0: 3255; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7] 3256; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,8,9,2,3,10,11,4,5,12,13,6,7,14,15,16,17,24,25,18,19,26,27,20,21,28,29,22,23,30,31] 3257; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] 3258; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] 3259; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 3260; AVX2-NEXT: retq 3261; 3262; AVX512VL-LABEL: shuffle_v16i16_00_20_01_21_02_22_03_31_08_28_09_29_10_30_11_31: 3263; AVX512VL: # %bb.0: 3264; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [0,20,1,21,2,22,3,31,8,28,9,29,10,30,11,31] 3265; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0 3266; AVX512VL-NEXT: retq 3267 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 20, i32 1, i32 21, i32 2, i32 22, i32 3, i32 31, i32 8, i32 
28, i32 9, i32 29, i32 10, i32 30, i32 11, i32 31> 3268 ret <16 x i16> %shuffle 3269} 3270 3271define <16 x i16> @shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_31(<16 x i16> %a, <16 x i16> %b) { 3272; AVX1-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_31: 3273; AVX1: # %bb.0: 3274; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 3275; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 3276; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 3277; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 3278; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[4,5,0,1,4,5,4,5,0,1,4,5,8,9,14,15] 3279; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 3280; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 3281; AVX1-NEXT: retq 3282; 3283; AVX2-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_31: 3284; AVX2: # %bb.0: 3285; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15] 3286; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm2 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] 3287; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,0,1] 3288; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2],ymm1[3],ymm2[4,5,6,7] 3289; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] 3290; AVX2-NEXT: retq 3291; 3292; AVX512VL-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_31: 3293; AVX512VL: # %bb.0: 3294; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [4,20,5,21,6,22,7,31,12,28,13,29,14,30,15,31] 3295; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0 3296; AVX512VL-NEXT: retq 3297 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 
7, i32 31, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31> 3298 ret <16 x i16> %shuffle 3299} 3300 3301define <16 x i16> @shuffle_v16i16_04_16_05_17_06_18_07_27_12_24_13_25_14_26_15_27(<16 x i16> %a, <16 x i16> %b) { 3302; AVX1-LABEL: shuffle_v16i16_04_16_05_17_06_18_07_27_12_24_13_25_14_26_15_27: 3303; AVX1: # %bb.0: 3304; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 3305; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 3306; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1] 3307; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] 3308; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 3309; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[4,5,0,1,4,5,4,5,0,1,4,5,8,9,14,15] 3310; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 3311; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 3312; AVX1-NEXT: retq 3313; 3314; AVX2-LABEL: shuffle_v16i16_04_16_05_17_06_18_07_27_12_24_13_25_14_26_15_27: 3315; AVX2: # %bb.0: 3316; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7] 3317; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] 3318; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] 3319; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 3320; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,0,1,10,11,2,3,12,13,4,5,14,15,6,7,24,25,16,17,26,27,18,19,28,29,20,21,30,31,22,23] 3321; AVX2-NEXT: retq 3322; 3323; AVX512VL-LABEL: shuffle_v16i16_04_16_05_17_06_18_07_27_12_24_13_25_14_26_15_27: 3324; AVX512VL: # %bb.0: 3325; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [4,16,5,17,6,18,7,27,12,24,13,25,14,26,15,27] 3326; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0 3327; AVX512VL-NEXT: retq 3328 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 16, i32 5, i32 17, i32 6, i32 18, i32 7, i32 
27, i32 12, i32 24, i32 13, i32 25, i32 14, i32 26, i32 15, i32 27> 3329 ret <16 x i16> %shuffle 3330} 3331 3332define <16 x i16> @shuffle_v16i16_00_16_01_17_06_22_07_31_08_24_09_25_14_30_15_31(<16 x i16> %a, <16 x i16> %b) { 3333; AVX1-LABEL: shuffle_v16i16_00_16_01_17_06_22_07_31_08_24_09_25_14_30_15_31: 3334; AVX1: # %bb.0: 3335; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 3336; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[0,3,2,3] 3337; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 3338; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[0,3,2,3] 3339; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] 3340; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,1,4,5,6,7] 3341; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7] 3342; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5],xmm2[6,7] 3343; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[2,3,6,7,10,11,14,15,14,15,10,11,12,13,14,15] 3344; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3] 3345; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3346; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 3347; AVX1-NEXT: retq 3348; 3349; AVX2-SLOW-LABEL: shuffle_v16i16_00_16_01_17_06_22_07_31_08_24_09_25_14_30_15_31: 3350; AVX2-SLOW: # %bb.0: 3351; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm2 = ymm1[2,3,0,1] 3352; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm1 = ymm1[0,0,2,1,4,5,6,7,8,8,10,9,12,13,14,15] 3353; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm1 = ymm1[0,1,2,3,4,6,6,7,8,9,10,11,12,14,14,15] 3354; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2],ymm2[3],ymm1[4,5,6,7] 3355; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,1,1,3,4,5,6,7,8,9,9,11,12,13,14,15] 3356; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,6,5,7,7,8,9,10,11,14,13,15,15] 3357; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] 3358; AVX2-SLOW-NEXT: retq 
3359; 3360; AVX2-FAST-LABEL: shuffle_v16i16_00_16_01_17_06_22_07_31_08_24_09_25_14_30_15_31: 3361; AVX2-FAST: # %bb.0: 3362; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm2 = ymm1[2,3,0,1] 3363; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[0,1,0,1,4,5,2,3,8,9,12,13,12,13,14,15,16,17,16,17,20,21,18,19,24,25,28,29,28,29,30,31] 3364; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2],ymm2[3],ymm1[4,5,6,7] 3365; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,2,3,6,7,12,13,10,11,14,15,14,15,16,17,18,19,18,19,22,23,28,29,26,27,30,31,30,31] 3366; AVX2-FAST-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] 3367; AVX2-FAST-NEXT: retq 3368; 3369; AVX512VL-LABEL: shuffle_v16i16_00_16_01_17_06_22_07_31_08_24_09_25_14_30_15_31: 3370; AVX512VL: # %bb.0: 3371; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [0,16,1,17,6,22,7,31,8,24,9,25,14,30,15,31] 3372; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0 3373; AVX512VL-NEXT: retq 3374 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 6, i32 22, i32 7, i32 31, i32 8, i32 24, i32 9, i32 25, i32 14, i32 30, i32 15, i32 31> 3375 ret <16 x i16> %shuffle 3376} 3377 3378define <16 x i16> @shuffle_v16i16_00_20_01_21_06_16_07_25_08_28_09_29_14_24_15_25(<16 x i16> %a, <16 x i16> %b) { 3379; AVX1-LABEL: shuffle_v16i16_00_20_01_21_06_16_07_25_08_28_09_29_14_24_15_25: 3380; AVX1: # %bb.0: 3381; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 3382; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[2,0,2,3] 3383; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 3384; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[0,3,2,3] 3385; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] 3386; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2,3,4,5,6,7] 3387; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,10,11,0,1,2,3,2,3,0,1,12,13,2,3] 3388; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = 
xmm0[0,3,2,3] 3389; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3390; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 3391; AVX1-NEXT: retq 3392; 3393; AVX2-SLOW-LABEL: shuffle_v16i16_00_20_01_21_06_16_07_25_08_28_09_29_14_24_15_25: 3394; AVX2-SLOW: # %bb.0: 3395; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm2 = ymm1[2,3,0,1] 3396; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm3 = <255,255,0,0,u,u,u,u,255,255,255,255,u,u,u,u,255,255,255,255,u,u,u,u,255,255,255,255,u,u,u,u> 3397; AVX2-SLOW-NEXT: vpblendvb %ymm3, %ymm1, %ymm2, %ymm1 3398; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[8,9,8,9,4,5,10,11,0,1,0,1,12,13,2,3,24,25,24,25,20,21,26,27,16,17,16,17,28,29,18,19] 3399; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,1,1,3,4,5,6,7,8,9,9,11,12,13,14,15] 3400; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,6,5,7,7,8,9,10,11,14,13,15,15] 3401; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] 3402; AVX2-SLOW-NEXT: retq 3403; 3404; AVX2-FAST-LABEL: shuffle_v16i16_00_20_01_21_06_16_07_25_08_28_09_29_14_24_15_25: 3405; AVX2-FAST: # %bb.0: 3406; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm2 = ymm1[2,3,0,1] 3407; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm3 = <255,255,0,0,u,u,u,u,255,255,255,255,u,u,u,u,255,255,255,255,u,u,u,u,255,255,255,255,u,u,u,u> 3408; AVX2-FAST-NEXT: vpblendvb %ymm3, %ymm1, %ymm2, %ymm1 3409; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[8,9,8,9,4,5,10,11,0,1,0,1,12,13,2,3,24,25,24,25,20,21,26,27,16,17,16,17,28,29,18,19] 3410; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,2,3,6,7,12,13,10,11,14,15,14,15,16,17,18,19,18,19,22,23,28,29,26,27,30,31,30,31] 3411; AVX2-FAST-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] 3412; AVX2-FAST-NEXT: retq 3413; 3414; AVX512VL-LABEL: 
shuffle_v16i16_00_20_01_21_06_16_07_25_08_28_09_29_14_24_15_25: 3415; AVX512VL: # %bb.0: 3416; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [0,20,1,21,6,16,7,25,8,28,9,29,14,24,15,25] 3417; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0 3418; AVX512VL-NEXT: retq 3419 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 20, i32 1, i32 21, i32 6, i32 16, i32 7, i32 25, i32 8, i32 28, i32 9, i32 29, i32 14, i32 24, i32 15, i32 25> 3420 ret <16 x i16> %shuffle 3421} 3422 3423define <16 x i16> @shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_26(<16 x i16> %a, <16 x i16> %b) { 3424; AVX1-LABEL: shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_26: 3425; AVX1: # %bb.0: 3426; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 3427; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 3428; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[4,5,0,1,12,13,10,11,8,9,10,11,12,13,10,11] 3429; AVX1-NEXT: vpshuflw {{.*#+}} xmm3 = xmm0[1,0,3,2,4,5,6,7] 3430; AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm3[0],xmm1[0],xmm3[1],xmm1[1] 3431; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 3432; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 3433; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7] 3434; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5] 3435; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3436; AVX1-NEXT: retq 3437; 3438; AVX2-SLOW-LABEL: shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_26: 3439; AVX2-SLOW: # %bb.0: 3440; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm2 = ymm1[2,3,0,1] 3441; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm3 = <255,255,255,255,0,0,255,255,u,u,u,u,u,u,u,u,255,255,255,255,255,255,255,255,u,u,u,u,u,u,u,u> 3442; AVX2-SLOW-NEXT: vpblendvb %ymm3, %ymm1, %ymm2, %ymm1 3443; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm1 = ymm1[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] 3444; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = 
ymm0[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] 3445; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] 3446; AVX2-SLOW-NEXT: retq 3447; 3448; AVX2-FAST-LABEL: shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_26: 3449; AVX2-FAST: # %bb.0: 3450; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm2 = ymm1[2,3,0,1] 3451; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm3 = <255,255,255,255,0,0,255,255,u,u,u,u,u,u,u,u,255,255,255,255,255,255,255,255,u,u,u,u,u,u,u,u> 3452; AVX2-FAST-NEXT: vpblendvb %ymm3, %ymm1, %ymm2, %ymm1 3453; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[2,3,0,1,2,3,0,1,8,9,10,11,6,7,4,5,18,19,16,17,18,19,16,17,24,25,26,27,22,23,20,21] 3454; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5,6,7,4,5,12,13,14,15,18,19,16,17,22,23,20,21,22,23,20,21,28,29,30,31] 3455; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 3456; AVX2-FAST-NEXT: retq 3457; 3458; AVX512VL-LABEL: shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_26: 3459; AVX512VL: # %bb.0: 3460; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [1,0,17,16,3,2,19,26,9,8,25,24,11,10,27,26] 3461; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0 3462; AVX512VL-NEXT: retq 3463 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 0, i32 17, i32 16, i32 3, i32 2, i32 19, i32 26, i32 9, i32 8, i32 25, i32 24, i32 11, i32 10, i32 27, i32 26> 3464 ret <16 x i16> %shuffle 3465} 3466 3467define <16 x i16> @shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_11(<16 x i16> %a, <16 x i16> %b) { 3468; AVX1-LABEL: shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_11: 3469; AVX1: # %bb.0: 3470; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 3471; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 3472; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] 3473; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = 
xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 3474; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,14,15,14,15,8,9,12,13,14,15] 3475; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 3476; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 3477; AVX1-NEXT: retq 3478; 3479; AVX2-LABEL: shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_11: 3480; AVX2: # %bb.0: 3481; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm0[2,3,0,1] 3482; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = <255,255,255,255,255,255,0,0,u,u,u,u,u,u,u,u,255,255,255,255,255,255,255,255,u,u,u,u,u,u,u,u> 3483; AVX2-NEXT: vpblendvb %ymm3, %ymm0, %ymm2, %ymm0 3484; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11] 3485; AVX2-NEXT: retq 3486; 3487; AVX512VL-LABEL: shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_11: 3488; AVX512VL: # %bb.0: 3489; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [0,16,1,17,2,18,3,27,8,24,9,25,10,26,11,27] 3490; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2 3491; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0 3492; AVX512VL-NEXT: retq 3493 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 0, i32 17, i32 1, i32 18, i32 2, i32 19, i32 11, i32 24, i32 8, i32 25, i32 9, i32 26, i32 10, i32 27, i32 11> 3494 ret <16 x i16> %shuffle 3495} 3496 3497define <16 x i16> @shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_15(<16 x i16> %a, <16 x i16> %b) { 3498; AVX1-LABEL: shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_15: 3499; AVX1: # %bb.0: 3500; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 3501; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 3502; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 3503; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 3504; AVX1-NEXT: vpshufb 
{{.*#+}} xmm0 = xmm0[4,5,0,1,4,5,4,5,0,1,4,5,8,9,14,15] 3505; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 3506; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 3507; AVX1-NEXT: retq 3508; 3509; AVX2-LABEL: shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_15: 3510; AVX2: # %bb.0: 3511; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm1 = ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15] 3512; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm2 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15] 3513; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] 3514; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1,2],ymm0[3],ymm2[4,5,6,7] 3515; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7],ymm1[8],ymm0[9],ymm1[10],ymm0[11],ymm1[12],ymm0[13],ymm1[14],ymm0[15] 3516; AVX2-NEXT: retq 3517; 3518; AVX512VL-LABEL: shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_15: 3519; AVX512VL: # %bb.0: 3520; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [4,20,5,21,6,22,7,31,12,28,13,29,14,30,15,31] 3521; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2 3522; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0 3523; AVX512VL-NEXT: retq 3524 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 20, i32 4, i32 21, i32 5, i32 22, i32 6, i32 23, i32 15, i32 28, i32 12, i32 29, i32 13, i32 30, i32 14, i32 31, i32 15> 3525 ret <16 x i16> %shuffle 3526} 3527 3528define <16 x i16> @shuffle_v16i16_00_02_01_03_20_22_21_31_08_10_09_11_28_30_29_31(<16 x i16> %a, <16 x i16> %b) { 3529; AVX1-LABEL: shuffle_v16i16_00_02_01_03_20_22_21_31_08_10_09_11_28_30_29_31: 3530; AVX1: # %bb.0: 3531; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 3532; AVX1-NEXT: vpshufhw {{.*#+}} xmm3 = xmm2[0,1,2,3,4,6,5,7] 3533; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 3534; AVX1-NEXT: vpshuflw {{.*#+}} xmm4 = xmm4[0,2,1,3,4,5,6,7] 3535; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = 
xmm4[0,1,2,3],xmm3[4,5,6,7] 3536; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 3537; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,4,5,14,15,0,1,4,5,4,5,6,7] 3538; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7] 3539; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 3540; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 3541; AVX1-NEXT: retq 3542; 3543; AVX2-SLOW-LABEL: shuffle_v16i16_00_02_01_03_20_22_21_31_08_10_09_11_28_30_29_31: 3544; AVX2-SLOW: # %bb.0: 3545; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7] 3546; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] 3547; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,2,1,3,4,5,6,7,8,10,9,11,12,13,14,15] 3548; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,6,5,7,8,9,10,11,12,14,13,15] 3549; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] 3550; AVX2-SLOW-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 3551; AVX2-SLOW-NEXT: retq 3552; 3553; AVX2-FAST-LABEL: shuffle_v16i16_00_02_01_03_20_22_21_31_08_10_09_11_28_30_29_31: 3554; AVX2-FAST: # %bb.0: 3555; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7] 3556; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,4,5,2,3,6,7,8,9,12,13,10,11,14,15,16,17,20,21,18,19,22,23,24,25,28,29,26,27,30,31] 3557; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] 3558; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] 3559; AVX2-FAST-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 3560; AVX2-FAST-NEXT: retq 3561; 3562; AVX512VL-LABEL: shuffle_v16i16_00_02_01_03_20_22_21_31_08_10_09_11_28_30_29_31: 3563; AVX512VL: # %bb.0: 3564; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2,1,3,20,22,21,31,8,10,9,11,28,30,29,31] 3565; 
AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0 3566; AVX512VL-NEXT: retq 3567 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 2, i32 1, i32 3, i32 20, i32 22, i32 21, i32 31, i32 8, i32 10, i32 9, i32 11, i32 28, i32 30, i32 29, i32 31> 3568 ret <16 x i16> %shuffle 3569} 3570 3571define <16 x i16> @shuffle_v16i16_04_04_03_18_uu_uu_uu_uu_12_12_11_26_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) { 3572; AVX1-LABEL: shuffle_v16i16_04_04_03_18_uu_uu_uu_uu_12_12_11_26_uu_uu_uu_uu: 3573; AVX1: # %bb.0: 3574; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7] 3575; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,1,2,3] 3576; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,0,3,2,4,5,6,7] 3577; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 3578; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 3579; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7] 3580; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,3] 3581; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7] 3582; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 3583; AVX1-NEXT: retq 3584; 3585; AVX2-SLOW-LABEL: shuffle_v16i16_04_04_03_18_uu_uu_uu_uu_12_12_11_26_uu_uu_uu_uu: 3586; AVX2-SLOW: # %bb.0: 3587; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3,4,5,6,7,8,9],ymm1[10],ymm0[11,12,13,14,15] 3588; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,1,2,3,6,5,6,7] 3589; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,3,2,4,5,6,7,8,8,11,10,12,13,14,15] 3590; AVX2-SLOW-NEXT: retq 3591; 3592; AVX2-FAST-LABEL: shuffle_v16i16_04_04_03_18_uu_uu_uu_uu_12_12_11_26_uu_uu_uu_uu: 3593; AVX2-FAST: # %bb.0: 3594; AVX2-FAST-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3,4,5,6,7,8,9],ymm1[10],ymm0[11,12,13,14,15] 3595; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,8,9,6,7,4,5,8,9,10,11,12,13,14,15,24,25,24,25,22,23,20,21,24,25,26,27,28,29,30,31] 3596; AVX2-FAST-NEXT: retq 3597; 3598; AVX512VL-LABEL: shuffle_v16i16_04_04_03_18_uu_uu_uu_uu_12_12_11_26_uu_uu_uu_uu: 
3599; AVX512VL: # %bb.0: 3600; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = <4,4,3,18,u,u,u,u,12,12,11,26,u,u,u,u> 3601; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0 3602; AVX512VL-NEXT: retq 3603 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 3, i32 18, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 12, i32 11, i32 26, i32 undef, i32 undef, i32 undef, i32 undef> 3604 ret <16 x i16> %shuffle 3605} 3606 3607define <16 x i16> @shuffle_v16i16_00_03_02_21_uu_uu_uu_uu_08_11_10_29_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) { 3608; AVX1-LABEL: shuffle_v16i16_00_03_02_21_uu_uu_uu_uu_08_11_10_29_uu_uu_uu_uu: 3609; AVX1: # %bb.0: 3610; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 3611; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 3612; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7] 3613; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3] 3614; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2 3615; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] 3616; AVX1-NEXT: vpshufb %xmm3, %xmm0, %xmm0 3617; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 3618; AVX1-NEXT: retq 3619; 3620; AVX2-LABEL: shuffle_v16i16_00_03_02_21_uu_uu_uu_uu_08_11_10_29_uu_uu_uu_uu: 3621; AVX2: # %bb.0: 3622; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7] 3623; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3,16,17,22,23,20,21,26,27,16,17,26,27,16,17,18,19] 3624; AVX2-NEXT: retq 3625; 3626; AVX512VL-LABEL: shuffle_v16i16_00_03_02_21_uu_uu_uu_uu_08_11_10_29_uu_uu_uu_uu: 3627; AVX512VL: # %bb.0: 3628; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = <0,3,2,21,u,u,u,u,8,11,10,29,u,u,u,u> 3629; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0 3630; AVX512VL-NEXT: retq 3631 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 3, i32 2, i32 21, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 11, i32 10, i32 29, i32 undef, i32 undef, i32 undef, i32 undef> 3632 ret <16 x 
i16> %shuffle 3633} 3634 3635define <16 x i16> @shuffle_v16i16_uu_uu_uu_21_uu_uu_uu_uu_uu_uu_uu_29_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) { 3636; ALL-LABEL: shuffle_v16i16_uu_uu_uu_21_uu_uu_uu_uu_uu_uu_uu_29_uu_uu_uu_uu: 3637; ALL: # %bb.0: 3638; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm1[0,2,2,3,4,6,6,7] 3639; ALL-NEXT: retq 3640 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 21, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 29, i32 undef, i32 undef, i32 undef, i32 undef> 3641 ret <16 x i16> %shuffle 3642} 3643 3644define <16 x i16> @shuffle_v16i16_00_01_02_21_uu_uu_uu_uu_08_09_10_29_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) { 3645; AVX1-LABEL: shuffle_v16i16_00_01_02_21_uu_uu_uu_uu_08_09_10_29_uu_uu_uu_uu: 3646; AVX1: # %bb.0: 3647; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 3648; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 3649; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,2,3,3] 3650; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[3],xmm2[4,5,6,7] 3651; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3] 3652; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7] 3653; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 3654; AVX1-NEXT: retq 3655; 3656; AVX2-LABEL: shuffle_v16i16_00_01_02_21_uu_uu_uu_uu_08_09_10_29_uu_uu_uu_uu: 3657; AVX2: # %bb.0: 3658; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7] 3659; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6,7,8,9,10],ymm1[11],ymm0[12,13,14,15] 3660; AVX2-NEXT: retq 3661; 3662; AVX512VL-LABEL: shuffle_v16i16_00_01_02_21_uu_uu_uu_uu_08_09_10_29_uu_uu_uu_uu: 3663; AVX512VL: # %bb.0: 3664; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = <0,1,2,21,u,u,u,u,8,9,10,29,u,u,u,u> 3665; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0 3666; AVX512VL-NEXT: retq 3667 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 21, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 
9, i32 10, i32 29, i32 undef, i32 undef, i32 undef, i32 undef> 3668 ret <16 x i16> %shuffle 3669} 3670 3671define <16 x i16> @shuffle_v16i16_uu_uu_uu_uu_20_21_22_11_uu_uu_uu_uu_28_29_30_11(<16 x i16> %a, <16 x i16> %b) { 3672; AVX1-LABEL: shuffle_v16i16_uu_uu_uu_uu_20_21_22_11_uu_uu_uu_uu_28_29_30_11: 3673; AVX1: # %bb.0: 3674; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 3675; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 3676; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] 3677; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,6],xmm0[7] 3678; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7] 3679; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 3680; AVX1-NEXT: retq 3681; 3682; AVX2-LABEL: shuffle_v16i16_uu_uu_uu_uu_20_21_22_11_uu_uu_uu_uu_28_29_30_11: 3683; AVX2: # %bb.0: 3684; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,2] 3685; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15] 3686; AVX2-NEXT: retq 3687; 3688; AVX512VL-LABEL: shuffle_v16i16_uu_uu_uu_uu_20_21_22_11_uu_uu_uu_uu_28_29_30_11: 3689; AVX512VL: # %bb.0: 3690; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = <u,u,u,u,4,5,6,27,u,u,u,u,12,13,14,27> 3691; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2 3692; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0 3693; AVX512VL-NEXT: retq 3694 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 20, i32 21, i32 22, i32 11, i32 undef, i32 undef, i32 undef, i32 undef, i32 28, i32 29, i32 30, i32 11> 3695 ret <16 x i16> %shuffle 3696} 3697 3698define <16 x i16> @shuffle_v16i16_20_21_22_03_uu_uu_uu_uu_28_29_30_11_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) { 3699; AVX1-LABEL: shuffle_v16i16_20_21_22_03_uu_uu_uu_uu_28_29_30_11_uu_uu_uu_uu: 3700; AVX1: # %bb.0: 3701; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 3702; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 3703; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1] 3704; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = 
xmm3[0,1,2],xmm2[3],xmm3[4,5,6,7] 3705; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] 3706; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7] 3707; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 3708; AVX1-NEXT: retq 3709; 3710; AVX2-LABEL: shuffle_v16i16_20_21_22_03_uu_uu_uu_uu_28_29_30_11_uu_uu_uu_uu: 3711; AVX2: # %bb.0: 3712; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[2,3,2,3,6,7,6,7] 3713; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3],ymm1[4,5,6,7,8,9,10],ymm0[11],ymm1[12,13,14,15] 3714; AVX2-NEXT: retq 3715; 3716; AVX512VL-LABEL: shuffle_v16i16_20_21_22_03_uu_uu_uu_uu_28_29_30_11_uu_uu_uu_uu: 3717; AVX512VL: # %bb.0: 3718; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = <4,5,6,19,u,u,u,u,12,13,14,27,u,u,u,u> 3719; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2 3720; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0 3721; AVX512VL-NEXT: retq 3722 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 20, i32 21, i32 22, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 28, i32 29, i32 30, i32 11, i32 undef, i32 undef, i32 undef, i32 undef> 3723 ret <16 x i16> %shuffle 3724} 3725 3726define <16 x i16> @shuffle_v16i16_00_01_02_21_20_21_22_11_08_09_10_29_28_29_30_11(<16 x i16> %a, <16 x i16> %b) { 3727; AVX1-LABEL: shuffle_v16i16_00_01_02_21_20_21_22_11_08_09_10_29_28_29_30_11: 3728; AVX1: # %bb.0: 3729; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 3730; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] 3731; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[0,2,2,3] 3732; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[3,4,5,6],xmm0[7] 3733; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 3734; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7] 3735; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7] 3736; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 3737; AVX1-NEXT: retq 3738; 3739; AVX2-LABEL: shuffle_v16i16_00_01_02_21_20_21_22_11_08_09_10_29_28_29_30_11: 3740; AVX2: # %bb.0: 3741; AVX2-NEXT: vpblendd 
{{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7] 3742; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] 3743; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <255,255,255,255,255,255,0,0,255,255,255,255,255,255,u,u,255,255,255,255,255,255,255,255,255,255,255,255,255,255,u,u> 3744; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 3745; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7,16,17,18,19,20,21,26,27,24,25,26,27,28,29,22,23] 3746; AVX2-NEXT: retq 3747; 3748; AVX512VL-LABEL: shuffle_v16i16_00_01_02_21_20_21_22_11_08_09_10_29_28_29_30_11: 3749; AVX512VL: # %bb.0: 3750; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,2,21,20,21,22,11,8,9,10,29,28,29,30,11] 3751; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0 3752; AVX512VL-NEXT: retq 3753 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 21, i32 20, i32 21, i32 22, i32 11, i32 8, i32 9, i32 10, i32 29, i32 28, i32 29, i32 30, i32 11> 3754 ret <16 x i16> %shuffle 3755} 3756 3757define <16 x i16> @shuffle_v16i16_00_17_02_03_20_21_22_15_08_25_10_11_28_29_30_15(<16 x i16> %a, <16 x i16> %b) { 3758; AVX1-LABEL: shuffle_v16i16_00_17_02_03_20_21_22_15_08_25_10_11_28_29_30_15: 3759; AVX1: # %bb.0: 3760; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 3761; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 3762; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0],xmm2[1],xmm3[2,3],xmm2[4,5,6],xmm3[7] 3763; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7] 3764; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4,5,6],xmm0[7] 3765; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 3766; AVX1-NEXT: retq 3767; 3768; AVX2-LABEL: shuffle_v16i16_00_17_02_03_20_21_22_15_08_25_10_11_28_29_30_15: 3769; AVX2: # %bb.0: 3770; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,2,3] 3771; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3],ymm1[4,5,6],ymm0[7,8],ymm1[9],ymm0[10,11],ymm1[12,13,14],ymm0[15] 3772; AVX2-NEXT: retq 3773; 3774; AVX512VL-LABEL: 
shuffle_v16i16_00_17_02_03_20_21_22_15_08_25_10_11_28_29_30_15: 3775; AVX512VL: # %bb.0: 3776; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [0,17,2,3,20,21,22,15,8,25,10,11,28,29,30,15] 3777; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0 3778; AVX512VL-NEXT: retq 3779 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 17, i32 2, i32 3, i32 20, i32 21, i32 22, i32 15, i32 8, i32 25, i32 10, i32 11, i32 28, i32 29, i32 30, i32 15> 3780 ret <16 x i16> %shuffle 3781} 3782 3783define <16 x i16> @shuffle_v16i16_uu_uu_uu_01_uu_05_07_25_uu_uu_uu_09_uu_13_15_25(<16 x i16> %a, <16 x i16> %b) { 3784; AVX1-LABEL: shuffle_v16i16_uu_uu_uu_01_uu_05_07_25_uu_uu_uu_09_uu_13_15_25: 3785; AVX1: # %bb.0: 3786; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 3787; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,2,0] 3788; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 3789; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,1,2,1,4,5,6,7] 3790; AVX1-NEXT: vpshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,7,7] 3791; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,6],xmm1[7] 3792; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7] 3793; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7] 3794; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7] 3795; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 3796; AVX1-NEXT: retq 3797; 3798; AVX2-SLOW-LABEL: shuffle_v16i16_uu_uu_uu_01_uu_05_07_25_uu_uu_uu_09_uu_13_15_25: 3799; AVX2-SLOW: # %bb.0: 3800; AVX2-SLOW-NEXT: vextracti128 $1, %ymm1, %xmm1 3801; AVX2-SLOW-NEXT: vpbroadcastd %xmm1, %ymm1 3802; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,1,2,1,4,5,6,7,8,9,10,9,12,13,14,15] 3803; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,7,7,8,9,10,11,12,13,15,15] 3804; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15] 3805; AVX2-SLOW-NEXT: retq 3806; 3807; AVX2-FAST-LABEL: shuffle_v16i16_uu_uu_uu_01_uu_05_07_25_uu_uu_uu_09_uu_13_15_25: 3808; AVX2-FAST: # %bb.0: 3809; 
AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,2,3,8,9,10,11,14,15,14,15,16,17,18,19,20,21,18,19,24,25,26,27,30,31,30,31] 3810; AVX2-FAST-NEXT: vextracti128 $1, %ymm1, %xmm1 3811; AVX2-FAST-NEXT: vpbroadcastd %xmm1, %ymm1 3812; AVX2-FAST-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15] 3813; AVX2-FAST-NEXT: retq 3814; 3815; AVX512VL-LABEL: shuffle_v16i16_uu_uu_uu_01_uu_05_07_25_uu_uu_uu_09_uu_13_15_25: 3816; AVX512VL: # %bb.0: 3817; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = <u,u,u,1,u,5,7,25,u,u,u,9,u,13,15,25> 3818; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0 3819; AVX512VL-NEXT: retq 3820 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 5, i32 7, i32 25, i32 undef, i32 undef, i32 undef, i32 9, i32 undef, i32 13, i32 15, i32 25> 3821 ret <16 x i16> %shuffle 3822} 3823 3824define <16 x i16> @shuffle_v16i16_uu_uu_04_uu_16_18_20_uu_uu_uu_12_uu_24_26_28_uu(<16 x i16> %a, <16 x i16> %b) { 3825; AVX1-LABEL: shuffle_v16i16_uu_uu_04_uu_16_18_20_uu_uu_uu_12_uu_24_26_28_uu: 3826; AVX1: # %bb.0: 3827; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 3828; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5] 3829; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2 3830; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 3831; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[2,2,3,3] 3832; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm4[0,1,2,3],xmm2[4,5,6,7] 3833; AVX1-NEXT: vpshufb %xmm3, %xmm1, %xmm1 3834; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3] 3835; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] 3836; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 3837; AVX1-NEXT: retq 3838; 3839; AVX2-LABEL: shuffle_v16i16_uu_uu_04_uu_16_18_20_uu_uu_uu_12_uu_24_26_28_uu: 3840; AVX2: # %bb.0: 3841; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5,16,17,20,21,20,21,22,23,16,17,20,21,24,25,20,21] 3842; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = 
ymm0[0,2,2,3,4,6,6,7] 3843; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7] 3844; AVX2-NEXT: retq 3845; 3846; AVX512VL-LABEL: shuffle_v16i16_uu_uu_04_uu_16_18_20_uu_uu_uu_12_uu_24_26_28_uu: 3847; AVX512VL: # %bb.0: 3848; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = <u,u,20,u,0,2,4,u,u,u,28,u,8,10,12,u> 3849; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2 3850; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0 3851; AVX512VL-NEXT: retq 3852 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 4, i32 undef, i32 16, i32 18, i32 20, i32 undef, i32 undef, i32 undef, i32 12, i32 undef, i32 24, i32 26, i32 28, i32 undef> 3853 ret <16 x i16> %shuffle 3854} 3855 3856define <16 x i16> @shuffle_v16i16_21_22_23_00_01_02_03_12_29_30_31_08_09_10_11_12(<16 x i16> %a, <16 x i16> %b) { 3857; AVX1-LABEL: shuffle_v16i16_21_22_23_00_01_02_03_12_29_30_31_08_09_10_11_12: 3858; AVX1: # %bb.0: 3859; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 3860; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 3861; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9] 3862; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7] 3863; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9] 3864; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 3865; AVX1-NEXT: retq 3866; 3867; AVX2-LABEL: shuffle_v16i16_21_22_23_00_01_02_03_12_29_30_31_08_09_10_11_12: 3868; AVX2: # %bb.0: 3869; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm1[5,6,7],ymm0[8,9,10,11,12],ymm1[13,14,15] 3870; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] 3871; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] 3872; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 3873; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,26,27,28,29,30,31,16,17,18,19,20,21,22,23,24,25] 3874; AVX2-NEXT: retq 
3875; 3876; AVX512VL-LABEL: shuffle_v16i16_21_22_23_00_01_02_03_12_29_30_31_08_09_10_11_12: 3877; AVX512VL: # %bb.0: 3878; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [21,22,23,0,1,2,3,12,29,30,31,8,9,10,11,12] 3879; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0 3880; AVX512VL-NEXT: retq 3881 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 12, i32 29, i32 30, i32 31, i32 8, i32 9, i32 10, i32 11, i32 12> 3882 ret <16 x i16> %shuffle 3883} 3884 3885define <16 x i16> @shuffle_v16i16_uu_22_uu_uu_01_02_03_uu_uu_30_uu_uu_09_10_11_uu(<16 x i16> %a, <16 x i16> %b) { 3886; AVX1-LABEL: shuffle_v16i16_uu_22_uu_uu_01_02_03_uu_uu_30_uu_uu_09_10_11_uu: 3887; AVX1: # %bb.0: 3888; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 3889; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 3890; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9] 3891; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9] 3892; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 3893; AVX1-NEXT: retq 3894; 3895; AVX2OR512VL-LABEL: shuffle_v16i16_uu_22_uu_uu_01_02_03_uu_uu_30_uu_uu_09_10_11_uu: 3896; AVX2OR512VL: # %bb.0: 3897; AVX2OR512VL-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[10,11,12,13,14,15],ymm0[0,1,2,3,4,5,6,7,8,9],ymm1[26,27,28,29,30,31],ymm0[16,17,18,19,20,21,22,23,24,25] 3898; AVX2OR512VL-NEXT: retq 3899 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 22, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 30, i32 undef, i32 undef, i32 9, i32 10, i32 11, i32 undef> 3900 ret <16 x i16> %shuffle 3901} 3902 3903define <16 x i16> @shuffle_v16i16_05_06_07_00_01_02_03_12_13_14_15_08_09_10_11_12(<16 x i16> %a, <16 x i16> %b) { 3904; AVX1-LABEL: shuffle_v16i16_05_06_07_00_01_02_03_12_13_14_15_08_09_10_11_12: 3905; AVX1: # %bb.0: 3906; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 3907; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = 
xmm0[0,1,2,3],xmm1[4],xmm0[5,6,7] 3908; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] 3909; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] 3910; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 3911; AVX1-NEXT: retq 3912; 3913; AVX2-LABEL: shuffle_v16i16_05_06_07_00_01_02_03_12_13_14_15_08_09_10_11_12: 3914; AVX2: # %bb.0: 3915; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] 3916; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] 3917; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 3918; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,26,27,28,29,30,31,16,17,18,19,20,21,22,23,24,25] 3919; AVX2-NEXT: retq 3920; 3921; AVX512VL-LABEL: shuffle_v16i16_05_06_07_00_01_02_03_12_13_14_15_08_09_10_11_12: 3922; AVX512VL: # %bb.0: 3923; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [5,6,7,0,1,2,3,12,13,14,15,8,9,10,11,12] 3924; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0 3925; AVX512VL-NEXT: retq 3926 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12> 3927 ret <16 x i16> %shuffle 3928} 3929 3930define <16 x i16> @shuffle_v16i16_uu_06_uu_uu_01_02_03_uu_uu_14_uu_uu_09_10_11_uu(<16 x i16> %a, <16 x i16> %b) { 3931; AVX1-LABEL: shuffle_v16i16_uu_06_uu_uu_01_02_03_uu_uu_14_uu_uu_09_10_11_uu: 3932; AVX1: # %bb.0: 3933; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] 3934; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 3935; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] 3936; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3937; AVX1-NEXT: retq 3938; 3939; AVX2OR512VL-LABEL: shuffle_v16i16_uu_06_uu_uu_01_02_03_uu_uu_14_uu_uu_09_10_11_uu: 3940; AVX2OR512VL: # %bb.0: 3941; AVX2OR512VL-NEXT: vpalignr 
{{.*#+}} ymm0 = ymm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,26,27,28,29,30,31,16,17,18,19,20,21,22,23,24,25] 3942; AVX2OR512VL-NEXT: retq 3943 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 6, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 14, i32 undef, i32 undef, i32 9, i32 10, i32 11, i32 undef> 3944 ret <16 x i16> %shuffle 3945} 3946 3947define <16 x i16> @shuffle_v16i16_uu_uu_uu_uu_01_02_03_uu_uu_uu_uu_uu_09_10_11_uu(<16 x i16> %a, <16 x i16> %b) { 3948; AVX1-LABEL: shuffle_v16i16_uu_uu_uu_uu_01_02_03_uu_uu_uu_uu_uu_09_10_11_uu: 3949; AVX1: # %bb.0: 3950; AVX1-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9] 3951; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 3952; AVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9] 3953; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3954; AVX1-NEXT: retq 3955; 3956; AVX2OR512VL-LABEL: shuffle_v16i16_uu_uu_uu_uu_01_02_03_uu_uu_uu_uu_uu_09_10_11_uu: 3957; AVX2OR512VL: # %bb.0: 3958; AVX2OR512VL-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9],zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25] 3959; AVX2OR512VL-NEXT: retq 3960 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 9, i32 10, i32 11, i32 undef> 3961 ret <16 x i16> %shuffle 3962} 3963 3964define <16 x i16> @shuffle_v16i16_19_20_21_22_23_00_01_10_27_28_29_30_31_08_09_10(<16 x i16> %a, <16 x i16> %b) { 3965; AVX1-LABEL: shuffle_v16i16_19_20_21_22_23_00_01_10_27_28_29_30_31_08_09_10: 3966; AVX1: # %bb.0: 3967; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 3968; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 3969; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5] 3970; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = 
xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] 3971; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,4,5,6,7,8,9,0,1,4,5,10,11] 3972; AVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 3973; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4],xmm0[5,6,7] 3974; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 3975; AVX1-NEXT: retq 3976; 3977; AVX2-LABEL: shuffle_v16i16_19_20_21_22_23_00_01_10_27_28_29_30_31_08_09_10: 3978; AVX2: # %bb.0: 3979; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3,4,5,6,7],ymm0[8,9,10],ymm1[11,12,13,14,15] 3980; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] 3981; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] 3982; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 3983; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,22,23,24,25,26,27,28,29,30,31,16,17,18,19,20,21] 3984; AVX2-NEXT: retq 3985; 3986; AVX512VL-LABEL: shuffle_v16i16_19_20_21_22_23_00_01_10_27_28_29_30_31_08_09_10: 3987; AVX512VL: # %bb.0: 3988; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [3,4,5,6,7,16,17,26,11,12,13,14,15,24,25,26] 3989; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2 3990; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0 3991; AVX512VL-NEXT: retq 3992 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 10, i32 27, i32 28, i32 29, i32 30, i32 31, i32 8, i32 9, i32 10> 3993 ret <16 x i16> %shuffle 3994} 3995 3996define <16 x i16> @shuffle_v16i16_uu_20_21_22_uu_uu_01_uu_uu_28_29_30_uu_uu_09_uu(<16 x i16> %a, <16 x i16> %b) { 3997; AVX1-LABEL: shuffle_v16i16_uu_20_21_22_uu_uu_01_uu_uu_28_29_30_uu_uu_09_uu: 3998; AVX1: # %bb.0: 3999; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 4000; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 4001; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = 
xmm2[6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5] 4002; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5] 4003; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 4004; AVX1-NEXT: retq 4005; 4006; AVX2OR512VL-LABEL: shuffle_v16i16_uu_20_21_22_uu_uu_01_uu_uu_28_29_30_uu_uu_09_uu: 4007; AVX2OR512VL: # %bb.0: 4008; AVX2OR512VL-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[6,7,8,9,10,11,12,13,14,15],ymm0[0,1,2,3,4,5],ymm1[22,23,24,25,26,27,28,29,30,31],ymm0[16,17,18,19,20,21] 4009; AVX2OR512VL-NEXT: retq 4010 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 20, i32 21, i32 22, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 28, i32 29, i32 30, i32 undef, i32 undef, i32 9, i32 undef> 4011 ret <16 x i16> %shuffle 4012} 4013 4014define <16 x i16> @shuffle_v16i16_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_10(<16 x i16> %a, <16 x i16> %b) { 4015; AVX1-LABEL: shuffle_v16i16_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_10: 4016; AVX1: # %bb.0: 4017; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 4018; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7] 4019; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] 4020; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] 4021; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 4022; AVX1-NEXT: retq 4023; 4024; AVX2-LABEL: shuffle_v16i16_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_10: 4025; AVX2: # %bb.0: 4026; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] 4027; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] 4028; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 4029; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,22,23,24,25,26,27,28,29,30,31,16,17,18,19,20,21] 4030; AVX2-NEXT: retq 4031; 4032; AVX512VL-LABEL: 
shuffle_v16i16_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_10: 4033; AVX512VL: # %bb.0: 4034; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [3,4,5,6,7,0,1,10,11,12,13,14,15,8,9,10] 4035; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0 4036; AVX512VL-NEXT: retq 4037 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10> 4038 ret <16 x i16> %shuffle 4039} 4040 4041define <16 x i16> @shuffle_v16i16_uu_04_05_06_uu_uu_01_uu_uu_12_13_14_uu_uu_09_uu(<16 x i16> %a, <16 x i16> %b) { 4042; AVX1-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_01_uu_uu_12_13_14_uu_uu_09_uu: 4043; AVX1: # %bb.0: 4044; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] 4045; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 4046; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] 4047; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 4048; AVX1-NEXT: retq 4049; 4050; AVX2OR512VL-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_01_uu_uu_12_13_14_uu_uu_09_uu: 4051; AVX2OR512VL: # %bb.0: 4052; AVX2OR512VL-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,22,23,24,25,26,27,28,29,30,31,16,17,18,19,20,21] 4053; AVX2OR512VL-NEXT: retq 4054 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 undef, i32 undef, i32 9, i32 undef> 4055 ret <16 x i16> %shuffle 4056} 4057 4058define <16 x i16> @shuffle_v16i16_uu_04_05_06_uu_uu_uu_uu_uu_12_13_14_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) { 4059; AVX1-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_uu_uu_uu_12_13_14_uu_uu_uu_uu: 4060; AVX1: # %bb.0: 4061; AVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 4062; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 4063; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = 
xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 4064; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 4065; AVX1-NEXT: retq 4066; 4067; AVX2OR512VL-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_uu_uu_uu_12_13_14_uu_uu_uu_uu: 4068; AVX2OR512VL: # %bb.0: 4069; AVX2OR512VL-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,ymm0[22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero 4070; AVX2OR512VL-NEXT: retq 4071 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 undef, i32 undef, i32 undef, i32 undef> 4072 ret <16 x i16> %shuffle 4073} 4074 4075define <16 x i16> @shuffle_v16i16_03_04_05_06_07_16_17_26_11_12_13_14_15_24_25_26(<16 x i16> %a, <16 x i16> %b) { 4076; AVX1-LABEL: shuffle_v16i16_03_04_05_06_07_16_17_26_11_12_13_14_15_24_25_26: 4077; AVX1: # %bb.0: 4078; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 4079; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 4080; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5] 4081; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3] 4082; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,8,9,0,1,4,5,10,11] 4083; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 4084; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5,6,7] 4085; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 4086; AVX1-NEXT: retq 4087; 4088; AVX2-LABEL: shuffle_v16i16_03_04_05_06_07_16_17_26_11_12_13_14_15_24_25_26: 4089; AVX2: # %bb.0: 4090; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7],ymm1[8,9,10],ymm0[11,12,13,14,15] 4091; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] 4092; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = 
[255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] 4093; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 4094; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,22,23,24,25,26,27,28,29,30,31,16,17,18,19,20,21] 4095; AVX2-NEXT: retq 4096; 4097; AVX512VL-LABEL: shuffle_v16i16_03_04_05_06_07_16_17_26_11_12_13_14_15_24_25_26: 4098; AVX512VL: # %bb.0: 4099; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [3,4,5,6,7,16,17,26,11,12,13,14,15,24,25,26] 4100; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0 4101; AVX512VL-NEXT: retq 4102 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 26, i32 11, i32 12, i32 13, i32 14, i32 15, i32 24, i32 25, i32 26> 4103 ret <16 x i16> %shuffle 4104} 4105 4106define <16 x i16> @shuffle_v16i16_uu_04_05_06_uu_uu_17_uu_uu_12_13_14_uu_uu_25_uu(<16 x i16> %a, <16 x i16> %b) { 4107; AVX1-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_17_uu_uu_12_13_14_uu_uu_25_uu: 4108; AVX1: # %bb.0: 4109; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 4110; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 4111; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5] 4112; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] 4113; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 4114; AVX1-NEXT: retq 4115; 4116; AVX2OR512VL-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_17_uu_uu_12_13_14_uu_uu_25_uu: 4117; AVX2OR512VL: # %bb.0: 4118; AVX2OR512VL-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15],ymm1[0,1,2,3,4,5],ymm0[22,23,24,25,26,27,28,29,30,31],ymm1[16,17,18,19,20,21] 4119; AVX2OR512VL-NEXT: retq 4120 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 17, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 undef, i32 undef, i32 25, i32 undef> 4121 ret <16 x i16> %shuffle 4122} 4123 
4124define <16 x i16> @shuffle_v16i16_05_06_07_16_17_18_19_28_13_14_15_24_25_26_27_28(<16 x i16> %a, <16 x i16> %b) { 4125; AVX1-LABEL: shuffle_v16i16_05_06_07_16_17_18_19_28_13_14_15_24_25_26_27_28: 4126; AVX1: # %bb.0: 4127; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 4128; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 4129; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9] 4130; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7] 4131; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9] 4132; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 4133; AVX1-NEXT: retq 4134; 4135; AVX2-LABEL: shuffle_v16i16_05_06_07_16_17_18_19_28_13_14_15_24_25_26_27_28: 4136; AVX2: # %bb.0: 4137; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4],ymm0[5,6,7],ymm1[8,9,10,11,12],ymm0[13,14,15] 4138; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] 4139; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] 4140; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 4141; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,26,27,28,29,30,31,16,17,18,19,20,21,22,23,24,25] 4142; AVX2-NEXT: retq 4143; 4144; AVX512VL-LABEL: shuffle_v16i16_05_06_07_16_17_18_19_28_13_14_15_24_25_26_27_28: 4145; AVX512VL: # %bb.0: 4146; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [21,22,23,0,1,2,3,12,29,30,31,8,9,10,11,12] 4147; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2 4148; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0 4149; AVX512VL-NEXT: retq 4150 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 28, i32 13, i32 14, i32 15, i32 24, i32 25, i32 26, i32 27, i32 28> 4151 ret <16 x i16> %shuffle 4152} 4153 4154define <16 x i16> @shuffle_v16i16_uu_06_uu_uu_17_18_19_uu_uu_14_uu_uu_25_26_27_uu(<16 x i16> %a, <16 x i16> %b) { 4155; AVX1-LABEL: 
shuffle_v16i16_uu_06_uu_uu_17_18_19_uu_uu_14_uu_uu_25_26_27_uu: 4156; AVX1: # %bb.0: 4157; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 4158; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 4159; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9] 4160; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9] 4161; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 4162; AVX1-NEXT: retq 4163; 4164; AVX2OR512VL-LABEL: shuffle_v16i16_uu_06_uu_uu_17_18_19_uu_uu_14_uu_uu_25_26_27_uu: 4165; AVX2OR512VL: # %bb.0: 4166; AVX2OR512VL-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[10,11,12,13,14,15],ymm1[0,1,2,3,4,5,6,7,8,9],ymm0[26,27,28,29,30,31],ymm1[16,17,18,19,20,21,22,23,24,25] 4167; AVX2OR512VL-NEXT: retq 4168 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 6, i32 undef, i32 undef, i32 17, i32 18, i32 19, i32 undef, i32 undef, i32 14, i32 undef, i32 undef, i32 25, i32 26, i32 27, i32 undef> 4169 ret <16 x i16> %shuffle 4170} 4171 4172define <16 x i16> @shuffle_v16i16_23_uu_03_uu_20_20_05_uu_31_uu_11_uu_28_28_13_uu(<16 x i16> %a, <16 x i16> %b) { 4173; AVX1-LABEL: shuffle_v16i16_23_uu_03_uu_20_20_05_uu_31_uu_11_uu_28_28_13_uu: 4174; AVX1: # %bb.0: 4175; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 4176; AVX1-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,xmm2[0,1,2,3,4,5,6,7,8,9,10,11,12,13] 4177; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 4178; AVX1-NEXT: vpshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,7,5,4,4] 4179; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm3[2],xmm2[2],xmm3[3],xmm2[3] 4180; AVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] 4181; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,4,4] 4182; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm1[2],xmm0[2],xmm1[3],xmm0[3] 4183; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 4184; AVX1-NEXT: retq 4185; 4186; AVX2-LABEL: shuffle_v16i16_23_uu_03_uu_20_20_05_uu_31_uu_11_uu_28_28_13_uu: 4187; AVX2: # %bb.0: 4188; AVX2-NEXT: vpblendw 
{{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4],ymm0[5,6],ymm1[7],ymm0[8,9,10,11],ymm1[12],ymm0[13,14],ymm1[15] 4189; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,14,15,6,7,6,7,8,9,8,9,10,11,14,15,30,31,30,31,22,23,22,23,24,25,24,25,26,27,30,31] 4190; AVX2-NEXT: retq 4191; 4192; AVX512VL-LABEL: shuffle_v16i16_23_uu_03_uu_20_20_05_uu_31_uu_11_uu_28_28_13_uu: 4193; AVX512VL: # %bb.0: 4194; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = <7,u,19,u,4,4,21,u,15,u,27,u,12,12,29,u> 4195; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2 4196; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0 4197; AVX512VL-NEXT: retq 4198 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 23, i32 undef, i32 3, i32 undef, i32 20, i32 20, i32 5, i32 undef, i32 31, i32 undef, i32 11, i32 undef, i32 28, i32 28, i32 13, i32 undef> 4199 ret <16 x i16> %shuffle 4200} 4201; Only the upper eight result elements are defined; they interleave elements 0-3 of %a and %b. The expected code does the interleave with a 128-bit vpunpcklwd and then inserts that value into the upper half of the result. 4202define <16 x i16> @shuffle_v16i16_u_u_u_u_u_u_u_u_0_16_1_17_2_18_3_19(<16 x i16> %a, <16 x i16> %b) { 4203; AVX1-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_0_16_1_17_2_18_3_19: 4204; AVX1: # %bb.0: 4205; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 4206; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 4207; AVX1-NEXT: retq 4208; 4209; AVX2OR512VL-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_0_16_1_17_2_18_3_19: 4210; AVX2OR512VL: # %bb.0: 4211; AVX2OR512VL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 4212; AVX2OR512VL-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 4213; AVX2OR512VL-NEXT: retq 4214 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19> 4215 ret <16 x i16> %shuffle 4216} 4217; Only the upper eight result elements are defined; each is a copy of element 3 of %a. The expected code splats that element within an xmm register and then inserts the splat into the upper half of the result. 4218define <16 x i16> @shuffle_v16i16_u_u_u_u_u_u_u_u_3_3_3_3_3_3_3_3(<16 x i16> %a, <16 x i16> %b) { 4219; AVX1-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_3_3_3_3_3_3_3_3: 4220; AVX1: # %bb.0: 4221; AVX1-NEXT: 
vpshuflw {{.*#+}} xmm0 = xmm0[3,3,2,3,4,5,6,7] 4222; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 4223; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 4224; AVX1-NEXT: retq 4225; 4226; AVX2-SLOW-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_3_3_3_3_3_3_3_3: 4227; AVX2-SLOW: # %bb.0: 4228; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,2,3,4,5,6,7] 4229; AVX2-SLOW-NEXT: vpbroadcastd %xmm0, %xmm0 4230; AVX2-SLOW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 4231; AVX2-SLOW-NEXT: retq 4232; 4233; AVX2-FAST-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_3_3_3_3_3_3_3_3: 4234; AVX2-FAST: # %bb.0: 4235; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,6,7,6,7,6,7,6,7,6,7,6,7,6,7] 4236; AVX2-FAST-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 4237; AVX2-FAST-NEXT: retq 4238; 4239; AVX512VL-SLOW-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_3_3_3_3_3_3_3_3: 4240; AVX512VL-SLOW: # %bb.0: 4241; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,2,3,4,5,6,7] 4242; AVX512VL-SLOW-NEXT: vpbroadcastd %xmm0, %xmm0 4243; AVX512VL-SLOW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 4244; AVX512VL-SLOW-NEXT: retq 4245; 4246; AVX512VL-FAST-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_3_3_3_3_3_3_3_3: 4247; AVX512VL-FAST: # %bb.0: 4248; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,6,7,6,7,6,7,6,7,6,7,6,7,6,7] 4249; AVX512VL-FAST-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 4250; AVX512VL-FAST-NEXT: retq 4251 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> 4252 ret <16 x i16> %shuffle 4253} 4254 4255define <16 x i16> @shuffle_v16i16_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8(<16 x i16> %a, <16 x i16> %b) { 4256; AVX1-LABEL: shuffle_v16i16_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8: 4257; AVX1: # %bb.0: 4258; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 4259; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7] 4260; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 4261; 
AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 4262; AVX1-NEXT: retq 4263; 4264; AVX2OR512VL-LABEL: shuffle_v16i16_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8: 4265; AVX2OR512VL: # %bb.0: 4266; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm0 4267; AVX2OR512VL-NEXT: vpbroadcastw %xmm0, %ymm0 4268; AVX2OR512VL-NEXT: retq 4269 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8> 4270 ret <16 x i16> %shuffle 4271} 4272; Only the lower eight result elements are defined; they interleave elements 4-7 of %a and %b, so every run configuration expects a single 128-bit vpunpckhwd. 4273define <16 x i16> @shuffle_v16i16_4_20_5_21_6_22_7_23_u_u_u_u_u_u_u_u(<16 x i16> %a, <16 x i16> %b) { 4274; ALL-LABEL: shuffle_v16i16_4_20_5_21_6_22_7_23_u_u_u_u_u_u_u_u: 4275; ALL: # %bb.0: 4276; ALL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 4277; ALL-NEXT: retq 4278 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 4279 ret <16 x i16> %shuffle 4280} 4281; Only the lower eight result elements are defined; each is a copy of element 3 of %a, so the expected code is a splat performed entirely on xmm0 with no 256-bit instruction. 4282define <16 x i16> @shuffle_v16i16_3_3_3_3_3_3_3_3_u_u_u_u_u_u_u_u(<16 x i16> %a, <16 x i16> %b) { 4283; AVX1-LABEL: shuffle_v16i16_3_3_3_3_3_3_3_3_u_u_u_u_u_u_u_u: 4284; AVX1: # %bb.0: 4285; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,2,3,4,5,6,7] 4286; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 4287; AVX1-NEXT: retq 4288; 4289; AVX2-SLOW-LABEL: shuffle_v16i16_3_3_3_3_3_3_3_3_u_u_u_u_u_u_u_u: 4290; AVX2-SLOW: # %bb.0: 4291; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,2,3,4,5,6,7] 4292; AVX2-SLOW-NEXT: vpbroadcastd %xmm0, %xmm0 4293; AVX2-SLOW-NEXT: retq 4294; 4295; AVX2-FAST-LABEL: shuffle_v16i16_3_3_3_3_3_3_3_3_u_u_u_u_u_u_u_u: 4296; AVX2-FAST: # %bb.0: 4297; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,6,7,6,7,6,7,6,7,6,7,6,7,6,7] 4298; AVX2-FAST-NEXT: retq 4299; 4300; AVX512VL-SLOW-LABEL: shuffle_v16i16_3_3_3_3_3_3_3_3_u_u_u_u_u_u_u_u: 4301; 
AVX512VL-SLOW: # %bb.0: 4302; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,2,3,4,5,6,7] 4303; AVX512VL-SLOW-NEXT: vpbroadcastd %xmm0, %xmm0 4304; AVX512VL-SLOW-NEXT: retq 4305; 4306; AVX512VL-FAST-LABEL: shuffle_v16i16_3_3_3_3_3_3_3_3_u_u_u_u_u_u_u_u: 4307; AVX512VL-FAST: # %bb.0: 4308; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,6,7,6,7,6,7,6,7,6,7,6,7,6,7] 4309; AVX512VL-FAST-NEXT: retq 4310 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 4311 ret <16 x i16> %shuffle 4312} 4313 4314define <16 x i16> @shuffle_v16i16_9_9_9_9_9_9_9_9_u_u_u_u_u_u_u_u(<16 x i16> %a, <16 x i16> %b) { 4315; AVX1-LABEL: shuffle_v16i16_9_9_9_9_9_9_9_9_u_u_u_u_u_u_u_u: 4316; AVX1: # %bb.0: 4317; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 4318; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,2,3,4,5,6,7] 4319; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 4320; AVX1-NEXT: retq 4321; 4322; AVX2-SLOW-LABEL: shuffle_v16i16_9_9_9_9_9_9_9_9_u_u_u_u_u_u_u_u: 4323; AVX2-SLOW: # %bb.0: 4324; AVX2-SLOW-NEXT: vextracti128 $1, %ymm0, %xmm0 4325; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,2,3,4,5,6,7] 4326; AVX2-SLOW-NEXT: vpbroadcastd %xmm0, %xmm0 4327; AVX2-SLOW-NEXT: retq 4328; 4329; AVX2-FAST-LABEL: shuffle_v16i16_9_9_9_9_9_9_9_9_u_u_u_u_u_u_u_u: 4330; AVX2-FAST: # %bb.0: 4331; AVX2-FAST-NEXT: vextracti128 $1, %ymm0, %xmm0 4332; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3] 4333; AVX2-FAST-NEXT: retq 4334; 4335; AVX512VL-SLOW-LABEL: shuffle_v16i16_9_9_9_9_9_9_9_9_u_u_u_u_u_u_u_u: 4336; AVX512VL-SLOW: # %bb.0: 4337; AVX512VL-SLOW-NEXT: vextracti128 $1, %ymm0, %xmm0 4338; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,2,3,4,5,6,7] 4339; AVX512VL-SLOW-NEXT: vpbroadcastd %xmm0, %xmm0 4340; AVX512VL-SLOW-NEXT: retq 4341; 4342; AVX512VL-FAST-LABEL: 
shuffle_v16i16_9_9_9_9_9_9_9_9_u_u_u_u_u_u_u_u: 4343; AVX512VL-FAST: # %bb.0: 4344; AVX512VL-FAST-NEXT: vextracti128 $1, %ymm0, %xmm0 4345; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3] 4346; AVX512VL-FAST-NEXT: retq 4347 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 4348 ret <16 x i16> %shuffle 4349} 4350 4351define <16 x i16> @shuffle_v16i16_02_18_03_19_00_16_01_17_10_26_11_27_08_24_09_25(<16 x i16> %a0, <16 x i16> %a1) { 4352; AVX1-LABEL: shuffle_v16i16_02_18_03_19_00_16_01_17_10_26_11_27_08_24_09_25: 4353; AVX1: # %bb.0: 4354; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 4355; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1] 4356; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 4357; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 4358; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 4359; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 4360; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 4361; AVX1-NEXT: retq 4362; 4363; AVX2-LABEL: shuffle_v16i16_02_18_03_19_00_16_01_17_10_26_11_27_08_24_09_25: 4364; AVX2: # %bb.0: 4365; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[0,1,4,5,4,5,6,7,0,1,0,1,12,13,2,3,16,17,20,21,20,21,22,23,16,17,16,17,28,29,18,19] 4366; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[4,5,2,3,6,7,6,7,0,1,2,3,2,3,14,15,20,21,18,19,22,23,22,23,16,17,18,19,18,19,30,31] 4367; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] 4368; AVX2-NEXT: retq 4369; 4370; AVX512VL-LABEL: shuffle_v16i16_02_18_03_19_00_16_01_17_10_26_11_27_08_24_09_25: 4371; AVX512VL: # %bb.0: 4372; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [2,18,3,19,0,16,1,17,10,26,11,27,8,24,9,25] 
4373; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0 4374; AVX512VL-NEXT: retq 4375 %1 = shufflevector <16 x i16> %a0, <16 x i16> %a1, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 0, i32 16, i32 1, i32 17, i32 10, i32 26, i32 11, i32 27, i32 8, i32 24, i32 9, i32 25> 4376 ret <16 x i16> %1 4377} 4378; The mask spelled in this function's name is built in two steps in the IR, namely a v16i16 interleaving shuffle of %a0 and %a1 followed by a <0,2,1,3> permute of that result viewed as <4 x i64>. The AVX512VL run expects both steps to fold into one vpermt2w; the AVX2 run expects a trailing vpermq. 4379define <16 x i16> @shuffle_v16i16_02_18_03_19_10_26_11_27_00_16_01_17_08_24_09_25(<16 x i16> %a0, <16 x i16> %a1) { 4380; AVX1-LABEL: shuffle_v16i16_02_18_03_19_10_26_11_27_00_16_01_17_08_24_09_25: 4381; AVX1: # %bb.0: 4382; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 4383; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 4384; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[0,1,0,1] 4385; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,0,2,2,4,5,6,7] 4386; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 4387; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] 4388; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[0,1,0,1] 4389; AVX1-NEXT: vpshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,5,5,7,7] 4390; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0],xmm2[1],xmm3[2],xmm2[3],xmm3[4],xmm2[5],xmm3[6],xmm2[7] 4391; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 4392; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,2,4,5,6,7] 4393; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 4394; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,7,7] 4395; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] 4396; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 4397; AVX1-NEXT: retq 4398; 4399; AVX2-LABEL: shuffle_v16i16_02_18_03_19_10_26_11_27_00_16_01_17_08_24_09_25: 4400; AVX2: # %bb.0: 4401; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[0,1,4,5,4,5,6,7,0,1,0,1,12,13,2,3,16,17,20,21,20,21,22,23,16,17,16,17,28,29,18,19] 4402; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[4,5,2,3,6,7,6,7,0,1,2,3,2,3,14,15,20,21,18,19,22,23,22,23,16,17,18,19,18,19,30,31] 4403; AVX2-NEXT: vpblendw {{.*#+}} 
ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] 4404; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] 4405; AVX2-NEXT: retq 4406; 4407; AVX512VL-LABEL: shuffle_v16i16_02_18_03_19_10_26_11_27_00_16_01_17_08_24_09_25: 4408; AVX512VL: # %bb.0: 4409; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [2,18,3,19,10,26,11,27,0,16,1,17,8,24,9,25] 4410; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0 4411; AVX512VL-NEXT: retq 4412 %1 = shufflevector <16 x i16> %a0, <16 x i16> %a1, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 0, i32 16, i32 1, i32 17, i32 10, i32 26, i32 11, i32 27, i32 8, i32 24, i32 9, i32 25> 4413 %2 = bitcast <16 x i16> %1 to <4 x i64> 4414 %3 = shufflevector <4 x i64> %2, <4 x i64> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3> 4415 %4 = bitcast <4 x i64> %3 to <16 x i16> 4416 ret <16 x i16> %4 4417} 4418 4419define <16 x i16> @shuffle_v16i16_04_06_07_uu_uu_06_07_05_12_14_15_uu_uu_14_15_13(<16 x i16> %a) { 4420; AVX1-LABEL: shuffle_v16i16_04_06_07_uu_uu_06_07_05_12_14_15_uu_uu_14_15_13: 4421; AVX1: # %bb.0: 4422; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,4,6,7,5] 4423; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 4424; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 4425; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,7,5] 4426; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 4427; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 4428; AVX1-NEXT: retq 4429; 4430; AVX2-SLOW-LABEL: shuffle_v16i16_04_06_07_uu_uu_06_07_05_12_14_15_uu_uu_14_15_13: 4431; AVX2-SLOW: # %bb.0: 4432; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,6,7,5,8,9,10,11,12,14,15,13] 4433; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,2,3,6,7,6,7] 4434; AVX2-SLOW-NEXT: retq 4435; 4436; AVX2-FAST-LABEL: shuffle_v16i16_04_06_07_uu_uu_06_07_05_12_14_15_uu_uu_14_15_13: 4437; AVX2-FAST: # %bb.0: 4438; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[8,9,12,13,14,15,10,11,8,9,12,13,14,15,10,11,24,25,28,29,30,31,26,27,24,25,28,29,30,31,26,27] 4439; AVX2-FAST-NEXT: retq 4440; 4441; AVX512VL-SLOW-LABEL: shuffle_v16i16_04_06_07_uu_uu_06_07_05_12_14_15_uu_uu_14_15_13: 4442; AVX512VL-SLOW: # %bb.0: 4443; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,6,7,5,8,9,10,11,12,14,15,13] 4444; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,2,3,6,7,6,7] 4445; AVX512VL-SLOW-NEXT: retq 4446; 4447; AVX512VL-FAST-LABEL: shuffle_v16i16_04_06_07_uu_uu_06_07_05_12_14_15_uu_uu_14_15_13: 4448; AVX512VL-FAST: # %bb.0: 4449; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,12,13,14,15,10,11,8,9,12,13,14,15,10,11,24,25,28,29,30,31,26,27,24,25,28,29,30,31,26,27] 4450; AVX512VL-FAST-NEXT: retq 4451 %shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 4, i32 6, i32 7, i32 undef, i32 undef, i32 6, i32 7, i32 5, i32 12, i32 14, i32 15, i32 undef, i32 undef, i32 14, i32 15, i32 13> 4452 ret <16 x i16> %shuffle 4453} 4454; Loads one i16 from %ptr and inserts it as element 0 of an all-zero <16 x i16>; every run configuration expects a zero-extending scalar load (movzwl) followed by vmovd. 4455define <16 x i16> @insert_v16i16_0elt_into_zero_vector(i16* %ptr) { 4456; ALL-LABEL: insert_v16i16_0elt_into_zero_vector: 4457; ALL: # %bb.0: 4458; ALL-NEXT: movzwl (%rdi), %eax 4459; ALL-NEXT: vmovd %eax, %xmm0 4460; ALL-NEXT: retq 4461 %val = load i16, i16* %ptr 4462 %i0 = insertelement <16 x i16> zeroinitializer, i16 %val, i32 0 4463 ret <16 x i16> %i0 4464} 4465; Builds the result by concatenating the low eight elements of %a with the high eight elements of %b, each first extracted as <8 x i16>; every run configuration expects a single vblendps. 4466define <16 x i16> @concat_v16i16_0_1_2_3_4_5_6_7_24_25_26_27_28_29_30_31(<16 x i16> %a, <16 x i16> %b) { 4467; ALL-LABEL: concat_v16i16_0_1_2_3_4_5_6_7_24_25_26_27_28_29_30_31: 4468; ALL: # %bb.0: 4469; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] 4470; ALL-NEXT: retq 4471 %alo = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 4472 %bhi = shufflevector <16 x i16> %b, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 4473 %shuf = shufflevector <8 x i16> %alo, <8 x i16> %bhi, <16 
x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 4474 ret <16 x i16> %shuf 4475} 4476; Concatenates the high eight elements of %a and of %b, with the concatenating shuffle performed on <16 x i8> bitcasts of the two halves and the <32 x i8> result cast back to <16 x i16>; the expected code is a single 128-bit-lane permute (vperm2f128 or vperm2i128). 4477define <16 x i16> @concat_v16i16_8_9_10_11_12_13_14_15_24_25_26_27_28_29_30_31_bc(<16 x i16> %a, <16 x i16> %b) { 4478; AVX1-LABEL: concat_v16i16_8_9_10_11_12_13_14_15_24_25_26_27_28_29_30_31_bc: 4479; AVX1: # %bb.0: 4480; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] 4481; AVX1-NEXT: retq 4482; 4483; AVX2-LABEL: concat_v16i16_8_9_10_11_12_13_14_15_24_25_26_27_28_29_30_31_bc: 4484; AVX2: # %bb.0: 4485; AVX2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] 4486; AVX2-NEXT: retq 4487; 4488; AVX512VL-LABEL: concat_v16i16_8_9_10_11_12_13_14_15_24_25_26_27_28_29_30_31_bc: 4489; AVX512VL: # %bb.0: 4490; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] 4491; AVX512VL-NEXT: retq 4492 %ahi = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 4493 %bhi = shufflevector <16 x i16> %b, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 4494 %bc0hi = bitcast <8 x i16> %ahi to <16 x i8> 4495 %bc1hi = bitcast <8 x i16> %bhi to <16 x i8> 4496 %shuffle8 = shufflevector <16 x i8> %bc0hi, <16 x i8> %bc1hi, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 4497 %shuffle16 = bitcast <32 x i8> %shuffle8 to <16 x i16> 4498 ret <16 x i16> %shuffle16 4499} 4500; Regression test (the name presumably refers to an LLVM bug report, to be confirmed). It is a two-input v16i16 shuffle with an irregular mask that mixes elements from both 128-bit halves of %a and %b; the AVX512VL run expects a single vpermi2w, while the AVX1 and AVX2 runs expect multi-instruction blend sequences. 4501define <16 x i16> @PR24935(<16 x i16> %a, <16 x i16> %b) { 4502; AVX1-LABEL: PR24935: 4503; AVX1: # %bb.0: 4504; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[0,0,1,1] 4505; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 4506; AVX1-NEXT: vpalignr {{.*#+}} xmm4 = xmm3[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1] 4507; AVX1-NEXT: vpblendw 
{{.*#+}} xmm2 = xmm4[0,1],xmm2[2],xmm4[3,4,5,6,7] 4508; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 4509; AVX1-NEXT: vpshufhw {{.*#+}} xmm5 = xmm4[0,1,2,3,5,5,6,7] 4510; AVX1-NEXT: vpshufb {{.*#+}} xmm6 = xmm0[2,3,2,3,4,5,6,7,8,9,8,9,0,1,2,3] 4511; AVX1-NEXT: vpblendw {{.*#+}} xmm5 = xmm6[0],xmm5[1],xmm6[2,3],xmm5[4],xmm6[5,6,7] 4512; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm5[0,1],xmm2[2,3],xmm5[4,5,6],xmm2[7] 4513; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3] 4514; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7] 4515; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3,4,5],xmm1[6,7] 4516; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[6,7,4,5,4,5,10,11,4,5,14,15,12,13,0,1] 4517; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2],xmm1[3,4,5],xmm0[6],xmm1[7] 4518; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 4519; AVX1-NEXT: retq 4520; 4521; AVX2-SLOW-LABEL: PR24935: 4522; AVX2-SLOW: # %bb.0: 4523; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm2 = ymm1[8,9,10,11,4,5,8,9,0,1,14,15,12,13,0,1,24,25,26,27,20,21,24,25,16,17,30,31,28,29,16,17] 4524; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,0,1] 4525; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[6,7,4,5,0,1,10,11,4,5,10,11,4,5,6,7,22,23,20,21,16,17,26,27,20,21,26,27,20,21,22,23] 4526; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm3 = <255,255,255,255,u,u,255,255,255,255,0,0,u,u,0,0,u,u,u,u,255,255,0,0,u,u,u,u,u,u,0,0> 4527; AVX2-SLOW-NEXT: vpblendvb %ymm3, %ymm1, %ymm2, %ymm1 4528; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm2 = ymm0[0,1,1,3,4,5,6,7,8,9,9,11,12,13,14,15] 4529; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm2 = ymm2[0,1,2,3,5,5,6,7,8,9,10,11,13,13,14,15] 4530; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] 4531; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,u,u,u,u,6,7,u,u,18,19,u,u,u,u,u,u,u,u,24,25,16,17,u,u] 4532; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1,2],ymm0[3],ymm2[4],ymm0[5,6,7,8],ymm2[9,10],ymm0[11],ymm2[12],ymm0[13,14,15] 
4533; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,0,0,255,255,255,255,255,255,0,0,255,255,0,0,0,0,255,255,255,255,0,0,0,0,0,0,255,255] 4534; AVX2-SLOW-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 4535; AVX2-SLOW-NEXT: retq 4536; 4537; AVX2-FAST-LABEL: PR24935: 4538; AVX2-FAST: # %bb.0: 4539; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm2 = ymm1[8,9,10,11,4,5,8,9,0,1,14,15,12,13,0,1,24,25,26,27,20,21,24,25,16,17,30,31,28,29,16,17] 4540; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,0,1] 4541; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[6,7,4,5,0,1,10,11,4,5,10,11,4,5,6,7,22,23,20,21,16,17,26,27,20,21,26,27,20,21,22,23] 4542; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm3 = <255,255,255,255,u,u,255,255,255,255,0,0,u,u,0,0,u,u,u,u,255,255,0,0,u,u,u,u,u,u,0,0> 4543; AVX2-FAST-NEXT: vpblendvb %ymm3, %ymm1, %ymm2, %ymm1 4544; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm2 = ymm0[0,1,2,3,2,3,6,7,10,11,10,11,12,13,14,15,16,17,18,19,18,19,22,23,26,27,26,27,28,29,30,31] 4545; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] 4546; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,u,u,u,u,6,7,u,u,18,19,u,u,u,u,u,u,u,u,24,25,16,17,u,u] 4547; AVX2-FAST-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1,2],ymm0[3],ymm2[4],ymm0[5,6,7,8],ymm2[9,10],ymm0[11],ymm2[12],ymm0[13,14,15] 4548; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,0,0,255,255,255,255,255,255,0,0,255,255,0,0,0,0,255,255,255,255,0,0,0,0,0,0,255,255] 4549; AVX2-FAST-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 4550; AVX2-FAST-NEXT: retq 4551; 4552; AVX512VL-LABEL: PR24935: 4553; AVX512VL: # %bb.0: 4554; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [11,10,17,13,10,7,27,0,17,25,0,12,29,20,16,8] 4555; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2 4556; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0 4557; AVX512VL-NEXT: retq 4558 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 27, i32 26, i32 1, i32 29, i32 26, i32 23, i32 11, i32 16, i32 1, i32 9, i32 16, i32 28, i32 13, i32 4, i32 0, i32 24> 4559 ret <16 x 
i16> %shuffle
}

; PR34369 applies a fixed single-input v16i16 shuffle to %vec and keeps the
; shuffled value only in lanes where %mask is zero; every other lane is
; zeroed. The name suggests a regression test for bug 34369 (confirm against
; the bug tracker). The AVX512VL assertions expect the compare and select to
; fold into one zero-masked vpermw.
define <16 x i16> @PR34369(<16 x i16> %vec, <16 x i16> %mask) {
; AVX1-LABEL: PR34369:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpshufb {{.*#+}} xmm3 = xmm2[8,9,10,11,4,5,10,11,8,9,10,11,4,5,4,5]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[6,7,0,1,0,1,6,7,10,11,4,5,4,5,6,7]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[3],xmm0[4,5,6],xmm3[7]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm2 = xmm2[14,15,0,1,12,13,0,1,2,3,4,5,8,9,8,9]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqw %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqw %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:    vandps %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: PR34369:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpermq {{.*#+}} ymm2 = ymm0[2,3,0,1]
; AVX2-NEXT:    vpshufb {{.*#+}} xmm2 = xmm2[8,9,10,11,4,5,10,11,8,9,10,11,4,5,4,5]
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[6,7,0,1,0,1,u,u,10,11,4,5,4,5,u,u,30,31,16,17,28,29,16,17,18,19,20,21,24,25,24,25]
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm3 = [255,255,255,255,255,255,0,0,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT:    vpblendvb %ymm3, %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpcmpeqw %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpand %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: PR34369:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm2 = [3,0,0,13,5,2,2,10,15,8,14,8,9,10,12,12]
; AVX512VL-NEXT:    vptestnmw %ymm1, %ymm1, %k1
; AVX512VL-NEXT:    vpermw %ymm0, %ymm2, %ymm0 {%k1} {z}
; AVX512VL-NEXT:    retq
  %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 0, i32 0, i32 13, i32 5, i32 2, i32 2, i32 10, i32 15, i32 8, i32 14, i32 8, i32 9, i32 10, i32 12, i32 12>
  %cmp = icmp eq <16 x i16> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
  ret <16 x i16> %res
}

; Loads an i32, inserts it into lane 0 of a zero <4 x i32>, reinterprets the
; vector as <8 x i16> and splats i16 element 0 (the low half of the loaded
; value) across all 16 result lanes. The AVX2 and AVX-512VL runs expect the
; whole sequence to fold into a single vpbroadcastw from memory.
define <16 x i16> @insert_dup_mem_v16i16_i32(i32* %ptr) {
; AVX1-LABEL: insert_dup_mem_v16i16_i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: insert_dup_mem_v16i16_i32:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vpbroadcastw (%rdi), %ymm0
; AVX2OR512VL-NEXT:    retq
  %tmp = load i32, i32* %ptr, align 4
  %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
  %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
  %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <16 x i32> zeroinitializer
  ret <16 x i16> %tmp3
}

; Same splat of i16 element 0, but the scalar is an i16 load that is
; sign-extended to i32 before being inserted into lane 0. The assertions
; expect the extending load (movswl) to remain, followed by a register
; broadcast rather than a broadcast straight from memory.
define <16 x i16> @insert_dup_mem_v16i16_sext_i16(i16* %ptr) {
; AVX1-LABEL: insert_dup_mem_v16i16_sext_i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    movswl (%rdi), %eax
; AVX1-NEXT:    vmovd %eax, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_dup_mem_v16i16_sext_i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    movswl (%rdi), %eax
; AVX2-NEXT:    vmovd %eax, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: insert_dup_mem_v16i16_sext_i16:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    movswl (%rdi), %eax
; AVX512VL-NEXT:    vpbroadcastw %eax, %ymm0
; AVX512VL-NEXT:    retq
  %tmp = load i16, i16* %ptr, align 2
  %tmp1 = sext i16 %tmp to i32
  %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
  %tmp3 = bitcast <4 x i32> %tmp2 to <8 x i16>
  %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <16 x i32> zeroinitializer
  ret <16 x i16> %tmp4
}

; Splats i16 element 1 of the bitcast vector, i.e. the upper 16 bits of the
; i32 loaded into lane 0. The AVX2 and AVX-512VL runs expect a vpbroadcastw
; from byte offset 2 of the pointer. Attribute group #0 is defined outside
; this part of the file.
define <16 x i16> @insert_dup_elt1_mem_v16i16_i32(i32* %ptr) #0 {
; AVX1-LABEL: insert_dup_elt1_mem_v16i16_i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[1,1,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: insert_dup_elt1_mem_v16i16_i32:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vpbroadcastw 2(%rdi), %ymm0
; AVX2OR512VL-NEXT:    retq
  %tmp = load i32, i32* %ptr, align 4
  %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
  %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
  %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <16 x i16> %tmp3
}

; Here the loaded i32 goes into lane 1 of the <4 x i32>, and the shuffle
; splats i16 element 3, which is the upper 16 bits of that lane. The value
; still comes from the upper half of the loaded i32, so the AVX2 and
; AVX-512VL runs again expect vpbroadcastw from byte offset 2. Attribute
; group #0 is defined outside this part of the file.
define <16 x i16> @insert_dup_elt3_mem_v16i16_i32(i32* %ptr) #0 {
; AVX1-LABEL: insert_dup_elt3_mem_v16i16_i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: insert_dup_elt3_mem_v16i16_i32:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vpbroadcastw 2(%rdi), %ymm0
; AVX2OR512VL-NEXT:    retq
  %tmp = load i32, i32* %ptr, align 4
  %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 1
  %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
  %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <16 x i16> %tmp3
}