; Regression test for x86-64 lowering of single 128-bit <8 x i16> shufflevector
; operations. The invocations below run llc with no extra features and with
; ssse3, sse4.1, avx, avx2 and avx512vl+avx512bw enabled, the last two also
; with fast-variable-shuffle, and match the emitted assembly with FileCheck.
;
; Naming - the suffix of each @shuffle_v8i16_<mask> function spells its shuffle
; mask, one character per result lane. 0-7 pick a lane of the first operand,
; 8 and above (continuing as A, B) pick a lane of the second operand, and u or
; X stands for an undef lane, e.g. 456789AB is the mask <4, 5, 6, 7, 8, 9, 10, 11>.
;
; The per-function assertion comments are produced by
; utils/update_llc_test_checks.py (see the note below) - rerun that script
; after a codegen change instead of editing them by hand.
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2 3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3 4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41 5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1OR2 --check-prefix=AVX1 6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1OR2 --check-prefix=AVX2OR512VL --check-prefix=AVX2 --check-prefix=AVX2-SLOW 7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1OR2 --check-prefix=AVX2OR512VL --check-prefix=AVX2 --check-prefix=AVX2-FAST 8; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2OR512VL --check-prefix=AVX512VL --check-prefix=AVX512VL-SLOW 9; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw,+fast-variable-shuffle | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2OR512VL --check-prefix=AVX512VL --check-prefix=AVX512VL-FAST 10 11define <8 x i16> @shuffle_v8i16_01012323(<8 x i16> %a, <8 x i16> %b) { 12; SSE-LABEL: shuffle_v8i16_01012323: 13; SSE: # %bb.0: 14; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] 15; SSE-NEXT: retq 16; 17; AVX-LABEL: shuffle_v8i16_01012323: 18; AVX: # %bb.0: 19; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,1] 20; AVX-NEXT: retq 21 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3> 22 ret <8 x i16> 
%shuffle 23} 24define <8 x i16> @shuffle_v8i16_67452301(<8 x i16> %a, <8 x i16> %b) { 25; SSE-LABEL: shuffle_v8i16_67452301: 26; SSE: # %bb.0: 27; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0] 28; SSE-NEXT: retq 29; 30; AVX-LABEL: shuffle_v8i16_67452301: 31; AVX: # %bb.0: 32; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] 33; AVX-NEXT: retq 34 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 1> 35 ret <8 x i16> %shuffle 36} 37define <8 x i16> @shuffle_v8i16_456789AB(<8 x i16> %a, <8 x i16> %b) { 38; SSE2-LABEL: shuffle_v8i16_456789AB: 39; SSE2: # %bb.0: 40; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] 41; SSE2-NEXT: retq 42; 43; SSSE3-LABEL: shuffle_v8i16_456789AB: 44; SSSE3: # %bb.0: 45; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7] 46; SSSE3-NEXT: movdqa %xmm1, %xmm0 47; SSSE3-NEXT: retq 48; 49; SSE41-LABEL: shuffle_v8i16_456789AB: 50; SSE41: # %bb.0: 51; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7] 52; SSE41-NEXT: movdqa %xmm1, %xmm0 53; SSE41-NEXT: retq 54; 55; AVX-LABEL: shuffle_v8i16_456789AB: 56; AVX: # %bb.0: 57; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7] 58; AVX-NEXT: retq 59 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 60 ret <8 x i16> %shuffle 61} 62 63define <8 x i16> @shuffle_v8i16_00000000(<8 x i16> %a, <8 x i16> %b) { 64; SSE-LABEL: shuffle_v8i16_00000000: 65; SSE: # %bb.0: 66; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7] 67; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 68; SSE-NEXT: retq 69; 70; AVX1-LABEL: shuffle_v8i16_00000000: 71; AVX1: # %bb.0: 72; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7] 73; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 74; AVX1-NEXT: retq 75; 76; AVX2OR512VL-LABEL: shuffle_v8i16_00000000: 77; AVX2OR512VL: # %bb.0: 78; 
AVX2OR512VL-NEXT: vpbroadcastw %xmm0, %xmm0 79; AVX2OR512VL-NEXT: retq 80 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 81 ret <8 x i16> %shuffle 82} 83define <8 x i16> @shuffle_v8i16_00004444(<8 x i16> %a, <8 x i16> %b) { 84; SSE-LABEL: shuffle_v8i16_00004444: 85; SSE: # %bb.0: 86; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 87; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] 88; SSE-NEXT: retq 89; 90; AVX1-LABEL: shuffle_v8i16_00004444: 91; AVX1: # %bb.0: 92; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 93; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] 94; AVX1-NEXT: retq 95; 96; AVX2-SLOW-LABEL: shuffle_v8i16_00004444: 97; AVX2-SLOW: # %bb.0: 98; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 99; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] 100; AVX2-SLOW-NEXT: retq 101; 102; AVX2-FAST-LABEL: shuffle_v8i16_00004444: 103; AVX2-FAST: # %bb.0: 104; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,8,9,8,9,8,9,8,9] 105; AVX2-FAST-NEXT: retq 106; 107; AVX512VL-SLOW-LABEL: shuffle_v8i16_00004444: 108; AVX512VL-SLOW: # %bb.0: 109; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 110; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] 111; AVX512VL-SLOW-NEXT: retq 112; 113; AVX512VL-FAST-LABEL: shuffle_v8i16_00004444: 114; AVX512VL-FAST: # %bb.0: 115; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,8,9,8,9,8,9,8,9] 116; AVX512VL-FAST-NEXT: retq 117 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4> 118 ret <8 x i16> %shuffle 119} 120define <8 x i16> @shuffle_v8i16_u0u1u2u3(<8 x i16> %a, <8 x i16> %b) { 121; SSE-LABEL: shuffle_v8i16_u0u1u2u3: 122; SSE: # %bb.0: 123; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 124; SSE-NEXT: retq 125; 126; AVX-LABEL: shuffle_v8i16_u0u1u2u3: 127; 
AVX: # %bb.0: 128; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 129; AVX-NEXT: retq 130 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3> 131 ret <8 x i16> %shuffle 132} 133define <8 x i16> @shuffle_v8i16_u4u5u6u7(<8 x i16> %a, <8 x i16> %b) { 134; SSE-LABEL: shuffle_v8i16_u4u5u6u7: 135; SSE: # %bb.0: 136; SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7] 137; SSE-NEXT: retq 138; 139; AVX-LABEL: shuffle_v8i16_u4u5u6u7: 140; AVX: # %bb.0: 141; AVX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7] 142; AVX-NEXT: retq 143 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 undef, i32 5, i32 undef, i32 6, i32 undef, i32 7> 144 ret <8 x i16> %shuffle 145} 146define <8 x i16> @shuffle_v8i16_31206745(<8 x i16> %a, <8 x i16> %b) { 147; SSE-LABEL: shuffle_v8i16_31206745: 148; SSE: # %bb.0: 149; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7] 150; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2] 151; SSE-NEXT: retq 152; 153; AVX1-LABEL: shuffle_v8i16_31206745: 154; AVX1: # %bb.0: 155; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7] 156; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2] 157; AVX1-NEXT: retq 158; 159; AVX2-SLOW-LABEL: shuffle_v8i16_31206745: 160; AVX2-SLOW: # %bb.0: 161; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7] 162; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2] 163; AVX2-SLOW-NEXT: retq 164; 165; AVX2-FAST-LABEL: shuffle_v8i16_31206745: 166; AVX2-FAST: # %bb.0: 167; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,2,3,4,5,0,1,12,13,14,15,8,9,10,11] 168; AVX2-FAST-NEXT: retq 169; 170; AVX512VL-SLOW-LABEL: shuffle_v8i16_31206745: 171; AVX512VL-SLOW: # %bb.0: 172; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7] 173; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2] 174; AVX512VL-SLOW-NEXT: retq 175; 176; AVX512VL-FAST-LABEL: shuffle_v8i16_31206745: 
177; AVX512VL-FAST: # %bb.0: 178; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,2,3,4,5,0,1,12,13,14,15,8,9,10,11] 179; AVX512VL-FAST-NEXT: retq 180 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 1, i32 2, i32 0, i32 6, i32 7, i32 4, i32 5> 181 ret <8 x i16> %shuffle 182} 183define <8 x i16> @shuffle_v8i16_44440000(<8 x i16> %a, <8 x i16> %b) { 184; SSE2-LABEL: shuffle_v8i16_44440000: 185; SSE2: # %bb.0: 186; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,0,3] 187; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 188; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] 189; SSE2-NEXT: retq 190; 191; SSSE3-LABEL: shuffle_v8i16_44440000: 192; SSSE3: # %bb.0: 193; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1] 194; SSSE3-NEXT: retq 195; 196; SSE41-LABEL: shuffle_v8i16_44440000: 197; SSE41: # %bb.0: 198; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1] 199; SSE41-NEXT: retq 200; 201; AVX-LABEL: shuffle_v8i16_44440000: 202; AVX: # %bb.0: 203; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1] 204; AVX-NEXT: retq 205 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0> 206 ret <8 x i16> %shuffle 207} 208define <8 x i16> @shuffle_v8i16_23016745(<8 x i16> %a, <8 x i16> %b) { 209; SSE-LABEL: shuffle_v8i16_23016745: 210; SSE: # %bb.0: 211; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] 212; SSE-NEXT: retq 213; 214; AVX-LABEL: shuffle_v8i16_23016745: 215; AVX: # %bb.0: 216; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,0,3,2] 217; AVX-NEXT: retq 218 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5> 219 ret <8 x i16> %shuffle 220} 221define <8 x i16> @shuffle_v8i16_23026745(<8 x i16> %a, <8 x i16> %b) { 222; SSE-LABEL: shuffle_v8i16_23026745: 223; SSE: # %bb.0: 224; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7] 225; 
SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2] 226; SSE-NEXT: retq 227; 228; AVX1-LABEL: shuffle_v8i16_23026745: 229; AVX1: # %bb.0: 230; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7] 231; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2] 232; AVX1-NEXT: retq 233; 234; AVX2-SLOW-LABEL: shuffle_v8i16_23026745: 235; AVX2-SLOW: # %bb.0: 236; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7] 237; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2] 238; AVX2-SLOW-NEXT: retq 239; 240; AVX2-FAST-LABEL: shuffle_v8i16_23026745: 241; AVX2-FAST: # %bb.0: 242; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,6,7,0,1,4,5,12,13,14,15,8,9,10,11] 243; AVX2-FAST-NEXT: retq 244; 245; AVX512VL-SLOW-LABEL: shuffle_v8i16_23026745: 246; AVX512VL-SLOW: # %bb.0: 247; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7] 248; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2] 249; AVX512VL-SLOW-NEXT: retq 250; 251; AVX512VL-FAST-LABEL: shuffle_v8i16_23026745: 252; AVX512VL-FAST: # %bb.0: 253; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,6,7,0,1,4,5,12,13,14,15,8,9,10,11] 254; AVX512VL-FAST-NEXT: retq 255 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 3, i32 0, i32 2, i32 6, i32 7, i32 4, i32 5> 256 ret <8 x i16> %shuffle 257} 258define <8 x i16> @shuffle_v8i16_23016747(<8 x i16> %a, <8 x i16> %b) { 259; SSE-LABEL: shuffle_v8i16_23016747: 260; SSE: # %bb.0: 261; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,2,3] 262; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7] 263; SSE-NEXT: retq 264; 265; AVX1-LABEL: shuffle_v8i16_23016747: 266; AVX1: # %bb.0: 267; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,2,3] 268; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7] 269; AVX1-NEXT: retq 270; 271; AVX2-SLOW-LABEL: shuffle_v8i16_23016747: 272; AVX2-SLOW: # %bb.0: 273; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,2,3] 274; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7] 275; AVX2-SLOW-NEXT: 
retq 276; 277; AVX2-FAST-LABEL: shuffle_v8i16_23016747: 278; AVX2-FAST: # %bb.0: 279; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,6,7,0,1,2,3,12,13,14,15,8,9,14,15] 280; AVX2-FAST-NEXT: retq 281; 282; AVX512VL-SLOW-LABEL: shuffle_v8i16_23016747: 283; AVX512VL-SLOW: # %bb.0: 284; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,2,3] 285; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7] 286; AVX512VL-SLOW-NEXT: retq 287; 288; AVX512VL-FAST-LABEL: shuffle_v8i16_23016747: 289; AVX512VL-FAST: # %bb.0: 290; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,6,7,0,1,2,3,12,13,14,15,8,9,14,15] 291; AVX512VL-FAST-NEXT: retq 292 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 7> 293 ret <8 x i16> %shuffle 294} 295define <8 x i16> @shuffle_v8i16_75643120(<8 x i16> %a, <8 x i16> %b) { 296; SSE2-LABEL: shuffle_v8i16_75643120: 297; SSE2: # %bb.0: 298; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 299; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7] 300; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4] 301; SSE2-NEXT: retq 302; 303; SSSE3-LABEL: shuffle_v8i16_75643120: 304; SSSE3: # %bb.0: 305; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1] 306; SSSE3-NEXT: retq 307; 308; SSE41-LABEL: shuffle_v8i16_75643120: 309; SSE41: # %bb.0: 310; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1] 311; SSE41-NEXT: retq 312; 313; AVX-LABEL: shuffle_v8i16_75643120: 314; AVX: # %bb.0: 315; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1] 316; AVX-NEXT: retq 317 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 7, i32 5, i32 6, i32 4, i32 3, i32 1, i32 2, i32 0> 318 ret <8 x i16> %shuffle 319} 320 321define <8 x i16> @shuffle_v8i16_10545410(<8 x i16> %a, <8 x i16> %b) { 322; SSE2-LABEL: shuffle_v8i16_10545410: 323; SSE2: # %bb.0: 324; SSE2-NEXT: pshufd {{.*#+}} xmm0 = 
xmm0[0,2,2,0] 325; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] 326; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] 327; SSE2-NEXT: retq 328; 329; SSSE3-LABEL: shuffle_v8i16_10545410: 330; SSSE3: # %bb.0: 331; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1] 332; SSSE3-NEXT: retq 333; 334; SSE41-LABEL: shuffle_v8i16_10545410: 335; SSE41: # %bb.0: 336; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1] 337; SSE41-NEXT: retq 338; 339; AVX-LABEL: shuffle_v8i16_10545410: 340; AVX: # %bb.0: 341; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1] 342; AVX-NEXT: retq 343 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 0, i32 5, i32 4, i32 5, i32 4, i32 1, i32 0> 344 ret <8 x i16> %shuffle 345} 346define <8 x i16> @shuffle_v8i16_54105410(<8 x i16> %a, <8 x i16> %b) { 347; SSE2-LABEL: shuffle_v8i16_54105410: 348; SSE2: # %bb.0: 349; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0] 350; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7] 351; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] 352; SSE2-NEXT: retq 353; 354; SSSE3-LABEL: shuffle_v8i16_54105410: 355; SSSE3: # %bb.0: 356; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1] 357; SSSE3-NEXT: retq 358; 359; SSE41-LABEL: shuffle_v8i16_54105410: 360; SSE41: # %bb.0: 361; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1] 362; SSE41-NEXT: retq 363; 364; AVX-LABEL: shuffle_v8i16_54105410: 365; AVX: # %bb.0: 366; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1] 367; AVX-NEXT: retq 368 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 4, i32 1, i32 0, i32 5, i32 4, i32 1, i32 0> 369 ret <8 x i16> %shuffle 370} 371define <8 x i16> @shuffle_v8i16_54101054(<8 x i16> %a, <8 x i16> %b) { 372; SSE2-LABEL: shuffle_v8i16_54101054: 373; SSE2: # %bb.0: 374; SSE2-NEXT: pshufd {{.*#+}} xmm0 = 
xmm0[0,2,2,0] 375; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7] 376; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4] 377; SSE2-NEXT: retq 378; 379; SSSE3-LABEL: shuffle_v8i16_54101054: 380; SSSE3: # %bb.0: 381; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9] 382; SSSE3-NEXT: retq 383; 384; SSE41-LABEL: shuffle_v8i16_54101054: 385; SSE41: # %bb.0: 386; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9] 387; SSE41-NEXT: retq 388; 389; AVX-LABEL: shuffle_v8i16_54101054: 390; AVX: # %bb.0: 391; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9] 392; AVX-NEXT: retq 393 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 4, i32 1, i32 0, i32 1, i32 0, i32 5, i32 4> 394 ret <8 x i16> %shuffle 395} 396define <8 x i16> @shuffle_v8i16_04400440(<8 x i16> %a, <8 x i16> %b) { 397; SSE2-LABEL: shuffle_v8i16_04400440: 398; SSE2: # %bb.0: 399; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0] 400; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7] 401; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,4,6] 402; SSE2-NEXT: retq 403; 404; SSSE3-LABEL: shuffle_v8i16_04400440: 405; SSSE3: # %bb.0: 406; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1] 407; SSSE3-NEXT: retq 408; 409; SSE41-LABEL: shuffle_v8i16_04400440: 410; SSE41: # %bb.0: 411; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1] 412; SSE41-NEXT: retq 413; 414; AVX-LABEL: shuffle_v8i16_04400440: 415; AVX: # %bb.0: 416; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1] 417; AVX-NEXT: retq 418 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 0> 419 ret <8 x i16> %shuffle 420} 421define <8 x i16> @shuffle_v8i16_40044004(<8 x i16> %a, <8 x i16> %b) { 422; SSE2-LABEL: shuffle_v8i16_40044004: 423; SSE2: # %bb.0: 424; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0] 
425; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,0,0,2,4,5,6,7] 426; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,4] 427; SSE2-NEXT: retq 428; 429; SSSE3-LABEL: shuffle_v8i16_40044004: 430; SSSE3: # %bb.0: 431; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9] 432; SSSE3-NEXT: retq 433; 434; SSE41-LABEL: shuffle_v8i16_40044004: 435; SSE41: # %bb.0: 436; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9] 437; SSE41-NEXT: retq 438; 439; AVX-LABEL: shuffle_v8i16_40044004: 440; AVX: # %bb.0: 441; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9] 442; AVX-NEXT: retq 443 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 0, i32 0, i32 4, i32 4, i32 0, i32 0, i32 4> 444 ret <8 x i16> %shuffle 445} 446 447define <8 x i16> @shuffle_v8i16_26405173(<8 x i16> %a, <8 x i16> %b) { 448; SSE2-LABEL: shuffle_v8i16_26405173: 449; SSE2: # %bb.0: 450; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7] 451; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4] 452; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1] 453; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7] 454; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,4,7] 455; SSE2-NEXT: retq 456; 457; SSSE3-LABEL: shuffle_v8i16_26405173: 458; SSSE3: # %bb.0: 459; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7] 460; SSSE3-NEXT: retq 461; 462; SSE41-LABEL: shuffle_v8i16_26405173: 463; SSE41: # %bb.0: 464; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7] 465; SSE41-NEXT: retq 466; 467; AVX-LABEL: shuffle_v8i16_26405173: 468; AVX: # %bb.0: 469; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7] 470; AVX-NEXT: retq 471 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 6, i32 4, i32 0, i32 5, i32 1, i32 7, i32 3> 472 ret <8 x i16> %shuffle 473} 474define <8 x i16> @shuffle_v8i16_20645173(<8 x i16> %a, <8 x i16> 
%b) { 475; SSE2-LABEL: shuffle_v8i16_20645173: 476; SSE2: # %bb.0: 477; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7] 478; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4] 479; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1] 480; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,0,2,3,4,5,6,7] 481; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,4,7] 482; SSE2-NEXT: retq 483; 484; SSSE3-LABEL: shuffle_v8i16_20645173: 485; SSSE3: # %bb.0: 486; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7] 487; SSSE3-NEXT: retq 488; 489; SSE41-LABEL: shuffle_v8i16_20645173: 490; SSE41: # %bb.0: 491; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7] 492; SSE41-NEXT: retq 493; 494; AVX-LABEL: shuffle_v8i16_20645173: 495; AVX: # %bb.0: 496; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7] 497; AVX-NEXT: retq 498 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 0, i32 6, i32 4, i32 5, i32 1, i32 7, i32 3> 499 ret <8 x i16> %shuffle 500} 501define <8 x i16> @shuffle_v8i16_26401375(<8 x i16> %a, <8 x i16> %b) { 502; SSE2-LABEL: shuffle_v8i16_26401375: 503; SSE2: # %bb.0: 504; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7] 505; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4] 506; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,1,2] 507; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7] 508; SSE2-NEXT: retq 509; 510; SSSE3-LABEL: shuffle_v8i16_26401375: 511; SSSE3: # %bb.0: 512; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11] 513; SSSE3-NEXT: retq 514; 515; SSE41-LABEL: shuffle_v8i16_26401375: 516; SSE41: # %bb.0: 517; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11] 518; SSE41-NEXT: retq 519; 520; AVX-LABEL: shuffle_v8i16_26401375: 521; AVX: # %bb.0: 522; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11] 523; AVX-NEXT: retq 524 %shuffle = shufflevector 
<8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 6, i32 4, i32 0, i32 1, i32 3, i32 7, i32 5> 525 ret <8 x i16> %shuffle 526} 527 528define <8 x i16> @shuffle_v8i16_66751643(<8 x i16> %a, <8 x i16> %b) { 529; SSE2-LABEL: shuffle_v8i16_66751643: 530; SSE2: # %bb.0: 531; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,3,4,5,6,7] 532; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,5,7] 533; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,0] 534; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,3,2,4,5,6,7] 535; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,4,6] 536; SSE2-NEXT: retq 537; 538; SSSE3-LABEL: shuffle_v8i16_66751643: 539; SSSE3: # %bb.0: 540; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7] 541; SSSE3-NEXT: retq 542; 543; SSE41-LABEL: shuffle_v8i16_66751643: 544; SSE41: # %bb.0: 545; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7] 546; SSE41-NEXT: retq 547; 548; AVX-LABEL: shuffle_v8i16_66751643: 549; AVX: # %bb.0: 550; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7] 551; AVX-NEXT: retq 552 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 6, i32 6, i32 7, i32 5, i32 1, i32 6, i32 4, i32 3> 553 ret <8 x i16> %shuffle 554} 555 556define <8 x i16> @shuffle_v8i16_60514754(<8 x i16> %a, <8 x i16> %b) { 557; SSE2-LABEL: shuffle_v8i16_60514754: 558; SSE2: # %bb.0: 559; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,4,7] 560; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 561; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7] 562; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,5,6] 563; SSE2-NEXT: retq 564; 565; SSSE3-LABEL: shuffle_v8i16_60514754: 566; SSSE3: # %bb.0: 567; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,0,1,10,11,2,3,8,9,14,15,10,11,8,9] 568; SSSE3-NEXT: retq 569; 570; SSE41-LABEL: shuffle_v8i16_60514754: 571; SSE41: # %bb.0: 572; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,0,1,10,11,2,3,8,9,14,15,10,11,8,9] 
573; SSE41-NEXT: retq 574; 575; AVX-LABEL: shuffle_v8i16_60514754: 576; AVX: # %bb.0: 577; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,0,1,10,11,2,3,8,9,14,15,10,11,8,9] 578; AVX-NEXT: retq 579 %shuffle = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 6, i32 0, i32 5, i32 1, i32 4, i32 7, i32 5, i32 4> 580 ret <8 x i16> %shuffle 581} 582 583define <8 x i16> @shuffle_v8i16_00444444(<8 x i16> %a, <8 x i16> %b) { 584; SSE2-LABEL: shuffle_v8i16_00444444: 585; SSE2: # %bb.0: 586; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 587; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7] 588; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] 589; SSE2-NEXT: retq 590; 591; SSSE3-LABEL: shuffle_v8i16_00444444: 592; SSSE3: # %bb.0: 593; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9] 594; SSSE3-NEXT: retq 595; 596; SSE41-LABEL: shuffle_v8i16_00444444: 597; SSE41: # %bb.0: 598; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9] 599; SSE41-NEXT: retq 600; 601; AVX-LABEL: shuffle_v8i16_00444444: 602; AVX: # %bb.0: 603; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9] 604; AVX-NEXT: retq 605 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4> 606 ret <8 x i16> %shuffle 607} 608define <8 x i16> @shuffle_v8i16_44004444(<8 x i16> %a, <8 x i16> %b) { 609; SSE2-LABEL: shuffle_v8i16_44004444: 610; SSE2: # %bb.0: 611; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 612; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,2,0,0,4,5,6,7] 613; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] 614; SSE2-NEXT: retq 615; 616; SSSE3-LABEL: shuffle_v8i16_44004444: 617; SSSE3: # %bb.0: 618; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9] 619; SSSE3-NEXT: retq 620; 621; SSE41-LABEL: shuffle_v8i16_44004444: 622; SSE41: # %bb.0: 623; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9] 624; 
SSE41-NEXT: retq 625; 626; AVX-LABEL: shuffle_v8i16_44004444: 627; AVX: # %bb.0: 628; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9] 629; AVX-NEXT: retq 630 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4> 631 ret <8 x i16> %shuffle 632} 633define <8 x i16> @shuffle_v8i16_04404444(<8 x i16> %a, <8 x i16> %b) { 634; SSE2-LABEL: shuffle_v8i16_04404444: 635; SSE2: # %bb.0: 636; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 637; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7] 638; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] 639; SSE2-NEXT: retq 640; 641; SSSE3-LABEL: shuffle_v8i16_04404444: 642; SSSE3: # %bb.0: 643; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9] 644; SSSE3-NEXT: retq 645; 646; SSE41-LABEL: shuffle_v8i16_04404444: 647; SSE41: # %bb.0: 648; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9] 649; SSE41-NEXT: retq 650; 651; AVX-LABEL: shuffle_v8i16_04404444: 652; AVX: # %bb.0: 653; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9] 654; AVX-NEXT: retq 655 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 4> 656 ret <8 x i16> %shuffle 657} 658define <8 x i16> @shuffle_v8i16_04400000(<8 x i16> %a, <8 x i16> %b) { 659; SSE2-LABEL: shuffle_v8i16_04400000: 660; SSE2: # %bb.0: 661; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,0,3] 662; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7] 663; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] 664; SSE2-NEXT: retq 665; 666; SSSE3-LABEL: shuffle_v8i16_04400000: 667; SSSE3: # %bb.0: 668; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1] 669; SSSE3-NEXT: retq 670; 671; SSE41-LABEL: shuffle_v8i16_04400000: 672; SSE41: # %bb.0: 673; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1] 674; SSE41-NEXT: retq 675; 676; 
AVX-LABEL: shuffle_v8i16_04400000: 677; AVX: # %bb.0: 678; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1] 679; AVX-NEXT: retq 680 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0, i32 0> 681 ret <8 x i16> %shuffle 682} 683define <8 x i16> @shuffle_v8i16_04404567(<8 x i16> %a, <8 x i16> %b) { 684; SSE-LABEL: shuffle_v8i16_04404567: 685; SSE: # %bb.0: 686; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 687; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7] 688; SSE-NEXT: retq 689; 690; AVX1-LABEL: shuffle_v8i16_04404567: 691; AVX1: # %bb.0: 692; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 693; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7] 694; AVX1-NEXT: retq 695; 696; AVX2-SLOW-LABEL: shuffle_v8i16_04404567: 697; AVX2-SLOW: # %bb.0: 698; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 699; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7] 700; AVX2-SLOW-NEXT: retq 701; 702; AVX2-FAST-LABEL: shuffle_v8i16_04404567: 703; AVX2-FAST: # %bb.0: 704; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,10,11,12,13,14,15] 705; AVX2-FAST-NEXT: retq 706; 707; AVX512VL-SLOW-LABEL: shuffle_v8i16_04404567: 708; AVX512VL-SLOW: # %bb.0: 709; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 710; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7] 711; AVX512VL-SLOW-NEXT: retq 712; 713; AVX512VL-FAST-LABEL: shuffle_v8i16_04404567: 714; AVX512VL-FAST: # %bb.0: 715; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,10,11,12,13,14,15] 716; AVX512VL-FAST-NEXT: retq 717 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 5, i32 6, i32 7> 718 ret <8 x i16> %shuffle 719} 720 721define <8 x i16> @shuffle_v8i16_0X444444(<8 x i16> %a, <8 x i16> %b) { 722; SSE2-LABEL: shuffle_v8i16_0X444444: 723; SSE2: # %bb.0: 724; SSE2-NEXT: pshufd {{.*#+}} xmm0 = 
xmm0[0,2,2,3] 725; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,2,4,5,6,7] 726; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] 727; SSE2-NEXT: retq 728; 729; SSSE3-LABEL: shuffle_v8i16_0X444444: 730; SSSE3: # %bb.0: 731; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9] 732; SSSE3-NEXT: retq 733; 734; SSE41-LABEL: shuffle_v8i16_0X444444: 735; SSE41: # %bb.0: 736; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9] 737; SSE41-NEXT: retq 738; 739; AVX-LABEL: shuffle_v8i16_0X444444: 740; AVX: # %bb.0: 741; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9] 742; AVX-NEXT: retq 743 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 undef, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4> 744 ret <8 x i16> %shuffle 745} 746define <8 x i16> @shuffle_v8i16_44X04444(<8 x i16> %a, <8 x i16> %b) { 747; SSE2-LABEL: shuffle_v8i16_44X04444: 748; SSE2: # %bb.0: 749; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 750; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,2,2,0,4,5,6,7] 751; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] 752; SSE2-NEXT: retq 753; 754; SSSE3-LABEL: shuffle_v8i16_44X04444: 755; SSSE3: # %bb.0: 756; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9] 757; SSSE3-NEXT: retq 758; 759; SSE41-LABEL: shuffle_v8i16_44X04444: 760; SSE41: # %bb.0: 761; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9] 762; SSE41-NEXT: retq 763; 764; AVX-LABEL: shuffle_v8i16_44X04444: 765; AVX: # %bb.0: 766; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9] 767; AVX-NEXT: retq 768 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 undef, i32 0, i32 4, i32 4, i32 4, i32 4> 769 ret <8 x i16> %shuffle 770} 771define <8 x i16> @shuffle_v8i16_X4404444(<8 x i16> %a, <8 x i16> %b) { 772; SSE2-LABEL: shuffle_v8i16_X4404444: 773; SSE2: # %bb.0: 774; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 775; 
SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7] 776; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] 777; SSE2-NEXT: retq 778; 779; SSSE3-LABEL: shuffle_v8i16_X4404444: 780; SSSE3: # %bb.0: 781; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9] 782; SSSE3-NEXT: retq 783; 784; SSE41-LABEL: shuffle_v8i16_X4404444: 785; SSE41: # %bb.0: 786; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9] 787; SSE41-NEXT: retq 788; 789; AVX-LABEL: shuffle_v8i16_X4404444: 790; AVX: # %bb.0: 791; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9] 792; AVX-NEXT: retq 793 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 4> 794 ret <8 x i16> %shuffle 795} 796 797define <8 x i16> @shuffle_v8i16_0127XXXX(<8 x i16> %a, <8 x i16> %b) { 798; SSE2-LABEL: shuffle_v8i16_0127XXXX: 799; SSE2: # %bb.0: 800; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3] 801; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,7,6,7] 802; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 803; SSE2-NEXT: retq 804; 805; SSSE3-LABEL: shuffle_v8i16_0127XXXX: 806; SSSE3: # %bb.0: 807; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15] 808; SSSE3-NEXT: retq 809; 810; SSE41-LABEL: shuffle_v8i16_0127XXXX: 811; SSE41: # %bb.0: 812; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15] 813; SSE41-NEXT: retq 814; 815; AVX-LABEL: shuffle_v8i16_0127XXXX: 816; AVX: # %bb.0: 817; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15] 818; AVX-NEXT: retq 819 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> 820 ret <8 x i16> %shuffle 821} 822 823define <8 x i16> @shuffle_v8i16_XXXX4563(<8 x i16> %a, <8 x i16> %b) { 824; SSE2-LABEL: shuffle_v8i16_XXXX4563: 825; SSE2: # %bb.0: 826; SSE2-NEXT: pshufd {{.*#+}} xmm0 = 
xmm0[3,1,2,0] 827; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7] 828; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,2,0] 829; SSE2-NEXT: retq 830; 831; SSSE3-LABEL: shuffle_v8i16_XXXX4563: 832; SSSE3: # %bb.0: 833; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7] 834; SSSE3-NEXT: retq 835; 836; SSE41-LABEL: shuffle_v8i16_XXXX4563: 837; SSE41: # %bb.0: 838; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7] 839; SSE41-NEXT: retq 840; 841; AVX-LABEL: shuffle_v8i16_XXXX4563: 842; AVX: # %bb.0: 843; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7] 844; AVX-NEXT: retq 845 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 4, i32 5, i32 6, i32 3> 846 ret <8 x i16> %shuffle 847} 848 849define <8 x i16> @shuffle_v8i16_4563XXXX(<8 x i16> %a, <8 x i16> %b) { 850; SSE2-LABEL: shuffle_v8i16_4563XXXX: 851; SSE2: # %bb.0: 852; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0] 853; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7] 854; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,0,2,3] 855; SSE2-NEXT: retq 856; 857; SSSE3-LABEL: shuffle_v8i16_4563XXXX: 858; SSSE3: # %bb.0: 859; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3] 860; SSSE3-NEXT: retq 861; 862; SSE41-LABEL: shuffle_v8i16_4563XXXX: 863; SSE41: # %bb.0: 864; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3] 865; SSE41-NEXT: retq 866; 867; AVX-LABEL: shuffle_v8i16_4563XXXX: 868; AVX: # %bb.0: 869; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3] 870; AVX-NEXT: retq 871 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 3, i32 undef, i32 undef, i32 undef, i32 undef> 872 ret <8 x i16> %shuffle 873} 874 875define <8 x i16> @shuffle_v8i16_01274563(<8 x i16> %a, <8 x i16> %b) { 876; SSE2-LABEL: shuffle_v8i16_01274563: 877; SSE2: # %bb.0: 878; 
SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3] 879; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,4,7] 880; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,1,2] 881; SSE2-NEXT: retq 882; 883; SSSE3-LABEL: shuffle_v8i16_01274563: 884; SSSE3: # %bb.0: 885; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7] 886; SSSE3-NEXT: retq 887; 888; SSE41-LABEL: shuffle_v8i16_01274563: 889; SSE41: # %bb.0: 890; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7] 891; SSE41-NEXT: retq 892; 893; AVX-LABEL: shuffle_v8i16_01274563: 894; AVX: # %bb.0: 895; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7] 896; AVX-NEXT: retq 897 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6, i32 3> 898 ret <8 x i16> %shuffle 899} 900 901define <8 x i16> @shuffle_v8i16_45630127(<8 x i16> %a, <8 x i16> %b) { 902; SSE2-LABEL: shuffle_v8i16_45630127: 903; SSE2: # %bb.0: 904; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0] 905; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7] 906; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,0,3,1] 907; SSE2-NEXT: retq 908; 909; SSSE3-LABEL: shuffle_v8i16_45630127: 910; SSSE3: # %bb.0: 911; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15] 912; SSSE3-NEXT: retq 913; 914; SSE41-LABEL: shuffle_v8i16_45630127: 915; SSE41: # %bb.0: 916; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15] 917; SSE41-NEXT: retq 918; 919; AVX-LABEL: shuffle_v8i16_45630127: 920; AVX: # %bb.0: 921; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15] 922; AVX-NEXT: retq 923 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 3, i32 0, i32 1, i32 2, i32 7> 924 ret <8 x i16> %shuffle 925} 926 927define <8 x i16> @shuffle_v8i16_37102735(<8 x i16> %a, <8 x i16> %b) { 928; SSE2-LABEL: shuffle_v8i16_37102735: 929; SSE2: # %bb.0: 930; 
SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,5,7] 931; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3] 932; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4] 933; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 934; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7] 935; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,4,5,6] 936; SSE2-NEXT: retq 937; 938; SSSE3-LABEL: shuffle_v8i16_37102735: 939; SSSE3: # %bb.0: 940; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11] 941; SSSE3-NEXT: retq 942; 943; SSE41-LABEL: shuffle_v8i16_37102735: 944; SSE41: # %bb.0: 945; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11] 946; SSE41-NEXT: retq 947; 948; AVX-LABEL: shuffle_v8i16_37102735: 949; AVX: # %bb.0: 950; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11] 951; AVX-NEXT: retq 952 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 7, i32 1, i32 0, i32 2, i32 7, i32 3, i32 5> 953 ret <8 x i16> %shuffle 954} 955 956define <8 x i16> @shuffle_v8i16_08192a3b(<8 x i16> %a, <8 x i16> %b) { 957; SSE-LABEL: shuffle_v8i16_08192a3b: 958; SSE: # %bb.0: 959; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 960; SSE-NEXT: retq 961; 962; AVX-LABEL: shuffle_v8i16_08192a3b: 963; AVX: # %bb.0: 964; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 965; AVX-NEXT: retq 966 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> 967 ret <8 x i16> %shuffle 968} 969 970define <8 x i16> @shuffle_v8i16_0c1d2e3f(<8 x i16> %a, <8 x i16> %b) { 971; SSE-LABEL: shuffle_v8i16_0c1d2e3f: 972; SSE: # %bb.0: 973; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] 974; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 975; SSE-NEXT: retq 976; 977; AVX-LABEL: 
shuffle_v8i16_0c1d2e3f: 978; AVX: # %bb.0: 979; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] 980; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 981; AVX-NEXT: retq 982 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 12, i32 1, i32 13, i32 2, i32 14, i32 3, i32 15> 983 ret <8 x i16> %shuffle 984} 985 986define <8 x i16> @shuffle_v8i16_4c5d6e7f(<8 x i16> %a, <8 x i16> %b) { 987; SSE-LABEL: shuffle_v8i16_4c5d6e7f: 988; SSE: # %bb.0: 989; SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 990; SSE-NEXT: retq 991; 992; AVX-LABEL: shuffle_v8i16_4c5d6e7f: 993; AVX: # %bb.0: 994; AVX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 995; AVX-NEXT: retq 996 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> 997 ret <8 x i16> %shuffle 998} 999 1000define <8 x i16> @shuffle_v8i16_48596a7b(<8 x i16> %a, <8 x i16> %b) { 1001; SSE-LABEL: shuffle_v8i16_48596a7b: 1002; SSE: # %bb.0: 1003; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 1004; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1005; SSE-NEXT: retq 1006; 1007; AVX-LABEL: shuffle_v8i16_48596a7b: 1008; AVX: # %bb.0: 1009; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 1010; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1011; AVX-NEXT: retq 1012 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 8, i32 5, i32 9, i32 6, i32 10, i32 7, i32 11> 1013 ret <8 x i16> %shuffle 1014} 1015 1016define <8 x i16> @shuffle_v8i16_08196e7f(<8 x i16> %a, <8 x i16> %b) { 1017; SSE-LABEL: shuffle_v8i16_08196e7f: 1018; SSE: # %bb.0: 1019; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,3,2,3] 1020; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3] 1021; SSE-NEXT: punpcklwd 
{{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1022; SSE-NEXT: retq 1023; 1024; AVX-LABEL: shuffle_v8i16_08196e7f: 1025; AVX: # %bb.0: 1026; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,3,2,3] 1027; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3] 1028; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1029; AVX-NEXT: retq 1030 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 6, i32 14, i32 7, i32 15> 1031 ret <8 x i16> %shuffle 1032} 1033 1034define <8 x i16> @shuffle_v8i16_0c1d6879(<8 x i16> %a, <8 x i16> %b) { 1035; SSE-LABEL: shuffle_v8i16_0c1d6879: 1036; SSE: # %bb.0: 1037; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,0,2,3] 1038; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3] 1039; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1040; SSE-NEXT: retq 1041; 1042; AVX-LABEL: shuffle_v8i16_0c1d6879: 1043; AVX: # %bb.0: 1044; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,0,2,3] 1045; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3] 1046; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1047; AVX-NEXT: retq 1048 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 12, i32 1, i32 13, i32 6, i32 8, i32 7, i32 9> 1049 ret <8 x i16> %shuffle 1050} 1051 1052define <8 x i16> @shuffle_v8i16_109832ba(<8 x i16> %a, <8 x i16> %b) { 1053; SSE-LABEL: shuffle_v8i16_109832ba: 1054; SSE: # %bb.0: 1055; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1056; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7] 1057; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5] 1058; SSE-NEXT: retq 1059; 1060; AVX1-LABEL: shuffle_v8i16_109832ba: 1061; AVX1: # %bb.0: 1062; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1063; AVX1-NEXT: vpshuflw 
{{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7] 1064; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5] 1065; AVX1-NEXT: retq 1066; 1067; AVX2-SLOW-LABEL: shuffle_v8i16_109832ba: 1068; AVX2-SLOW: # %bb.0: 1069; AVX2-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1070; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7] 1071; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5] 1072; AVX2-SLOW-NEXT: retq 1073; 1074; AVX2-FAST-LABEL: shuffle_v8i16_109832ba: 1075; AVX2-FAST: # %bb.0: 1076; AVX2-FAST-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1077; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,6,7,2,3,12,13,8,9,14,15,10,11] 1078; AVX2-FAST-NEXT: retq 1079; 1080; AVX512VL-SLOW-LABEL: shuffle_v8i16_109832ba: 1081; AVX512VL-SLOW: # %bb.0: 1082; AVX512VL-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1083; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7] 1084; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5] 1085; AVX512VL-SLOW-NEXT: retq 1086; 1087; AVX512VL-FAST-LABEL: shuffle_v8i16_109832ba: 1088; AVX512VL-FAST: # %bb.0: 1089; AVX512VL-FAST-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1090; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,6,7,2,3,12,13,8,9,14,15,10,11] 1091; AVX512VL-FAST-NEXT: retq 1092 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 0, i32 9, i32 8, i32 3, i32 2, i32 11, i32 10> 1093 ret <8 x i16> %shuffle 1094} 1095 1096define <8 x i16> @shuffle_v8i16_8091a2b3(<8 x i16> %a, <8 x i16> %b) { 1097; SSE-LABEL: shuffle_v8i16_8091a2b3: 1098; SSE: # %bb.0: 1099; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 1100; SSE-NEXT: movdqa %xmm1, %xmm0 1101; SSE-NEXT: retq 1102; 1103; AVX-LABEL: 
shuffle_v8i16_8091a2b3: 1104; AVX: # %bb.0: 1105; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 1106; AVX-NEXT: retq 1107 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3> 1108 ret <8 x i16> %shuffle 1109} 1110define <8 x i16> @shuffle_v8i16_c4d5e6f7(<8 x i16> %a, <8 x i16> %b) { 1111; SSE-LABEL: shuffle_v8i16_c4d5e6f7: 1112; SSE: # %bb.0: 1113; SSE-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 1114; SSE-NEXT: movdqa %xmm1, %xmm0 1115; SSE-NEXT: retq 1116; 1117; AVX-LABEL: shuffle_v8i16_c4d5e6f7: 1118; AVX: # %bb.0: 1119; AVX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 1120; AVX-NEXT: retq 1121 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 7> 1122 ret <8 x i16> %shuffle 1123} 1124 1125define <8 x i16> @shuffle_v8i16_0213cedf(<8 x i16> %a, <8 x i16> %b) { 1126; SSE2-LABEL: shuffle_v8i16_0213cedf: 1127; SSE2: # %bb.0: 1128; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm0[0,2,1,3,4,5,6,7] 1129; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,6,5,7] 1130; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1] 1131; SSE2-NEXT: retq 1132; 1133; SSSE3-LABEL: shuffle_v8i16_0213cedf: 1134; SSSE3: # %bb.0: 1135; SSSE3-NEXT: pshuflw {{.*#+}} xmm2 = xmm0[0,2,1,3,4,5,6,7] 1136; SSSE3-NEXT: pshufhw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,6,5,7] 1137; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1] 1138; SSSE3-NEXT: retq 1139; 1140; SSE41-LABEL: shuffle_v8i16_0213cedf: 1141; SSE41: # %bb.0: 1142; SSE41-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,5,7] 1143; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7] 1144; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] 1145; SSE41-NEXT: retq 1146; 1147; AVX1-LABEL: shuffle_v8i16_0213cedf: 1148; AVX1: # %bb.0: 1149; AVX1-NEXT: 
vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,5,7] 1150; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7] 1151; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] 1152; AVX1-NEXT: retq 1153; 1154; AVX2-SLOW-LABEL: shuffle_v8i16_0213cedf: 1155; AVX2-SLOW: # %bb.0: 1156; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,5,7] 1157; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7] 1158; AVX2-SLOW-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] 1159; AVX2-SLOW-NEXT: retq 1160; 1161; AVX2-FAST-LABEL: shuffle_v8i16_0213cedf: 1162; AVX2-FAST: # %bb.0: 1163; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,12,13,10,11,14,15,8,9,12,13,10,11,14,15] 1164; AVX2-FAST-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7] 1165; AVX2-FAST-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1166; AVX2-FAST-NEXT: retq 1167; 1168; AVX512VL-SLOW-LABEL: shuffle_v8i16_0213cedf: 1169; AVX512VL-SLOW: # %bb.0: 1170; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7] 1171; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,5,7] 1172; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 1173; AVX512VL-SLOW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1174; AVX512VL-SLOW-NEXT: retq 1175; 1176; AVX512VL-FAST-LABEL: shuffle_v8i16_0213cedf: 1177; AVX512VL-FAST: # %bb.0: 1178; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,12,13,10,11,14,15,8,9,12,13,10,11,14,15] 1179; AVX512VL-FAST-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7] 1180; AVX512VL-FAST-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1181; AVX512VL-FAST-NEXT: retq 1182 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 1, i32 3, i32 12, i32 14, i32 13, i32 15> 1183 ret <8 x i16> %shuffle 1184} 1185 1186define <8 x i16> @shuffle_v8i16_443aXXXX(<8 x i16> %a, <8 x i16> %b) { 1187; SSE2-LABEL: shuffle_v8i16_443aXXXX: 1188; SSE2: # %bb.0: 1189; SSE2-NEXT: movdqa {{.*#+}} xmm2 = 
[65535,65535,0,65535,65535,65535,65535,65535] 1190; SSE2-NEXT: pand %xmm2, %xmm0 1191; SSE2-NEXT: pandn %xmm1, %xmm2 1192; SSE2-NEXT: por %xmm0, %xmm2 1193; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,1,2,3] 1194; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7] 1195; SSE2-NEXT: retq 1196; 1197; SSSE3-LABEL: shuffle_v8i16_443aXXXX: 1198; SSSE3: # %bb.0: 1199; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[4,5,u,u,u,u,u,u,u,u] 1200; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,6,7],zero,zero,xmm0[u,u,u,u,u,u,u,u] 1201; SSSE3-NEXT: por %xmm1, %xmm0 1202; SSSE3-NEXT: retq 1203; 1204; SSE41-LABEL: shuffle_v8i16_443aXXXX: 1205; SSE41: # %bb.0: 1206; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7] 1207; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3] 1208; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7] 1209; SSE41-NEXT: retq 1210; 1211; AVX1-LABEL: shuffle_v8i16_443aXXXX: 1212; AVX1: # %bb.0: 1213; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7] 1214; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,3] 1215; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7] 1216; AVX1-NEXT: retq 1217; 1218; AVX2-SLOW-LABEL: shuffle_v8i16_443aXXXX: 1219; AVX2-SLOW: # %bb.0: 1220; AVX2-SLOW-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7] 1221; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,3] 1222; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7] 1223; AVX2-SLOW-NEXT: retq 1224; 1225; AVX2-FAST-LABEL: shuffle_v8i16_443aXXXX: 1226; AVX2-FAST: # %bb.0: 1227; AVX2-FAST-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7] 1228; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,6,7,4,5,8,9,10,11,12,13,14,15] 1229; AVX2-FAST-NEXT: retq 1230; 1231; AVX512VL-SLOW-LABEL: shuffle_v8i16_443aXXXX: 1232; AVX512VL-SLOW: # %bb.0: 1233; AVX512VL-SLOW-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7] 1234; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = 
xmm0[2,1,2,3] 1235; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7] 1236; AVX512VL-SLOW-NEXT: retq 1237; 1238; AVX512VL-FAST-LABEL: shuffle_v8i16_443aXXXX: 1239; AVX512VL-FAST: # %bb.0: 1240; AVX512VL-FAST-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7] 1241; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,6,7,4,5,8,9,10,11,12,13,14,15] 1242; AVX512VL-FAST-NEXT: retq 1243 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 3, i32 10, i32 undef, i32 undef, i32 undef, i32 undef> 1244 ret <8 x i16> %shuffle 1245} 1246 1247define <8 x i16> @shuffle_v8i16_032dXXXX(<8 x i16> %a, <8 x i16> %b) { 1248; SSE2-LABEL: shuffle_v8i16_032dXXXX: 1249; SSE2: # %bb.0: 1250; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1] 1251; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,0] 1252; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm1[0,1,2,3,6,5,6,7] 1253; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3] 1254; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7] 1255; SSE2-NEXT: retq 1256; 1257; SSSE3-LABEL: shuffle_v8i16_032dXXXX: 1258; SSSE3: # %bb.0: 1259; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[10,11,u,u,u,u,u,u,u,u] 1260; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5],zero,zero,xmm0[u,u,u,u,u,u,u,u] 1261; SSSE3-NEXT: por %xmm1, %xmm0 1262; SSSE3-NEXT: retq 1263; 1264; SSE41-LABEL: shuffle_v8i16_032dXXXX: 1265; SSE41: # %bb.0: 1266; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] 1267; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3] 1268; SSE41-NEXT: retq 1269; 1270; AVX1-LABEL: shuffle_v8i16_032dXXXX: 1271; AVX1: # %bb.0: 1272; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] 1273; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3] 1274; AVX1-NEXT: retq 1275; 1276; AVX2OR512VL-LABEL: shuffle_v8i16_032dXXXX: 1277; AVX2OR512VL: # %bb.0: 1278; AVX2OR512VL-NEXT: vpblendd {{.*#+}} xmm0 = 
xmm0[0,1],xmm1[2,3] 1279; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3] 1280; AVX2OR512VL-NEXT: retq 1281 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 3, i32 2, i32 13, i32 undef, i32 undef, i32 undef, i32 undef> 1282 ret <8 x i16> %shuffle 1283} 1284define <8 x i16> @shuffle_v8i16_XXXdXXXX(<8 x i16> %a, <8 x i16> %b) { 1285; SSE-LABEL: shuffle_v8i16_XXXdXXXX: 1286; SSE: # %bb.0: 1287; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,2,3,3] 1288; SSE-NEXT: retq 1289; 1290; AVX-LABEL: shuffle_v8i16_XXXdXXXX: 1291; AVX: # %bb.0: 1292; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm1[2,2,3,3] 1293; AVX-NEXT: retq 1294 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 13, i32 undef, i32 undef, i32 undef, i32 undef> 1295 ret <8 x i16> %shuffle 1296} 1297 1298define <8 x i16> @shuffle_v8i16_012dXXXX(<8 x i16> %a, <8 x i16> %b) { 1299; SSE2-LABEL: shuffle_v8i16_012dXXXX: 1300; SSE2: # %bb.0: 1301; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,0,65535,65535,65535,65535] 1302; SSE2-NEXT: pand %xmm2, %xmm0 1303; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3] 1304; SSE2-NEXT: pandn %xmm1, %xmm2 1305; SSE2-NEXT: por %xmm2, %xmm0 1306; SSE2-NEXT: retq 1307; 1308; SSSE3-LABEL: shuffle_v8i16_012dXXXX: 1309; SSSE3: # %bb.0: 1310; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[10,11,u,u,u,u,u,u,u,u] 1311; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],zero,zero,xmm0[u,u,u,u,u,u,u,u] 1312; SSSE3-NEXT: por %xmm1, %xmm0 1313; SSSE3-NEXT: retq 1314; 1315; SSE41-LABEL: shuffle_v8i16_012dXXXX: 1316; SSE41: # %bb.0: 1317; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3] 1318; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7] 1319; SSE41-NEXT: retq 1320; 1321; AVX-LABEL: shuffle_v8i16_012dXXXX: 1322; AVX: # %bb.0: 1323; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3] 1324; AVX-NEXT: vpblendw {{.*#+}} xmm0 = 
xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7] 1325; AVX-NEXT: retq 1326 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 13, i32 undef, i32 undef, i32 undef, i32 undef> 1327 ret <8 x i16> %shuffle 1328} 1329 1330define <8 x i16> @shuffle_v8i16_XXXXcde3(<8 x i16> %a, <8 x i16> %b) { 1331; SSE2-LABEL: shuffle_v8i16_XXXXcde3: 1332; SSE2: # %bb.0: 1333; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,65535,65535,65535,65535,0] 1334; SSE2-NEXT: pand %xmm2, %xmm1 1335; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] 1336; SSE2-NEXT: pandn %xmm0, %xmm2 1337; SSE2-NEXT: por %xmm1, %xmm2 1338; SSE2-NEXT: movdqa %xmm2, %xmm0 1339; SSE2-NEXT: retq 1340; 1341; SSSE3-LABEL: shuffle_v8i16_XXXXcde3: 1342; SSSE3: # %bb.0: 1343; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,u,u],zero,zero,zero,zero,zero,zero,xmm0[6,7] 1344; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,u,u,u,8,9,10,11,12,13],zero,zero 1345; SSSE3-NEXT: por %xmm1, %xmm0 1346; SSSE3-NEXT: retq 1347; 1348; SSE41-LABEL: shuffle_v8i16_XXXXcde3: 1349; SSE41: # %bb.0: 1350; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] 1351; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7] 1352; SSE41-NEXT: retq 1353; 1354; AVX1-LABEL: shuffle_v8i16_XXXXcde3: 1355; AVX1: # %bb.0: 1356; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] 1357; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7] 1358; AVX1-NEXT: retq 1359; 1360; AVX2OR512VL-LABEL: shuffle_v8i16_XXXXcde3: 1361; AVX2OR512VL: # %bb.0: 1362; AVX2OR512VL-NEXT: vpbroadcastq %xmm0, %xmm0 1363; AVX2OR512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7] 1364; AVX2OR512VL-NEXT: retq 1365 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 3> 1366 ret <8 x i16> %shuffle 1367} 1368 1369define <8 x i16> @shuffle_v8i16_cde3XXXX(<8 x i16> %a, <8 x i16> %b) { 1370; SSE2-LABEL: shuffle_v8i16_cde3XXXX: 1371; SSE2: # %bb.0: 1372; 
SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,0,65535,65535,65535,65535] 1373; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] 1374; SSE2-NEXT: pand %xmm2, %xmm1 1375; SSE2-NEXT: pandn %xmm0, %xmm2 1376; SSE2-NEXT: por %xmm1, %xmm2 1377; SSE2-NEXT: movdqa %xmm2, %xmm0 1378; SSE2-NEXT: retq 1379; 1380; SSSE3-LABEL: shuffle_v8i16_cde3XXXX: 1381; SSSE3: # %bb.0: 1382; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[6,7,u,u,u,u,u,u,u,u] 1383; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[8,9,10,11,12,13],zero,zero,xmm1[u,u,u,u,u,u,u,u] 1384; SSSE3-NEXT: por %xmm1, %xmm0 1385; SSSE3-NEXT: retq 1386; 1387; SSE41-LABEL: shuffle_v8i16_cde3XXXX: 1388; SSE41: # %bb.0: 1389; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] 1390; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7] 1391; SSE41-NEXT: retq 1392; 1393; AVX-LABEL: shuffle_v8i16_cde3XXXX: 1394; AVX: # %bb.0: 1395; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] 1396; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7] 1397; AVX-NEXT: retq 1398 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 12, i32 13, i32 14, i32 3, i32 undef, i32 undef, i32 undef, i32 undef> 1399 ret <8 x i16> %shuffle 1400} 1401 1402define <8 x i16> @shuffle_v8i16_012dcde3(<8 x i16> %a, <8 x i16> %b) { 1403; SSE2-LABEL: shuffle_v8i16_012dcde3: 1404; SSE2: # %bb.0: 1405; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1] 1406; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3,2,1] 1407; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[3,1,2,0,4,5,6,7] 1408; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0] 1409; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7] 1410; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,5,7] 1411; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1] 1412; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,3,0,2,4,5,6,7] 1413; SSE2-NEXT: retq 1414; 1415; SSSE3-LABEL: shuffle_v8i16_012dcde3: 1416; SSSE3: # %bb.0: 1417; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = 
zero,zero,zero,zero,zero,zero,xmm1[10,11,8,9,10,11,12,13],zero,zero 1418; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],zero,zero,zero,zero,zero,zero,zero,zero,xmm0[6,7] 1419; SSSE3-NEXT: por %xmm1, %xmm0 1420; SSSE3-NEXT: retq 1421; 1422; SSE41-LABEL: shuffle_v8i16_012dcde3: 1423; SSE41: # %bb.0: 1424; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] 1425; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7] 1426; SSE41-NEXT: retq 1427; 1428; AVX1-LABEL: shuffle_v8i16_012dcde3: 1429; AVX1: # %bb.0: 1430; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] 1431; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7] 1432; AVX1-NEXT: retq 1433; 1434; AVX2OR512VL-LABEL: shuffle_v8i16_012dcde3: 1435; AVX2OR512VL: # %bb.0: 1436; AVX2OR512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] 1437; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7] 1438; AVX2OR512VL-NEXT: retq 1439 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 13, i32 12, i32 13, i32 14, i32 3> 1440 ret <8 x i16> %shuffle 1441} 1442 1443define <8 x i16> @shuffle_v8i16_0923cde7(<8 x i16> %a, <8 x i16> %b) { 1444; SSE2-LABEL: shuffle_v8i16_0923cde7: 1445; SSE2: # %bb.0: 1446; SSE2-NEXT: movaps {{.*#+}} xmm2 = [65535,0,65535,65535,0,0,0,65535] 1447; SSE2-NEXT: andps %xmm2, %xmm0 1448; SSE2-NEXT: andnps %xmm1, %xmm2 1449; SSE2-NEXT: orps %xmm2, %xmm0 1450; SSE2-NEXT: retq 1451; 1452; SSSE3-LABEL: shuffle_v8i16_0923cde7: 1453; SSSE3: # %bb.0: 1454; SSSE3-NEXT: movaps {{.*#+}} xmm2 = [65535,0,65535,65535,0,0,0,65535] 1455; SSSE3-NEXT: andps %xmm2, %xmm0 1456; SSSE3-NEXT: andnps %xmm1, %xmm2 1457; SSSE3-NEXT: orps %xmm2, %xmm0 1458; SSSE3-NEXT: retq 1459; 1460; SSE41-LABEL: shuffle_v8i16_0923cde7: 1461; SSE41: # %bb.0: 1462; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4,5,6],xmm0[7] 1463; SSE41-NEXT: retq 1464; 1465; AVX-LABEL: 
shuffle_v8i16_0923cde7: 1466; AVX: # %bb.0: 1467; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4,5,6],xmm0[7] 1468; AVX-NEXT: retq 1469 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 9, i32 2, i32 3, i32 12, i32 13, i32 14, i32 7> 1470 ret <8 x i16> %shuffle 1471} 1472 1473define <8 x i16> @shuffle_v8i16_XXX1X579(<8 x i16> %a, <8 x i16> %b) { 1474; SSE2-LABEL: shuffle_v8i16_XXX1X579: 1475; SSE2: # %bb.0: 1476; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,1,2,0] 1477; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535,65535,65535,65535,0] 1478; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7] 1479; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7] 1480; SSE2-NEXT: pand %xmm1, %xmm0 1481; SSE2-NEXT: pandn %xmm2, %xmm1 1482; SSE2-NEXT: por %xmm0, %xmm1 1483; SSE2-NEXT: movdqa %xmm1, %xmm0 1484; SSE2-NEXT: retq 1485; 1486; SSSE3-LABEL: shuffle_v8i16_XXX1X579: 1487; SSSE3: # %bb.0: 1488; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,u],zero,zero,xmm1[u,u],zero,zero,zero,zero,xmm1[2,3] 1489; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,2,3,u,u,10,11,14,15],zero,zero 1490; SSSE3-NEXT: por %xmm1, %xmm0 1491; SSSE3-NEXT: retq 1492; 1493; SSE41-LABEL: shuffle_v8i16_XXX1X579: 1494; SSE41: # %bb.0: 1495; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,2,0] 1496; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7] 1497; SSE41-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7] 1498; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7] 1499; SSE41-NEXT: retq 1500; 1501; AVX1-LABEL: shuffle_v8i16_XXX1X579: 1502; AVX1: # %bb.0: 1503; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,2,0] 1504; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7] 1505; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7] 1506; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7] 1507; AVX1-NEXT: retq 1508; 1509; AVX2-SLOW-LABEL: shuffle_v8i16_XXX1X579: 1510; AVX2-SLOW: # %bb.0: 1511; 
AVX2-SLOW-NEXT: vpbroadcastd %xmm1, %xmm1 1512; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7] 1513; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7] 1514; AVX2-SLOW-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7] 1515; AVX2-SLOW-NEXT: retq 1516; 1517; AVX2-FAST-LABEL: shuffle_v8i16_XXX1X579: 1518; AVX2-FAST: # %bb.0: 1519; AVX2-FAST-NEXT: vpbroadcastd %xmm1, %xmm1 1520; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,2,3,8,9,10,11,14,15,14,15] 1521; AVX2-FAST-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7] 1522; AVX2-FAST-NEXT: retq 1523; 1524; AVX512VL-SLOW-LABEL: shuffle_v8i16_XXX1X579: 1525; AVX512VL-SLOW: # %bb.0: 1526; AVX512VL-SLOW-NEXT: vpbroadcastd %xmm1, %xmm1 1527; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7] 1528; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7] 1529; AVX512VL-SLOW-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7] 1530; AVX512VL-SLOW-NEXT: retq 1531; 1532; AVX512VL-FAST-LABEL: shuffle_v8i16_XXX1X579: 1533; AVX512VL-FAST: # %bb.0: 1534; AVX512VL-FAST-NEXT: vpbroadcastd %xmm1, %xmm1 1535; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,2,3,8,9,10,11,14,15,14,15] 1536; AVX512VL-FAST-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7] 1537; AVX512VL-FAST-NEXT: retq 1538 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 5, i32 7, i32 9> 1539 ret <8 x i16> %shuffle 1540} 1541 1542define <8 x i16> @shuffle_v8i16_XX4X8acX(<8 x i16> %a, <8 x i16> %b) { 1543; SSE2-LABEL: shuffle_v8i16_XX4X8acX: 1544; SSE2: # %bb.0: 1545; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7] 1546; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,2,0] 1547; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,7,4,7] 1548; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,2],xmm1[2,3] 1549; SSE2-NEXT: retq 1550; 1551; SSSE3-LABEL: shuffle_v8i16_XX4X8acX: 1552; SSSE3: # %bb.0: 1553; 
SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,8,9,u,u],zero,zero,zero,zero,zero,zero,xmm0[u,u] 1554; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u],zero,zero,xmm1[u,u,0,1,4,5,8,9,u,u] 1555; SSSE3-NEXT: por %xmm1, %xmm0 1556; SSSE3-NEXT: retq 1557; 1558; SSE41-LABEL: shuffle_v8i16_XX4X8acX: 1559; SSE41: # %bb.0: 1560; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5] 1561; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3] 1562; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] 1563; SSE41-NEXT: retq 1564; 1565; AVX1-LABEL: shuffle_v8i16_XX4X8acX: 1566; AVX1: # %bb.0: 1567; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5] 1568; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3] 1569; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] 1570; AVX1-NEXT: retq 1571; 1572; AVX2OR512VL-LABEL: shuffle_v8i16_XX4X8acX: 1573; AVX2OR512VL: # %bb.0: 1574; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5] 1575; AVX2OR512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3] 1576; AVX2OR512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] 1577; AVX2OR512VL-NEXT: retq 1578 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 4, i32 undef, i32 8, i32 10, i32 12, i32 undef> 1579 ret <8 x i16> %shuffle 1580} 1581 1582define <8 x i16> @shuffle_v8i16_8zzzzzzz(i16 %i) { 1583; SSE-LABEL: shuffle_v8i16_8zzzzzzz: 1584; SSE: # %bb.0: 1585; SSE-NEXT: movzwl %di, %eax 1586; SSE-NEXT: movd %eax, %xmm0 1587; SSE-NEXT: retq 1588; 1589; AVX-LABEL: shuffle_v8i16_8zzzzzzz: 1590; AVX: # %bb.0: 1591; AVX-NEXT: movzwl %di, %eax 1592; AVX-NEXT: vmovd %eax, %xmm0 1593; AVX-NEXT: retq 1594 %a = insertelement <8 x i16> undef, i16 %i, i32 0 1595 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1596 ret <8 x i16> %shuffle 1597} 1598 1599define <8 x i16> @shuffle_v8i16_z8zzzzzz(i16 %i) { 
1600; SSE-LABEL: shuffle_v8i16_z8zzzzzz: 1601; SSE: # %bb.0: 1602; SSE-NEXT: pxor %xmm0, %xmm0 1603; SSE-NEXT: pinsrw $1, %edi, %xmm0 1604; SSE-NEXT: retq 1605; 1606; AVX-LABEL: shuffle_v8i16_z8zzzzzz: 1607; AVX: # %bb.0: 1608; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0 1609; AVX-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 1610; AVX-NEXT: retq 1611 %a = insertelement <8 x i16> undef, i16 %i, i32 0 1612 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 2, i32 8, i32 3, i32 7, i32 6, i32 5, i32 4, i32 3> 1613 ret <8 x i16> %shuffle 1614} 1615 1616define <8 x i16> @shuffle_v8i16_zzzzz8zz(i16 %i) { 1617; SSE-LABEL: shuffle_v8i16_zzzzz8zz: 1618; SSE: # %bb.0: 1619; SSE-NEXT: pxor %xmm0, %xmm0 1620; SSE-NEXT: pinsrw $5, %edi, %xmm0 1621; SSE-NEXT: retq 1622; 1623; AVX-LABEL: shuffle_v8i16_zzzzz8zz: 1624; AVX: # %bb.0: 1625; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0 1626; AVX-NEXT: vpinsrw $5, %edi, %xmm0, %xmm0 1627; AVX-NEXT: retq 1628 %a = insertelement <8 x i16> undef, i16 %i, i32 0 1629 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0> 1630 ret <8 x i16> %shuffle 1631} 1632 1633define <8 x i16> @shuffle_v8i16_zuuzuuz8(i16 %i) { 1634; SSE-LABEL: shuffle_v8i16_zuuzuuz8: 1635; SSE: # %bb.0: 1636; SSE-NEXT: pxor %xmm0, %xmm0 1637; SSE-NEXT: pinsrw $7, %edi, %xmm0 1638; SSE-NEXT: retq 1639; 1640; AVX-LABEL: shuffle_v8i16_zuuzuuz8: 1641; AVX: # %bb.0: 1642; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0 1643; AVX-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 1644; AVX-NEXT: retq 1645 %a = insertelement <8 x i16> undef, i16 %i, i32 0 1646 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 8> 1647 ret <8 x i16> %shuffle 1648} 1649 1650define <8 x i16> @shuffle_v8i16_zzBzzzzz(i16 %i) { 1651; SSE-LABEL: shuffle_v8i16_zzBzzzzz: 1652; SSE: # %bb.0: 1653; SSE-NEXT: pxor %xmm0, %xmm0 1654; SSE-NEXT: pinsrw $2, 
%edi, %xmm0 1655; SSE-NEXT: retq 1656; 1657; AVX-LABEL: shuffle_v8i16_zzBzzzzz: 1658; AVX: # %bb.0: 1659; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0 1660; AVX-NEXT: vpinsrw $2, %edi, %xmm0, %xmm0 1661; AVX-NEXT: retq 1662 %a = insertelement <8 x i16> undef, i16 %i, i32 3 1663 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 11, i32 3, i32 4, i32 5, i32 6, i32 7> 1664 ret <8 x i16> %shuffle 1665} 1666 1667define <8 x i16> @shuffle_v8i16_def01234(<8 x i16> %a, <8 x i16> %b) { 1668; SSE2-LABEL: shuffle_v8i16_def01234: 1669; SSE2: # %bb.0: 1670; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1671; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9] 1672; SSE2-NEXT: por %xmm1, %xmm0 1673; SSE2-NEXT: retq 1674; 1675; SSSE3-LABEL: shuffle_v8i16_def01234: 1676; SSSE3: # %bb.0: 1677; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9] 1678; SSSE3-NEXT: retq 1679; 1680; SSE41-LABEL: shuffle_v8i16_def01234: 1681; SSE41: # %bb.0: 1682; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9] 1683; SSE41-NEXT: retq 1684; 1685; AVX-LABEL: shuffle_v8i16_def01234: 1686; AVX: # %bb.0: 1687; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9] 1688; AVX-NEXT: retq 1689 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4> 1690 ret <8 x i16> %shuffle 1691} 1692 1693define <8 x i16> @shuffle_v8i16_ueuu123u(<8 x i16> %a, <8 x i16> %b) { 1694; SSE2-LABEL: shuffle_v8i16_ueuu123u: 1695; SSE2: # %bb.0: 1696; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1697; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9] 1698; SSE2-NEXT: por %xmm1, %xmm0 1699; SSE2-NEXT: retq 1700; 1701; SSSE3-LABEL: 
shuffle_v8i16_ueuu123u: 1702; SSSE3: # %bb.0: 1703; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9] 1704; SSSE3-NEXT: retq 1705; 1706; SSE41-LABEL: shuffle_v8i16_ueuu123u: 1707; SSE41: # %bb.0: 1708; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9] 1709; SSE41-NEXT: retq 1710; 1711; AVX-LABEL: shuffle_v8i16_ueuu123u: 1712; AVX: # %bb.0: 1713; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9] 1714; AVX-NEXT: retq 1715 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 14, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef> 1716 ret <8 x i16> %shuffle 1717} 1718 1719define <8 x i16> @shuffle_v8i16_56701234(<8 x i16> %a, <8 x i16> %b) { 1720; SSE2-LABEL: shuffle_v8i16_56701234: 1721; SSE2: # %bb.0: 1722; SSE2-NEXT: movdqa %xmm0, %xmm1 1723; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1724; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9] 1725; SSE2-NEXT: por %xmm1, %xmm0 1726; SSE2-NEXT: retq 1727; 1728; SSSE3-LABEL: shuffle_v8i16_56701234: 1729; SSSE3: # %bb.0: 1730; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] 1731; SSSE3-NEXT: retq 1732; 1733; SSE41-LABEL: shuffle_v8i16_56701234: 1734; SSE41: # %bb.0: 1735; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] 1736; SSE41-NEXT: retq 1737; 1738; AVX-LABEL: shuffle_v8i16_56701234: 1739; AVX: # %bb.0: 1740; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] 1741; AVX-NEXT: retq 1742 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4> 1743 ret <8 x i16> %shuffle 1744} 1745 1746define <8 x i16> @shuffle_v8i16_u6uu123u(<8 x i16> %a, <8 x i16> %b) { 1747; SSE2-LABEL: shuffle_v8i16_u6uu123u: 1748; SSE2: # %bb.0: 1749; SSE2-NEXT: 
movdqa %xmm0, %xmm1 1750; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1751; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9] 1752; SSE2-NEXT: por %xmm1, %xmm0 1753; SSE2-NEXT: retq 1754; 1755; SSSE3-LABEL: shuffle_v8i16_u6uu123u: 1756; SSSE3: # %bb.0: 1757; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] 1758; SSSE3-NEXT: retq 1759; 1760; SSE41-LABEL: shuffle_v8i16_u6uu123u: 1761; SSE41: # %bb.0: 1762; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] 1763; SSE41-NEXT: retq 1764; 1765; AVX-LABEL: shuffle_v8i16_u6uu123u: 1766; AVX: # %bb.0: 1767; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] 1768; AVX-NEXT: retq 1769 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 6, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef> 1770 ret <8 x i16> %shuffle 1771} 1772 1773define <8 x i16> @shuffle_v8i16_uuuu123u(<8 x i16> %a, <8 x i16> %b) { 1774; SSE-LABEL: shuffle_v8i16_uuuu123u: 1775; SSE: # %bb.0: 1776; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9] 1777; SSE-NEXT: retq 1778; 1779; AVX-LABEL: shuffle_v8i16_uuuu123u: 1780; AVX: # %bb.0: 1781; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9] 1782; AVX-NEXT: retq 1783 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef> 1784 ret <8 x i16> %shuffle 1785} 1786 1787define <8 x i16> @shuffle_v8i16_bcdef012(<8 x i16> %a, <8 x i16> %b) { 1788; SSE2-LABEL: shuffle_v8i16_bcdef012: 1789; SSE2: # %bb.0: 1790; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 1791; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5] 1792; SSE2-NEXT: por %xmm1, 
%xmm0 1793; SSE2-NEXT: retq 1794; 1795; SSSE3-LABEL: shuffle_v8i16_bcdef012: 1796; SSSE3: # %bb.0: 1797; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5] 1798; SSSE3-NEXT: retq 1799; 1800; SSE41-LABEL: shuffle_v8i16_bcdef012: 1801; SSE41: # %bb.0: 1802; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5] 1803; SSE41-NEXT: retq 1804; 1805; AVX-LABEL: shuffle_v8i16_bcdef012: 1806; AVX: # %bb.0: 1807; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5] 1808; AVX-NEXT: retq 1809 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2> 1810 ret <8 x i16> %shuffle 1811} 1812 1813define <8 x i16> @shuffle_v8i16_ucdeuu1u(<8 x i16> %a, <8 x i16> %b) { 1814; SSE2-LABEL: shuffle_v8i16_ucdeuu1u: 1815; SSE2: # %bb.0: 1816; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 1817; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5] 1818; SSE2-NEXT: por %xmm1, %xmm0 1819; SSE2-NEXT: retq 1820; 1821; SSSE3-LABEL: shuffle_v8i16_ucdeuu1u: 1822; SSSE3: # %bb.0: 1823; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5] 1824; SSSE3-NEXT: retq 1825; 1826; SSE41-LABEL: shuffle_v8i16_ucdeuu1u: 1827; SSE41: # %bb.0: 1828; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5] 1829; SSE41-NEXT: retq 1830; 1831; AVX-LABEL: shuffle_v8i16_ucdeuu1u: 1832; AVX: # %bb.0: 1833; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5] 1834; AVX-NEXT: retq 1835 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 12, i32 13, i32 14, i32 undef, i32 undef, i32 1, i32 undef> 1836 ret <8 x i16> %shuffle 1837} 1838 1839define <8 x i16> @shuffle_v8i16_34567012(<8 x i16> %a, <8 x i16> %b) { 1840; SSE2-LABEL: 
shuffle_v8i16_34567012: 1841; SSE2: # %bb.0: 1842; SSE2-NEXT: movdqa %xmm0, %xmm1 1843; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 1844; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5] 1845; SSE2-NEXT: por %xmm1, %xmm0 1846; SSE2-NEXT: retq 1847; 1848; SSSE3-LABEL: shuffle_v8i16_34567012: 1849; SSSE3: # %bb.0: 1850; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] 1851; SSSE3-NEXT: retq 1852; 1853; SSE41-LABEL: shuffle_v8i16_34567012: 1854; SSE41: # %bb.0: 1855; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] 1856; SSE41-NEXT: retq 1857; 1858; AVX-LABEL: shuffle_v8i16_34567012: 1859; AVX: # %bb.0: 1860; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] 1861; AVX-NEXT: retq 1862 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2> 1863 ret <8 x i16> %shuffle 1864} 1865 1866define <8 x i16> @shuffle_v8i16_u456uu1u(<8 x i16> %a, <8 x i16> %b) { 1867; SSE2-LABEL: shuffle_v8i16_u456uu1u: 1868; SSE2: # %bb.0: 1869; SSE2-NEXT: movdqa %xmm0, %xmm1 1870; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 1871; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5] 1872; SSE2-NEXT: por %xmm1, %xmm0 1873; SSE2-NEXT: retq 1874; 1875; SSSE3-LABEL: shuffle_v8i16_u456uu1u: 1876; SSSE3: # %bb.0: 1877; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] 1878; SSSE3-NEXT: retq 1879; 1880; SSE41-LABEL: shuffle_v8i16_u456uu1u: 1881; SSE41: # %bb.0: 1882; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] 1883; SSE41-NEXT: retq 1884; 1885; AVX-LABEL: shuffle_v8i16_u456uu1u: 1886; AVX: # %bb.0: 1887; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] 
1888; AVX-NEXT: retq 1889 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 1, i32 undef> 1890 ret <8 x i16> %shuffle 1891} 1892 1893define <8 x i16> @shuffle_v8i16_u456uuuu(<8 x i16> %a, <8 x i16> %b) { 1894; SSE-LABEL: shuffle_v8i16_u456uuuu: 1895; SSE: # %bb.0: 1896; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 1897; SSE-NEXT: retq 1898; 1899; AVX-LABEL: shuffle_v8i16_u456uuuu: 1900; AVX: # %bb.0: 1901; AVX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 1902; AVX-NEXT: retq 1903 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 undef, i32 undef> 1904 ret <8 x i16> %shuffle 1905} 1906 1907define <8 x i16> @shuffle_v8i16_3456789a(<8 x i16> %a, <8 x i16> %b) { 1908; SSE2-LABEL: shuffle_v8i16_3456789a: 1909; SSE2: # %bb.0: 1910; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 1911; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5] 1912; SSE2-NEXT: por %xmm1, %xmm0 1913; SSE2-NEXT: retq 1914; 1915; SSSE3-LABEL: shuffle_v8i16_3456789a: 1916; SSSE3: # %bb.0: 1917; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] 1918; SSSE3-NEXT: movdqa %xmm1, %xmm0 1919; SSSE3-NEXT: retq 1920; 1921; SSE41-LABEL: shuffle_v8i16_3456789a: 1922; SSE41: # %bb.0: 1923; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] 1924; SSE41-NEXT: movdqa %xmm1, %xmm0 1925; SSE41-NEXT: retq 1926; 1927; AVX-LABEL: shuffle_v8i16_3456789a: 1928; AVX: # %bb.0: 1929; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] 1930; AVX-NEXT: retq 1931 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10> 1932 ret 
<8 x i16> %shuffle 1933} 1934 1935define <8 x i16> @shuffle_v8i16_u456uu9u(<8 x i16> %a, <8 x i16> %b) { 1936; SSE2-LABEL: shuffle_v8i16_u456uu9u: 1937; SSE2: # %bb.0: 1938; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 1939; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5] 1940; SSE2-NEXT: por %xmm1, %xmm0 1941; SSE2-NEXT: retq 1942; 1943; SSSE3-LABEL: shuffle_v8i16_u456uu9u: 1944; SSSE3: # %bb.0: 1945; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] 1946; SSSE3-NEXT: movdqa %xmm1, %xmm0 1947; SSSE3-NEXT: retq 1948; 1949; SSE41-LABEL: shuffle_v8i16_u456uu9u: 1950; SSE41: # %bb.0: 1951; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] 1952; SSE41-NEXT: movdqa %xmm1, %xmm0 1953; SSE41-NEXT: retq 1954; 1955; AVX-LABEL: shuffle_v8i16_u456uu9u: 1956; AVX: # %bb.0: 1957; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] 1958; AVX-NEXT: retq 1959 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 9, i32 undef> 1960 ret <8 x i16> %shuffle 1961} 1962 1963define <8 x i16> @shuffle_v8i16_56789abc(<8 x i16> %a, <8 x i16> %b) { 1964; SSE2-LABEL: shuffle_v8i16_56789abc: 1965; SSE2: # %bb.0: 1966; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1967; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7,8,9] 1968; SSE2-NEXT: por %xmm1, %xmm0 1969; SSE2-NEXT: retq 1970; 1971; SSSE3-LABEL: shuffle_v8i16_56789abc: 1972; SSSE3: # %bb.0: 1973; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9] 1974; SSSE3-NEXT: movdqa %xmm1, %xmm0 1975; SSSE3-NEXT: retq 1976; 1977; SSE41-LABEL: shuffle_v8i16_56789abc: 1978; SSE41: # %bb.0: 1979; SSE41-NEXT: palignr {{.*#+}} xmm1 = 
xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9] 1980; SSE41-NEXT: movdqa %xmm1, %xmm0 1981; SSE41-NEXT: retq 1982; 1983; AVX-LABEL: shuffle_v8i16_56789abc: 1984; AVX: # %bb.0: 1985; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9] 1986; AVX-NEXT: retq 1987 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12> 1988 ret <8 x i16> %shuffle 1989} 1990 1991define <8 x i16> @shuffle_v8i16_u6uu9abu(<8 x i16> %a, <8 x i16> %b) { 1992; SSE2-LABEL: shuffle_v8i16_u6uu9abu: 1993; SSE2: # %bb.0: 1994; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1995; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7,8,9] 1996; SSE2-NEXT: por %xmm1, %xmm0 1997; SSE2-NEXT: retq 1998; 1999; SSSE3-LABEL: shuffle_v8i16_u6uu9abu: 2000; SSSE3: # %bb.0: 2001; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9] 2002; SSSE3-NEXT: movdqa %xmm1, %xmm0 2003; SSSE3-NEXT: retq 2004; 2005; SSE41-LABEL: shuffle_v8i16_u6uu9abu: 2006; SSE41: # %bb.0: 2007; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9] 2008; SSE41-NEXT: movdqa %xmm1, %xmm0 2009; SSE41-NEXT: retq 2010; 2011; AVX-LABEL: shuffle_v8i16_u6uu9abu: 2012; AVX: # %bb.0: 2013; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9] 2014; AVX-NEXT: retq 2015 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 6, i32 undef, i32 undef, i32 9, i32 10, i32 11, i32 undef> 2016 ret <8 x i16> %shuffle 2017} 2018 2019define <8 x i16> @shuffle_v8i16_0uuu1uuu(<8 x i16> %a) { 2020; SSE2-LABEL: shuffle_v8i16_0uuu1uuu: 2021; SSE2: # %bb.0: 2022; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3] 2023; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7] 2024; SSE2-NEXT: retq 2025; 2026; SSSE3-LABEL: shuffle_v8i16_0uuu1uuu: 2027; SSSE3: # 
%bb.0: 2028; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3] 2029; SSSE3-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7] 2030; SSSE3-NEXT: retq 2031; 2032; SSE41-LABEL: shuffle_v8i16_0uuu1uuu: 2033; SSE41: # %bb.0: 2034; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 2035; SSE41-NEXT: retq 2036; 2037; AVX-LABEL: shuffle_v8i16_0uuu1uuu: 2038; AVX: # %bb.0: 2039; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 2040; AVX-NEXT: retq 2041 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef> 2042 ret <8 x i16> %shuffle 2043} 2044 2045define <8 x i16> @shuffle_v8i16_0zzz1zzz(<8 x i16> %a) { 2046; SSE2-LABEL: shuffle_v8i16_0zzz1zzz: 2047; SSE2: # %bb.0: 2048; SSE2-NEXT: pxor %xmm1, %xmm1 2049; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2050; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 2051; SSE2-NEXT: retq 2052; 2053; SSSE3-LABEL: shuffle_v8i16_0zzz1zzz: 2054; SSSE3: # %bb.0: 2055; SSSE3-NEXT: pxor %xmm1, %xmm1 2056; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2057; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 2058; SSSE3-NEXT: retq 2059; 2060; SSE41-LABEL: shuffle_v8i16_0zzz1zzz: 2061; SSE41: # %bb.0: 2062; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 2063; SSE41-NEXT: retq 2064; 2065; AVX-LABEL: shuffle_v8i16_0zzz1zzz: 2066; AVX: # %bb.0: 2067; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 2068; AVX-NEXT: retq 2069 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15> 2070 ret <8 x i16> %shuffle 2071} 2072 2073define <8 x i16> @shuffle_v8i16_0u1u2u3u(<8 x i16> %a) { 2074; 
SSE2-LABEL: shuffle_v8i16_0u1u2u3u: 2075; SSE2: # %bb.0: 2076; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 2077; SSE2-NEXT: retq 2078; 2079; SSSE3-LABEL: shuffle_v8i16_0u1u2u3u: 2080; SSSE3: # %bb.0: 2081; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 2082; SSSE3-NEXT: retq 2083; 2084; SSE41-LABEL: shuffle_v8i16_0u1u2u3u: 2085; SSE41: # %bb.0: 2086; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 2087; SSE41-NEXT: retq 2088; 2089; AVX-LABEL: shuffle_v8i16_0u1u2u3u: 2090; AVX: # %bb.0: 2091; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 2092; AVX-NEXT: retq 2093 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3, i32 undef> 2094 ret <8 x i16> %shuffle 2095} 2096 2097define <8 x i16> @shuffle_v8i16_0z1z2z3z(<8 x i16> %a) { 2098; SSE2-LABEL: shuffle_v8i16_0z1z2z3z: 2099; SSE2: # %bb.0: 2100; SSE2-NEXT: pxor %xmm1, %xmm1 2101; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2102; SSE2-NEXT: retq 2103; 2104; SSSE3-LABEL: shuffle_v8i16_0z1z2z3z: 2105; SSSE3: # %bb.0: 2106; SSSE3-NEXT: pxor %xmm1, %xmm1 2107; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2108; SSSE3-NEXT: retq 2109; 2110; SSE41-LABEL: shuffle_v8i16_0z1z2z3z: 2111; SSE41: # %bb.0: 2112; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 2113; SSE41-NEXT: retq 2114; 2115; AVX-LABEL: shuffle_v8i16_0z1z2z3z: 2116; AVX: # %bb.0: 2117; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 2118; AVX-NEXT: retq 2119 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15> 2120 ret <8 x i16> %shuffle 2121} 2122 2123define <8 x i16> @shuffle_v8i16_01100110(<8 x i16> 
%a) { 2124; SSE-LABEL: shuffle_v8i16_01100110: 2125; SSE: # %bb.0: 2126; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7] 2127; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] 2128; SSE-NEXT: retq 2129; 2130; AVX1-LABEL: shuffle_v8i16_01100110: 2131; AVX1: # %bb.0: 2132; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7] 2133; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] 2134; AVX1-NEXT: retq 2135; 2136; AVX2-SLOW-LABEL: shuffle_v8i16_01100110: 2137; AVX2-SLOW: # %bb.0: 2138; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7] 2139; AVX2-SLOW-NEXT: vpbroadcastq %xmm0, %xmm0 2140; AVX2-SLOW-NEXT: retq 2141; 2142; AVX2-FAST-LABEL: shuffle_v8i16_01100110: 2143; AVX2-FAST: # %bb.0: 2144; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,2,3,0,1,0,1,2,3,2,3,0,1] 2145; AVX2-FAST-NEXT: retq 2146; 2147; AVX512VL-SLOW-LABEL: shuffle_v8i16_01100110: 2148; AVX512VL-SLOW: # %bb.0: 2149; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7] 2150; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] 2151; AVX512VL-SLOW-NEXT: retq 2152; 2153; AVX512VL-FAST-LABEL: shuffle_v8i16_01100110: 2154; AVX512VL-FAST: # %bb.0: 2155; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,2,3,0,1,0,1,2,3,2,3,0,1] 2156; AVX512VL-FAST-NEXT: retq 2157 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 1, i32 0, i32 0, i32 1, i32 1, i32 0> 2158 ret <8 x i16> %shuffle 2159} 2160 2161define <8 x i16> @shuffle_v8i16_01u0u110(<8 x i16> %a) { 2162; SSE-LABEL: shuffle_v8i16_01u0u110: 2163; SSE: # %bb.0: 2164; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7] 2165; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] 2166; SSE-NEXT: retq 2167; 2168; AVX1-LABEL: shuffle_v8i16_01u0u110: 2169; AVX1: # %bb.0: 2170; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7] 2171; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] 2172; AVX1-NEXT: retq 2173; 2174; AVX2-SLOW-LABEL: shuffle_v8i16_01u0u110: 2175; 
AVX2-SLOW: # %bb.0: 2176; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7] 2177; AVX2-SLOW-NEXT: vpbroadcastq %xmm0, %xmm0 2178; AVX2-SLOW-NEXT: retq 2179; 2180; AVX2-FAST-LABEL: shuffle_v8i16_01u0u110: 2181; AVX2-FAST: # %bb.0: 2182; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,2,3,0,1,0,1,2,3,2,3,0,1] 2183; AVX2-FAST-NEXT: retq 2184; 2185; AVX512VL-SLOW-LABEL: shuffle_v8i16_01u0u110: 2186; AVX512VL-SLOW: # %bb.0: 2187; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7] 2188; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] 2189; AVX512VL-SLOW-NEXT: retq 2190; 2191; AVX512VL-FAST-LABEL: shuffle_v8i16_01u0u110: 2192; AVX512VL-FAST: # %bb.0: 2193; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,2,3,0,1,0,1,2,3,2,3,0,1] 2194; AVX512VL-FAST-NEXT: retq 2195 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 undef, i32 0, i32 undef, i32 1, i32 1, i32 0> 2196 ret <8 x i16> %shuffle 2197} 2198 2199define <8 x i16> @shuffle_v8i16_467uu675(<8 x i16> %a) { 2200; SSE-LABEL: shuffle_v8i16_467uu675: 2201; SSE: # %bb.0: 2202; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,7,5] 2203; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 2204; SSE-NEXT: retq 2205; 2206; AVX1-LABEL: shuffle_v8i16_467uu675: 2207; AVX1: # %bb.0: 2208; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,7,5] 2209; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 2210; AVX1-NEXT: retq 2211; 2212; AVX2-SLOW-LABEL: shuffle_v8i16_467uu675: 2213; AVX2-SLOW: # %bb.0: 2214; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,7,5] 2215; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 2216; AVX2-SLOW-NEXT: retq 2217; 2218; AVX2-FAST-LABEL: shuffle_v8i16_467uu675: 2219; AVX2-FAST: # %bb.0: 2220; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,12,13,14,15,10,11,8,9,12,13,14,15,10,11] 2221; AVX2-FAST-NEXT: retq 2222; 2223; AVX512VL-SLOW-LABEL: shuffle_v8i16_467uu675: 2224; AVX512VL-SLOW: # %bb.0: 2225; 
AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,7,5] 2226; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 2227; AVX512VL-SLOW-NEXT: retq 2228; 2229; AVX512VL-FAST-LABEL: shuffle_v8i16_467uu675: 2230; AVX512VL-FAST: # %bb.0: 2231; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,12,13,14,15,10,11,8,9,12,13,14,15,10,11] 2232; AVX512VL-FAST-NEXT: retq 2233 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 4, i32 6, i32 7, i32 undef, i32 undef, i32 6, i32 7, i32 5> 2234 ret <8 x i16> %shuffle 2235} 2236 2237; 2238; Shuffle to logical bit shifts 2239; 2240define <8 x i16> @shuffle_v8i16_z0z2z4z6(<8 x i16> %a) { 2241; SSE-LABEL: shuffle_v8i16_z0z2z4z6: 2242; SSE: # %bb.0: 2243; SSE-NEXT: pslld $16, %xmm0 2244; SSE-NEXT: retq 2245; 2246; AVX-LABEL: shuffle_v8i16_z0z2z4z6: 2247; AVX: # %bb.0: 2248; AVX-NEXT: vpslld $16, %xmm0, %xmm0 2249; AVX-NEXT: retq 2250 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 0, i32 8, i32 2, i32 8, i32 4, i32 8, i32 6> 2251 ret <8 x i16> %shuffle 2252} 2253 2254define <8 x i16> @shuffle_v8i16_zzz0zzz4(<8 x i16> %a) { 2255; SSE-LABEL: shuffle_v8i16_zzz0zzz4: 2256; SSE: # %bb.0: 2257; SSE-NEXT: psllq $48, %xmm0 2258; SSE-NEXT: retq 2259; 2260; AVX-LABEL: shuffle_v8i16_zzz0zzz4: 2261; AVX: # %bb.0: 2262; AVX-NEXT: vpsllq $48, %xmm0, %xmm0 2263; AVX-NEXT: retq 2264 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 8, i32 8, i32 0, i32 8, i32 8, i32 8, i32 4> 2265 ret <8 x i16> %shuffle 2266} 2267 2268define <8 x i16> @shuffle_v8i16_zz01zX4X(<8 x i16> %a) { 2269; SSE-LABEL: shuffle_v8i16_zz01zX4X: 2270; SSE: # %bb.0: 2271; SSE-NEXT: psllq $32, %xmm0 2272; SSE-NEXT: retq 2273; 2274; AVX-LABEL: shuffle_v8i16_zz01zX4X: 2275; AVX: # %bb.0: 2276; AVX-NEXT: vpsllq $32, %xmm0, %xmm0 2277; AVX-NEXT: retq 2278 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 8, i32 0, i32 1, i32 8, i32 
undef, i32 4, i32 undef> 2279 ret <8 x i16> %shuffle 2280} 2281 2282define <8 x i16> @shuffle_v8i16_z0X2z456(<8 x i16> %a) { 2283; SSE-LABEL: shuffle_v8i16_z0X2z456: 2284; SSE: # %bb.0: 2285; SSE-NEXT: psllq $16, %xmm0 2286; SSE-NEXT: retq 2287; 2288; AVX-LABEL: shuffle_v8i16_z0X2z456: 2289; AVX: # %bb.0: 2290; AVX-NEXT: vpsllq $16, %xmm0, %xmm0 2291; AVX-NEXT: retq 2292 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 0, i32 undef, i32 2, i32 8, i32 4, i32 5, i32 6> 2293 ret <8 x i16> %shuffle 2294} 2295 2296define <8 x i16> @shuffle_v8i16_1z3zXz7z(<8 x i16> %a) { 2297; SSE-LABEL: shuffle_v8i16_1z3zXz7z: 2298; SSE: # %bb.0: 2299; SSE-NEXT: psrld $16, %xmm0 2300; SSE-NEXT: retq 2301; 2302; AVX-LABEL: shuffle_v8i16_1z3zXz7z: 2303; AVX: # %bb.0: 2304; AVX-NEXT: vpsrld $16, %xmm0, %xmm0 2305; AVX-NEXT: retq 2306 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 1, i32 8, i32 3, i32 8, i32 undef, i32 8, i32 7, i32 8> 2307 ret <8 x i16> %shuffle 2308} 2309 2310define <8 x i16> @shuffle_v8i16_1X3z567z(<8 x i16> %a) { 2311; SSE-LABEL: shuffle_v8i16_1X3z567z: 2312; SSE: # %bb.0: 2313; SSE-NEXT: psrlq $16, %xmm0 2314; SSE-NEXT: retq 2315; 2316; AVX-LABEL: shuffle_v8i16_1X3z567z: 2317; AVX: # %bb.0: 2318; AVX-NEXT: vpsrlq $16, %xmm0, %xmm0 2319; AVX-NEXT: retq 2320 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 1, i32 undef, i32 3, i32 8, i32 5, i32 6, i32 7, i32 8> 2321 ret <8 x i16> %shuffle 2322} 2323 2324define <8 x i16> @shuffle_v8i16_23zz67zz(<8 x i16> %a) { 2325; SSE-LABEL: shuffle_v8i16_23zz67zz: 2326; SSE: # %bb.0: 2327; SSE-NEXT: psrlq $32, %xmm0 2328; SSE-NEXT: retq 2329; 2330; AVX-LABEL: shuffle_v8i16_23zz67zz: 2331; AVX: # %bb.0: 2332; AVX-NEXT: vpsrlq $32, %xmm0, %xmm0 2333; AVX-NEXT: retq 2334 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 2, i32 3, i32 8, i32 8, i32 6, i32 7, i32 8, i32 8> 2335 ret <8 x i16> %shuffle 2336} 2337 
2338define <8 x i16> @shuffle_v8i16_3zXXXzzz(<8 x i16> %a) { 2339; SSE-LABEL: shuffle_v8i16_3zXXXzzz: 2340; SSE: # %bb.0: 2341; SSE-NEXT: psrlq $48, %xmm0 2342; SSE-NEXT: retq 2343; 2344; AVX-LABEL: shuffle_v8i16_3zXXXzzz: 2345; AVX: # %bb.0: 2346; AVX-NEXT: vpsrlq $48, %xmm0, %xmm0 2347; AVX-NEXT: retq 2348 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 3, i32 8, i32 undef, i32 undef, i32 undef, i32 8, i32 8, i32 8> 2349 ret <8 x i16> %shuffle 2350} 2351 2352define <8 x i16> @shuffle_v8i16_01u3zzuz(<8 x i16> %a) { 2353; SSE-LABEL: shuffle_v8i16_01u3zzuz: 2354; SSE: # %bb.0: 2355; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero 2356; SSE-NEXT: retq 2357; 2358; AVX-LABEL: shuffle_v8i16_01u3zzuz: 2359; AVX: # %bb.0: 2360; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero 2361; AVX-NEXT: retq 2362 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 undef, i32 3, i32 8, i32 8, i32 undef, i32 8> 2363 ret <8 x i16> %shuffle 2364} 2365 2366define <8 x i16> @shuffle_v8i16_0z234567(<8 x i16> %a) { 2367; SSE2-LABEL: shuffle_v8i16_0z234567: 2368; SSE2: # %bb.0: 2369; SSE2-NEXT: andps {{.*}}(%rip), %xmm0 2370; SSE2-NEXT: retq 2371; 2372; SSSE3-LABEL: shuffle_v8i16_0z234567: 2373; SSSE3: # %bb.0: 2374; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0 2375; SSSE3-NEXT: retq 2376; 2377; SSE41-LABEL: shuffle_v8i16_0z234567: 2378; SSE41: # %bb.0: 2379; SSE41-NEXT: pxor %xmm1, %xmm1 2380; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7] 2381; SSE41-NEXT: retq 2382; 2383; AVX-LABEL: shuffle_v8i16_0z234567: 2384; AVX: # %bb.0: 2385; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 2386; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7] 2387; AVX-NEXT: retq 2388 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 2389 ret <8 x i16> %shuffle 2390} 2391 2392define <8 x i16> @shuffle_v8i16_0zzzz5z7(<8 x i16> %a) { 2393; 
SSE2-LABEL: shuffle_v8i16_0zzzz5z7: 2394; SSE2: # %bb.0: 2395; SSE2-NEXT: andps {{.*}}(%rip), %xmm0 2396; SSE2-NEXT: retq 2397; 2398; SSSE3-LABEL: shuffle_v8i16_0zzzz5z7: 2399; SSSE3: # %bb.0: 2400; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0 2401; SSSE3-NEXT: retq 2402; 2403; SSE41-LABEL: shuffle_v8i16_0zzzz5z7: 2404; SSE41: # %bb.0: 2405; SSE41-NEXT: pxor %xmm1, %xmm1 2406; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4],xmm0[5],xmm1[6],xmm0[7] 2407; SSE41-NEXT: retq 2408; 2409; AVX-LABEL: shuffle_v8i16_0zzzz5z7: 2410; AVX: # %bb.0: 2411; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 2412; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4],xmm0[5],xmm1[6],xmm0[7] 2413; AVX-NEXT: retq 2414 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 8, i32 5, i32 8, i32 7> 2415 ret <8 x i16> %shuffle 2416} 2417 ; Keep lanes 0-6 of %a and zero only lane 7 (mask index 15 selects the zeroinitializer operand); the lane-1-zero case is covered by shuffle_v8i16_0z234567. 2418define <8 x i16> @shuffle_v8i16_0123456z(<8 x i16> %a) { 2419; SSE2-LABEL: shuffle_v8i16_0123456z: 2420; SSE2: # %bb.0: 2421; SSE2-NEXT: andps {{.*}}(%rip), %xmm0 2422; SSE2-NEXT: retq 2423; 2424; SSSE3-LABEL: shuffle_v8i16_0123456z: 2425; SSSE3: # %bb.0: 2426; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0 2427; SSSE3-NEXT: retq 2428; 2429; SSE41-LABEL: shuffle_v8i16_0123456z: 2430; SSE41: # %bb.0: 2431; SSE41-NEXT: pxor %xmm1, %xmm1 2432; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7] 2433; SSE41-NEXT: retq 2434; 2435; AVX-LABEL: shuffle_v8i16_0123456z: 2436; AVX: # %bb.0: 2437; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 2438; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7] 2439; AVX-NEXT: retq 2440 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15> 2441 ret <8 x i16> %shuffle 2442} 2443 2444define <8 x i16> @shuffle_v8i16_fu3ucc5u(<8 x i16> %a, <8 x i16> %b) { 2445; SSE-LABEL: shuffle_v8i16_fu3ucc5u: 2446; SSE: # %bb.0: 2447; SSE-NEXT: pslldq {{.*#+}} xmm0 = 
zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] 2448; SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,4,4] 2449; SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3] 2450; SSE-NEXT: movdqa %xmm1, %xmm0 2451; SSE-NEXT: retq 2452; 2453; AVX-LABEL: shuffle_v8i16_fu3ucc5u: 2454; AVX: # %bb.0: 2455; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] 2456; AVX-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,4,4] 2457; AVX-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm1[2],xmm0[2],xmm1[3],xmm0[3] 2458; AVX-NEXT: retq 2459 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 15, i32 undef, i32 3, i32 undef, i32 12, i32 12, i32 5, i32 undef> 2460 ret <8 x i16> %shuffle 2461} 2462 2463define <8 x i16> @shuffle_v8i16_8012345u(<8 x i16> %a) { 2464; SSE-LABEL: shuffle_v8i16_8012345u: 2465; SSE: # %bb.0: 2466; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] 2467; SSE-NEXT: retq 2468; 2469; AVX-LABEL: shuffle_v8i16_8012345u: 2470; AVX: # %bb.0: 2471; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] 2472; AVX-NEXT: retq 2473 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 8, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 undef> 2474 2475 ret <8 x i16> %shuffle 2476} 2477 2478define <8 x i16> @mask_v8i16_012345ef(<8 x i16> %a, <8 x i16> %b) { 2479; SSE2-LABEL: mask_v8i16_012345ef: 2480; SSE2: # %bb.0: 2481; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0] 2482; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0] 2483; SSE2-NEXT: movaps %xmm1, %xmm0 2484; SSE2-NEXT: retq 2485; 2486; SSSE3-LABEL: mask_v8i16_012345ef: 2487; SSSE3: # %bb.0: 2488; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0] 2489; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0] 2490; SSSE3-NEXT: movaps %xmm1, %xmm0 2491; SSSE3-NEXT: retq 2492; 2493; SSE41-LABEL: mask_v8i16_012345ef: 2494; SSE41: # %bb.0: 2495; SSE41-NEXT: blendps {{.*#+}} xmm0 = 
xmm1[0,1,2],xmm0[3] 2496; SSE41-NEXT: retq 2497; 2498; AVX-LABEL: mask_v8i16_012345ef: 2499; AVX: # %bb.0: 2500; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3] 2501; AVX-NEXT: retq 2502 %1 = bitcast <8 x i16> %a to <2 x i64> 2503 %2 = bitcast <8 x i16> %b to <2 x i64> 2504 %3 = and <2 x i64> %1, <i64 0, i64 -4294967296> 2505 %4 = and <2 x i64> %2, <i64 -1, i64 4294967295> 2506 %5 = or <2 x i64> %4, %3 2507 %6 = bitcast <2 x i64> %5 to <8 x i16> 2508 ret <8 x i16> %6 2509} 2510 2511define <8 x i16> @insert_dup_mem_v8i16_i32(i32* %ptr) { 2512; SSE-LABEL: insert_dup_mem_v8i16_i32: 2513; SSE: # %bb.0: 2514; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2515; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7] 2516; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 2517; SSE-NEXT: retq 2518; 2519; AVX1-LABEL: insert_dup_mem_v8i16_i32: 2520; AVX1: # %bb.0: 2521; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2522; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7] 2523; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 2524; AVX1-NEXT: retq 2525; 2526; AVX2OR512VL-LABEL: insert_dup_mem_v8i16_i32: 2527; AVX2OR512VL: # %bb.0: 2528; AVX2OR512VL-NEXT: vpbroadcastw (%rdi), %xmm0 2529; AVX2OR512VL-NEXT: retq 2530 %tmp = load i32, i32* %ptr, align 4 2531 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0 2532 %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16> 2533 %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <8 x i32> zeroinitializer 2534 ret <8 x i16> %tmp3 2535} 2536 2537define <8 x i16> @insert_dup_mem_v8i16_sext_i16(i16* %ptr) { 2538; SSE-LABEL: insert_dup_mem_v8i16_sext_i16: 2539; SSE: # %bb.0: 2540; SSE-NEXT: movswl (%rdi), %eax 2541; SSE-NEXT: movd %eax, %xmm0 2542; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7] 2543; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 2544; SSE-NEXT: retq 2545; 2546; AVX1-LABEL: insert_dup_mem_v8i16_sext_i16: 2547; AVX1: # %bb.0: 2548; AVX1-NEXT: movswl (%rdi), %eax 2549; AVX1-NEXT: 
vmovd %eax, %xmm0 2550; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7] 2551; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 2552; AVX1-NEXT: retq 2553; 2554; AVX2-LABEL: insert_dup_mem_v8i16_sext_i16: 2555; AVX2: # %bb.0: 2556; AVX2-NEXT: movswl (%rdi), %eax 2557; AVX2-NEXT: vmovd %eax, %xmm0 2558; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0 2559; AVX2-NEXT: retq 2560; 2561; AVX512VL-LABEL: insert_dup_mem_v8i16_sext_i16: 2562; AVX512VL: # %bb.0: 2563; AVX512VL-NEXT: movswl (%rdi), %eax 2564; AVX512VL-NEXT: vpbroadcastw %eax, %xmm0 2565; AVX512VL-NEXT: retq 2566 %tmp = load i16, i16* %ptr, align 2 2567 %tmp1 = sext i16 %tmp to i32 2568 %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0 2569 %tmp3 = bitcast <4 x i32> %tmp2 to <8 x i16> 2570 %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <8 x i32> zeroinitializer 2571 ret <8 x i16> %tmp4 2572} 2573 2574define <8 x i16> @insert_dup_elt1_mem_v8i16_i32(i32* %ptr) { 2575; SSE-LABEL: insert_dup_elt1_mem_v8i16_i32: 2576; SSE: # %bb.0: 2577; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2578; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,2,3,4,5,6,7] 2579; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 2580; SSE-NEXT: retq 2581; 2582; AVX1-LABEL: insert_dup_elt1_mem_v8i16_i32: 2583; AVX1: # %bb.0: 2584; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2585; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,2,3,4,5,6,7] 2586; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 2587; AVX1-NEXT: retq 2588; 2589; AVX2OR512VL-LABEL: insert_dup_elt1_mem_v8i16_i32: 2590; AVX2OR512VL: # %bb.0: 2591; AVX2OR512VL-NEXT: vpbroadcastw 2(%rdi), %xmm0 2592; AVX2OR512VL-NEXT: retq 2593 %tmp = load i32, i32* %ptr, align 4 2594 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0 2595 %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16> 2596 %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 2597 ret <8 x i16> %tmp3 2598} 2599 
; The loaded i32 goes into lane 1 of a zero <4 x i32>; after the bitcast its
; upper half is word 3, which is then splatted across all eight words.
define <8 x i16> @insert_dup_elt3_mem_v8i16_i32(i32* %ptr) {
; SSE2-LABEL: insert_dup_elt3_mem_v8i16_i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,1,0,1,4,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: insert_dup_elt3_mem_v8i16_i32:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_dup_elt3_mem_v8i16_i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: insert_dup_elt3_mem_v8i16_i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: insert_dup_elt3_mem_v8i16_i32:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vpbroadcastw 2(%rdi), %xmm0
; AVX2OR512VL-NEXT:    retq
  %scalar = load i32, i32* %ptr, align 4
  %lanes = insertelement <4 x i32> zeroinitializer, i32 %scalar, i32 1
  %words = bitcast <4 x i32> %lanes to <8 x i16>
  %splat = shufflevector <8 x i16> %words, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <8 x i16> %splat
}

; An i16 is loaded, sign-extended to i32 and inserted into lane 0; word 1 (the
; sign-extension half of that i32) is splatted, so the expected code shifts the
; extended scalar right by 16 on the broadcast-capable targets.
define <8 x i16> @insert_dup_elt1_mem_v8i16_sext_i16(i16* %ptr) {
; SSE-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
; SSE:       # %bb.0:
; SSE-NEXT:    movswl (%rdi), %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,1,2,3,4,5,6,7]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    retq
;
; AVX1-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    movswl (%rdi), %eax
; AVX1-NEXT:    vmovd %eax, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[1,1,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    movswl (%rdi), %eax
; AVX2-NEXT:    shrl $16, %eax
; AVX2-NEXT:    vmovd %eax, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    movswl (%rdi), %eax
; AVX512VL-NEXT:    shrl $16, %eax
; AVX512VL-NEXT:    vpbroadcastw %eax, %xmm0
; AVX512VL-NEXT:    retq
  %narrow = load i16, i16* %ptr, align 2
  %wide = sext i16 %narrow to i32
  %lanes = insertelement <4 x i32> zeroinitializer, i32 %wide, i32 0
  %words = bitcast <4 x i32> %lanes to <8 x i16>
  %splat = shufflevector <8 x i16> %words, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i16> %splat
}

; Sign-extended i16 variant with the i32 inserted into lane 1 and word 3 (its
; upper half after the bitcast) splatted across the result.
define <8 x i16> @insert_dup_elt3_mem_v8i16_sext_i16(i16* %ptr) {
; SSE2-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movswl (%rdi), %eax
; SSE2-NEXT:    movd %eax, %xmm0
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,1,0,1,4,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movswl (%rdi), %eax
; SSSE3-NEXT:    movd %eax, %xmm0
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movswl (%rdi), %eax
; SSE41-NEXT:    movd %eax, %xmm0
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    movswl (%rdi), %eax
; AVX1-NEXT:    vmovd %eax, %xmm0
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    movswl (%rdi), %eax
; AVX2-NEXT:    shrl $16, %eax
; AVX2-NEXT:    vmovd %eax, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    movswl (%rdi), %eax
; AVX512VL-NEXT:    shrl $16, %eax
; AVX512VL-NEXT:    vpbroadcastw %eax, %xmm0
; AVX512VL-NEXT:    retq
  %narrow = load i16, i16* %ptr, align 2
  %wide = sext i16 %narrow to i32
  %lanes = insertelement <4 x i32> zeroinitializer, i32 %wide, i32 1
  %words = bitcast <4 x i32> %lanes to <8 x i16>
  %splat = shufflevector <8 x i16> %words, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <8 x i16> %splat
}