; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
;
; Codegen checks for <8 x i16> shufflevector lowering on x86-64, one llc
; invocation per feature level listed above.
; Every function is named after its shuffle mask, one character per result
; lane: 0-7 select a lane of %a, 8-9 and a-f (or A-F) select a lane of %b,
; and u or X marks an undef lane.

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"

define <8 x i16> @shuffle_v8i16_01012323(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_01012323:
; SSE:       # BB#0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_01012323:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3>
  ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_67452301(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_67452301:
; SSE:       # BB#0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_67452301:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 1>
  ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_456789AB(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_456789AB:
; SSE2:       # BB#0:
; SSE2-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_456789AB:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_456789AB:
; SSE41:       # BB#0:
; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_456789AB:
; AVX:       # BB#0:
; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_00000000(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_00000000:
; SSE2:       # BB#0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_00000000:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_00000000:
; SSE41:       # BB#0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: shuffle_v8i16_00000000:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i16_00000000:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_00004444(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_00004444:
; SSE:       # BB#0:
; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_00004444:
; AVX:       # BB#0:
; AVX-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_u0u1u2u3(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_u0u1u2u3:
; SSE:       # BB#0:
; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_u0u1u2u3:
; AVX:       # BB#0:
; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3>
  ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_u4u5u6u7(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_u4u5u6u7:
; SSE:       # BB#0:
; SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_u4u5u6u7:
; AVX:       # BB#0:
; AVX-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 undef, i32 5, i32 undef, i32 6, i32 undef, i32 7>
  ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_31206745(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_31206745:
; SSE:       # BB#0:
; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_31206745:
; AVX:       # BB#0:
; AVX-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 1, i32 2, i32 0, i32 6, i32 7, i32 4, i32 5>
  ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_44440000(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_44440000:
; SSE2:       # BB#0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_44440000:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_44440000:
; SSE41:       # BB#0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_44440000:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_23016745(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_23016745:
; SSE:       # BB#0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_23016745:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5>
  ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_23026745(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_23026745:
; SSE:       # BB#0:
; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_23026745:
; AVX:       # BB#0:
; AVX-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 3, i32 0, i32 2, i32 6, i32 7, i32 4, i32 5>
  ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_23016747(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_23016747:
; SSE:       # BB#0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
; SSE-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_23016747:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
; AVX-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 7>
  ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_75643120(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_75643120:
; SSE2:       # BB#0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_75643120:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_75643120:
; SSE41:       # BB#0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_75643120:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 7, i32 5, i32 6, i32 4, i32 3, i32 1, i32 2, i32 0>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_10545410(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_10545410:
; SSE2:       # BB#0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_10545410:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_10545410:
; SSE41:       # BB#0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_10545410:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 0, i32 5, i32 4, i32 5, i32 4, i32 1, i32 0>
  ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_54105410(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_54105410:
; SSE2:       # BB#0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_54105410:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_54105410:
; SSE41:       # BB#0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_54105410:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 4, i32 1, i32 0, i32 5, i32 4, i32 1, i32 0>
  ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_54101054(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_54101054:
; SSE2:       # BB#0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_54101054:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_54101054:
; SSE41:       # BB#0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_54101054:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 4, i32 1, i32 0, i32 1, i32 0, i32 5, i32 4>
  ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_04400440(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_04400440:
; SSE2:       # BB#0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,4,6]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_04400440:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_04400440:
; SSE41:       # BB#0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_04400440:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 0>
  ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_40044004(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_40044004:
; SSE2:       # BB#0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,0,0,2,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,4]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_40044004:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_40044004:
; SSE41:       # BB#0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_40044004:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 0, i32 0, i32 4, i32 4, i32 0, i32 0, i32 4>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_26405173(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_26405173:
; SSE2:       # BB#0:
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,4,7]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_26405173:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_26405173:
; SSE41:       # BB#0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_26405173:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 6, i32 4, i32 0, i32 5, i32 1, i32 7, i32 3>
  ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_20645173(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_20645173:
; SSE2:       # BB#0:
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,0,2,3,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,4,7]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_20645173:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_20645173:
; SSE41:       # BB#0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_20645173:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 0, i32 6, i32 4, i32 5, i32 1, i32 7, i32 3>
  ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_26401375(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_26401375:
; SSE2:       # BB#0:
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,3,1,2]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_26401375:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_26401375:
; SSE41:       # BB#0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_26401375:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 6, i32 4, i32 0, i32 1, i32 3, i32 7, i32 5>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_66751643(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_66751643:
; SSE2:       # BB#0:
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,3,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,5,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,0]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,1,3,2,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,4,6]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_66751643:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_66751643:
; SSE41:       # BB#0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_66751643:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 6, i32 6, i32 7, i32 5, i32 1, i32 6, i32 4, i32 3>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_60514754(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_60514754:
; SSE2:       # BB#0:
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,4,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,5,6]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_60514754:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[12,13,0,1,10,11,2,3,8,9,14,15,10,11,8,9]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_60514754:
; SSE41:       # BB#0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[12,13,0,1,10,11,2,3,8,9,14,15,10,11,8,9]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_60514754:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[12,13,0,1,10,11,2,3,8,9,14,15,10,11,8,9]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 6, i32 0, i32 5, i32 1, i32 4, i32 7, i32 5, i32 4>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_00444444(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_00444444:
; SSE2:       # BB#0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_00444444:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_00444444:
; SSE41:       # BB#0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_00444444:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_44004444(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_44004444:
; SSE2:       # BB#0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,2,0,0,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_44004444:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_44004444:
; SSE41:       # BB#0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_44004444:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_04404444(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_04404444:
; SSE2:       # BB#0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_04404444:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_04404444:
; SSE41:       # BB#0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_04404444:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_04400000(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_04400000:
; SSE2:       # BB#0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,0,3]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_04400000:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_04400000:
; SSE41:       # BB#0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_04400000:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_04404567(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_04404567:
; SSE:       # BB#0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_04404567:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_0X444444(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_0X444444:
; SSE2:       # BB#0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,2,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_0X444444:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_0X444444:
; SSE41:       # BB#0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_0X444444:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 undef, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_44X04444(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_44X04444:
; SSE2:       # BB#0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,2,2,0,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_44X04444:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_44X04444:
; SSE41:       # BB#0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_44X04444:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 undef, i32 0, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_X4404444(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_X4404444:
; SSE2:       # BB#0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_X4404444:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_X4404444:
; SSE41:       # BB#0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_X4404444:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_0127XXXX(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_0127XXXX:
; SSE2:       # BB#0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,7,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_0127XXXX:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_0127XXXX:
; SSE41:       # BB#0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_0127XXXX:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_XXXX4563(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_XXXX4563:
; SSE2:       # BB#0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,2,0]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_XXXX4563:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_XXXX4563:
; SSE41:       # BB#0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_XXXX4563:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 4, i32 5, i32 6, i32 3>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_4563XXXX(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_4563XXXX:
; SSE2:       # BB#0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,0,2,3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_4563XXXX:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_4563XXXX:
; SSE41:       # BB#0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_4563XXXX:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_01274563(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_01274563:
; SSE2:       # BB#0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,4,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,3,1,2]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_01274563:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_01274563:
; SSE41:       # BB#0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_01274563:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6, i32 3>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_45630127(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_45630127:
; SSE2:       # BB#0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,0,3,1]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_45630127:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_45630127:
; SSE41:       # BB#0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_45630127:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 3, i32 0, i32 1, i32 2, i32 7>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_37102735(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_37102735:
; SSE2:       # BB#0:
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,5,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,4,5,6]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_37102735:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_37102735:
; SSE41:       # BB#0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_37102735:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 7, i32 1, i32 0, i32 2, i32 7, i32 3, i32 5>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_08192a3b(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_08192a3b:
; SSE:       # BB#0:
; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_08192a3b:
; AVX:       # BB#0:
; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_0c1d2e3f(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_0c1d2e3f:
; SSE:       # BB#0:
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_0c1d2e3f:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 12, i32 1, i32 13, i32 2, i32 14, i32 3, i32 15>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_4c5d6e7f(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_4c5d6e7f:
; SSE:       # BB#0:
; SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_4c5d6e7f:
; AVX:       # BB#0:
; AVX-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_48596a7b(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_48596a7b:
; SSE:       # BB#0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_48596a7b:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 8, i32 5, i32 9, i32 6, i32 10, i32 7, i32 11>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_08196e7f(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_08196e7f:
; SSE:       # BB#0:
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,3,2,3]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_08196e7f:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,3,2,3]
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 6, i32 14, i32 7, i32 15>
  ret <8 x i16> %shuffle
}

; NOTE(review): the definition that follows is cut off at the end of the
; reviewed region, so its text is left exactly as found - TODO restore its
; line structure together with the remainder of the file.
934define <8 x i16> @shuffle_v8i16_0c1d6879(<8 x i16> %a, <8 x i16> %b) { 935; SSE-LABEL: shuffle_v8i16_0c1d6879: 936; SSE: # BB#0: 937; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,0,2,3] 938; SSE-NEXT: pshufd {{.*#+}} xmm0 
= xmm0[0,3,2,3] 939; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 940; SSE-NEXT: retq 941; 942; AVX-LABEL: shuffle_v8i16_0c1d6879: 943; AVX: # BB#0: 944; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,0,2,3] 945; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3] 946; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 947; AVX-NEXT: retq 948 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 12, i32 1, i32 13, i32 6, i32 8, i32 7, i32 9> 949 ret <8 x i16> %shuffle 950} 951 952define <8 x i16> @shuffle_v8i16_109832ba(<8 x i16> %a, <8 x i16> %b) { 953; SSE-LABEL: shuffle_v8i16_109832ba: 954; SSE: # BB#0: 955; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 956; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7] 957; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5] 958; SSE-NEXT: retq 959; 960; AVX-LABEL: shuffle_v8i16_109832ba: 961; AVX: # BB#0: 962; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 963; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7] 964; AVX-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5] 965; AVX-NEXT: retq 966 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 0, i32 9, i32 8, i32 3, i32 2, i32 11, i32 10> 967 ret <8 x i16> %shuffle 968} 969 970define <8 x i16> @shuffle_v8i16_8091a2b3(<8 x i16> %a, <8 x i16> %b) { 971; SSE-LABEL: shuffle_v8i16_8091a2b3: 972; SSE: # BB#0: 973; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 974; SSE-NEXT: movdqa %xmm1, %xmm0 975; SSE-NEXT: retq 976; 977; AVX-LABEL: shuffle_v8i16_8091a2b3: 978; AVX: # BB#0: 979; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 980; AVX-NEXT: retq 981 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> 
<i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3> 982 ret <8 x i16> %shuffle 983} 984define <8 x i16> @shuffle_v8i16_c4d5e6f7(<8 x i16> %a, <8 x i16> %b) { 985; SSE-LABEL: shuffle_v8i16_c4d5e6f7: 986; SSE: # BB#0: 987; SSE-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 988; SSE-NEXT: movdqa %xmm1, %xmm0 989; SSE-NEXT: retq 990; 991; AVX-LABEL: shuffle_v8i16_c4d5e6f7: 992; AVX: # BB#0: 993; AVX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 994; AVX-NEXT: retq 995 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 7> 996 ret <8 x i16> %shuffle 997} 998 999define <8 x i16> @shuffle_v8i16_0213cedf(<8 x i16> %a, <8 x i16> %b) { 1000; SSE-LABEL: shuffle_v8i16_0213cedf: 1001; SSE: # BB#0: 1002; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7] 1003; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 1004; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,1,3,4,5,6,7] 1005; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1006; SSE-NEXT: retq 1007; 1008; AVX-LABEL: shuffle_v8i16_0213cedf: 1009; AVX: # BB#0: 1010; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7] 1011; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 1012; AVX-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,1,3,4,5,6,7] 1013; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1014; AVX-NEXT: retq 1015 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 1, i32 3, i32 12, i32 14, i32 13, i32 15> 1016 ret <8 x i16> %shuffle 1017} 1018 1019define <8 x i16> @shuffle_v8i16_443aXXXX(<8 x i16> %a, <8 x i16> %b) { 1020; SSE2-LABEL: shuffle_v8i16_443aXXXX: 1021; SSE2: # BB#0: 1022; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,0,65535,65535,65535,65535,65535] 1023; SSE2-NEXT: pand %xmm2, %xmm0 1024; SSE2-NEXT: pandn %xmm1, %xmm2 1025; SSE2-NEXT: por %xmm0, %xmm2 1026; SSE2-NEXT: pshufd {{.*#+}} xmm0 = 
xmm2[2,1,2,3] 1027; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7] 1028; SSE2-NEXT: retq 1029; 1030; SSSE3-LABEL: shuffle_v8i16_443aXXXX: 1031; SSSE3: # BB#0: 1032; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[4,5,u,u,u,u,u,u,u,u] 1033; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,6,7],zero,zero,xmm0[u,u,u,u,u,u,u,u] 1034; SSSE3-NEXT: por %xmm1, %xmm0 1035; SSSE3-NEXT: retq 1036; 1037; SSE41-LABEL: shuffle_v8i16_443aXXXX: 1038; SSE41: # BB#0: 1039; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7] 1040; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3] 1041; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7] 1042; SSE41-NEXT: retq 1043; 1044; AVX-LABEL: shuffle_v8i16_443aXXXX: 1045; AVX: # BB#0: 1046; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7] 1047; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,3] 1048; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7] 1049; AVX-NEXT: retq 1050 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 3, i32 10, i32 undef, i32 undef, i32 undef, i32 undef> 1051 ret <8 x i16> %shuffle 1052} 1053 1054define <8 x i16> @shuffle_v8i16_032dXXXX(<8 x i16> %a, <8 x i16> %b) { 1055; SSE2-LABEL: shuffle_v8i16_032dXXXX: 1056; SSE2: # BB#0: 1057; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1] 1058; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[3,1,2,0] 1059; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,6,7] 1060; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3] 1061; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7] 1062; SSE2-NEXT: retq 1063; 1064; SSSE3-LABEL: shuffle_v8i16_032dXXXX: 1065; SSSE3: # BB#0: 1066; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[10,11,u,u,u,u,u,u,u,u] 1067; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5],zero,zero,xmm0[u,u,u,u,u,u,u,u] 1068; SSSE3-NEXT: por %xmm1, %xmm0 1069; SSSE3-NEXT: retq 1070; 1071; SSE41-LABEL: shuffle_v8i16_032dXXXX: 1072; SSE41: # BB#0: 
1073; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] 1074; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3] 1075; SSE41-NEXT: retq 1076; 1077; AVX1-LABEL: shuffle_v8i16_032dXXXX: 1078; AVX1: # BB#0: 1079; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] 1080; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3] 1081; AVX1-NEXT: retq 1082; 1083; AVX2-LABEL: shuffle_v8i16_032dXXXX: 1084; AVX2: # BB#0: 1085; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] 1086; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3] 1087; AVX2-NEXT: retq 1088 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 3, i32 2, i32 13, i32 undef, i32 undef, i32 undef, i32 undef> 1089 ret <8 x i16> %shuffle 1090} 1091define <8 x i16> @shuffle_v8i16_XXXdXXXX(<8 x i16> %a, <8 x i16> %b) { 1092; SSE-LABEL: shuffle_v8i16_XXXdXXXX: 1093; SSE: # BB#0: 1094; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,2,3,3] 1095; SSE-NEXT: retq 1096; 1097; AVX-LABEL: shuffle_v8i16_XXXdXXXX: 1098; AVX: # BB#0: 1099; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[2,2,3,3] 1100; AVX-NEXT: retq 1101 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 13, i32 undef, i32 undef, i32 undef, i32 undef> 1102 ret <8 x i16> %shuffle 1103} 1104 1105define <8 x i16> @shuffle_v8i16_012dXXXX(<8 x i16> %a, <8 x i16> %b) { 1106; SSE2-LABEL: shuffle_v8i16_012dXXXX: 1107; SSE2: # BB#0: 1108; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,0,65535,65535,65535,65535] 1109; SSE2-NEXT: pand %xmm2, %xmm0 1110; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3] 1111; SSE2-NEXT: pandn %xmm1, %xmm2 1112; SSE2-NEXT: por %xmm2, %xmm0 1113; SSE2-NEXT: retq 1114; 1115; SSSE3-LABEL: shuffle_v8i16_012dXXXX: 1116; SSSE3: # BB#0: 1117; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[10,11,u,u,u,u,u,u,u,u] 1118; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = 
xmm0[0,1,2,3,4,5],zero,zero,xmm0[u,u,u,u,u,u,u,u] 1119; SSSE3-NEXT: por %xmm1, %xmm0 1120; SSSE3-NEXT: retq 1121; 1122; SSE41-LABEL: shuffle_v8i16_012dXXXX: 1123; SSE41: # BB#0: 1124; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3] 1125; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7] 1126; SSE41-NEXT: retq 1127; 1128; AVX-LABEL: shuffle_v8i16_012dXXXX: 1129; AVX: # BB#0: 1130; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3] 1131; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7] 1132; AVX-NEXT: retq 1133 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 13, i32 undef, i32 undef, i32 undef, i32 undef> 1134 ret <8 x i16> %shuffle 1135} 1136 1137define <8 x i16> @shuffle_v8i16_XXXXcde3(<8 x i16> %a, <8 x i16> %b) { 1138; SSE2-LABEL: shuffle_v8i16_XXXXcde3: 1139; SSE2: # BB#0: 1140; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,65535,65535,65535,65535,0] 1141; SSE2-NEXT: pand %xmm2, %xmm1 1142; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] 1143; SSE2-NEXT: pandn %xmm0, %xmm2 1144; SSE2-NEXT: por %xmm1, %xmm2 1145; SSE2-NEXT: movdqa %xmm2, %xmm0 1146; SSE2-NEXT: retq 1147; 1148; SSSE3-LABEL: shuffle_v8i16_XXXXcde3: 1149; SSSE3: # BB#0: 1150; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,u,u],zero,zero,zero,zero,zero,zero,xmm0[6,7] 1151; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,u,u,u,8,9,10,11,12,13],zero,zero 1152; SSSE3-NEXT: por %xmm1, %xmm0 1153; SSSE3-NEXT: retq 1154; 1155; SSE41-LABEL: shuffle_v8i16_XXXXcde3: 1156; SSE41: # BB#0: 1157; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] 1158; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7] 1159; SSE41-NEXT: retq 1160; 1161; AVX1-LABEL: shuffle_v8i16_XXXXcde3: 1162; AVX1: # BB#0: 1163; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] 1164; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7] 1165; AVX1-NEXT: retq 1166; 1167; AVX2-LABEL: shuffle_v8i16_XXXXcde3: 1168; AVX2: # BB#0: 1169; 
AVX2-NEXT: vpbroadcastq %xmm0, %xmm0 1170; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7] 1171; AVX2-NEXT: retq 1172 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 3> 1173 ret <8 x i16> %shuffle 1174} 1175 1176define <8 x i16> @shuffle_v8i16_cde3XXXX(<8 x i16> %a, <8 x i16> %b) { 1177; SSE2-LABEL: shuffle_v8i16_cde3XXXX: 1178; SSE2: # BB#0: 1179; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,0,65535,65535,65535,65535] 1180; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] 1181; SSE2-NEXT: pand %xmm2, %xmm1 1182; SSE2-NEXT: pandn %xmm0, %xmm2 1183; SSE2-NEXT: por %xmm1, %xmm2 1184; SSE2-NEXT: movdqa %xmm2, %xmm0 1185; SSE2-NEXT: retq 1186; 1187; SSSE3-LABEL: shuffle_v8i16_cde3XXXX: 1188; SSSE3: # BB#0: 1189; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[6,7,u,u,u,u,u,u,u,u] 1190; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[8,9,10,11,12,13],zero,zero,xmm1[u,u,u,u,u,u,u,u] 1191; SSSE3-NEXT: por %xmm1, %xmm0 1192; SSSE3-NEXT: retq 1193; 1194; SSE41-LABEL: shuffle_v8i16_cde3XXXX: 1195; SSE41: # BB#0: 1196; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] 1197; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7] 1198; SSE41-NEXT: retq 1199; 1200; AVX-LABEL: shuffle_v8i16_cde3XXXX: 1201; AVX: # BB#0: 1202; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] 1203; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7] 1204; AVX-NEXT: retq 1205 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 12, i32 13, i32 14, i32 3, i32 undef, i32 undef, i32 undef, i32 undef> 1206 ret <8 x i16> %shuffle 1207} 1208 1209define <8 x i16> @shuffle_v8i16_012dcde3(<8 x i16> %a, <8 x i16> %b) { 1210; SSE2-LABEL: shuffle_v8i16_012dcde3: 1211; SSE2: # BB#0: 1212; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1] 1213; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,3,2,1] 1214; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = 
xmm0[3,1,2,0,4,5,6,7] 1215; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0] 1216; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7] 1217; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,5,7] 1218; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1] 1219; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,3,0,2,4,5,6,7] 1220; SSE2-NEXT: retq 1221; 1222; SSSE3-LABEL: shuffle_v8i16_012dcde3: 1223; SSSE3: # BB#0: 1224; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[10,11,8,9,10,11,12,13],zero,zero 1225; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],zero,zero,zero,zero,zero,zero,zero,zero,xmm0[6,7] 1226; SSSE3-NEXT: por %xmm1, %xmm0 1227; SSSE3-NEXT: retq 1228; 1229; SSE41-LABEL: shuffle_v8i16_012dcde3: 1230; SSE41: # BB#0: 1231; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] 1232; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7] 1233; SSE41-NEXT: retq 1234; 1235; AVX1-LABEL: shuffle_v8i16_012dcde3: 1236; AVX1: # BB#0: 1237; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] 1238; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7] 1239; AVX1-NEXT: retq 1240; 1241; AVX2-LABEL: shuffle_v8i16_012dcde3: 1242; AVX2: # BB#0: 1243; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] 1244; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7] 1245; AVX2-NEXT: retq 1246 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 13, i32 12, i32 13, i32 14, i32 3> 1247 ret <8 x i16> %shuffle 1248} 1249 1250define <8 x i16> @shuffle_v8i16_0923cde7(<8 x i16> %a, <8 x i16> %b) { 1251; SSE2-LABEL: shuffle_v8i16_0923cde7: 1252; SSE2: # BB#0: 1253; SSE2-NEXT: movaps {{.*#+}} xmm2 = [65535,0,65535,65535,0,0,0,65535] 1254; SSE2-NEXT: andps %xmm2, %xmm0 1255; SSE2-NEXT: andnps %xmm1, %xmm2 1256; SSE2-NEXT: orps %xmm2, %xmm0 1257; SSE2-NEXT: retq 1258; 1259; SSSE3-LABEL: shuffle_v8i16_0923cde7: 1260; SSSE3: # BB#0: 
1261; SSSE3-NEXT: movaps {{.*#+}} xmm2 = [65535,0,65535,65535,0,0,0,65535] 1262; SSSE3-NEXT: andps %xmm2, %xmm0 1263; SSSE3-NEXT: andnps %xmm1, %xmm2 1264; SSSE3-NEXT: orps %xmm2, %xmm0 1265; SSSE3-NEXT: retq 1266; 1267; SSE41-LABEL: shuffle_v8i16_0923cde7: 1268; SSE41: # BB#0: 1269; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4,5,6],xmm0[7] 1270; SSE41-NEXT: retq 1271; 1272; AVX-LABEL: shuffle_v8i16_0923cde7: 1273; AVX: # BB#0: 1274; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4,5,6],xmm0[7] 1275; AVX-NEXT: retq 1276 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 9, i32 2, i32 3, i32 12, i32 13, i32 14, i32 7> 1277 ret <8 x i16> %shuffle 1278} 1279 1280define <8 x i16> @shuffle_v8i16_XXX1X579(<8 x i16> %a, <8 x i16> %b) { 1281; SSE2-LABEL: shuffle_v8i16_XXX1X579: 1282; SSE2: # BB#0: 1283; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,1,2,0] 1284; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535,65535,65535,65535,0] 1285; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7] 1286; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7] 1287; SSE2-NEXT: pand %xmm1, %xmm0 1288; SSE2-NEXT: pandn %xmm2, %xmm1 1289; SSE2-NEXT: por %xmm0, %xmm1 1290; SSE2-NEXT: movdqa %xmm1, %xmm0 1291; SSE2-NEXT: retq 1292; 1293; SSSE3-LABEL: shuffle_v8i16_XXX1X579: 1294; SSSE3: # BB#0: 1295; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,u],zero,zero,xmm1[u,u],zero,zero,zero,zero,xmm1[2,3] 1296; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,2,3,u,u,10,11,14,15],zero,zero 1297; SSSE3-NEXT: por %xmm1, %xmm0 1298; SSSE3-NEXT: retq 1299; 1300; SSE41-LABEL: shuffle_v8i16_XXX1X579: 1301; SSE41: # BB#0: 1302; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,2,0] 1303; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7] 1304; SSE41-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7] 1305; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7] 1306; SSE41-NEXT: retq 1307; 1308; AVX1-LABEL: 
shuffle_v8i16_XXX1X579: 1309; AVX1: # BB#0: 1310; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,2,0] 1311; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7] 1312; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7] 1313; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7] 1314; AVX1-NEXT: retq 1315; 1316; AVX2-LABEL: shuffle_v8i16_XXX1X579: 1317; AVX2: # BB#0: 1318; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1 1319; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7] 1320; AVX2-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7] 1321; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7] 1322; AVX2-NEXT: retq 1323 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 5, i32 7, i32 9> 1324 ret <8 x i16> %shuffle 1325} 1326 1327define <8 x i16> @shuffle_v8i16_XX4X8acX(<8 x i16> %a, <8 x i16> %b) { 1328; SSE2-LABEL: shuffle_v8i16_XX4X8acX: 1329; SSE2: # BB#0: 1330; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,2,3,3] 1331; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7] 1332; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,2,0] 1333; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7] 1334; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1] 1335; SSE2-NEXT: retq 1336; 1337; SSSE3-LABEL: shuffle_v8i16_XX4X8acX: 1338; SSSE3: # BB#0: 1339; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,8,9,u,u],zero,zero,zero,zero,zero,zero,xmm0[u,u] 1340; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u],zero,zero,xmm1[u,u,0,1,4,5,8,9,u,u] 1341; SSSE3-NEXT: por %xmm1, %xmm0 1342; SSSE3-NEXT: retq 1343; 1344; SSE41-LABEL: shuffle_v8i16_XX4X8acX: 1345; SSE41: # BB#0: 1346; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5] 1347; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3] 1348; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] 1349; SSE41-NEXT: retq 1350; 1351; AVX1-LABEL: shuffle_v8i16_XX4X8acX: 1352; AVX1: # BB#0: 1353; AVX1-NEXT: vpshufb {{.*#+}} 
xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5] 1354; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3] 1355; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] 1356; AVX1-NEXT: retq 1357; 1358; AVX2-LABEL: shuffle_v8i16_XX4X8acX: 1359; AVX2: # BB#0: 1360; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5] 1361; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3] 1362; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] 1363; AVX2-NEXT: retq 1364 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 4, i32 undef, i32 8, i32 10, i32 12, i32 undef> 1365 ret <8 x i16> %shuffle 1366} 1367 1368define <8 x i16> @shuffle_v8i16_8zzzzzzz(i16 %i) { 1369; SSE-LABEL: shuffle_v8i16_8zzzzzzz: 1370; SSE: # BB#0: 1371; SSE-NEXT: movzwl %di, %eax 1372; SSE-NEXT: movd %eax, %xmm0 1373; SSE-NEXT: retq 1374; 1375; AVX-LABEL: shuffle_v8i16_8zzzzzzz: 1376; AVX: # BB#0: 1377; AVX-NEXT: movzwl %di, %eax 1378; AVX-NEXT: vmovd %eax, %xmm0 1379; AVX-NEXT: retq 1380 %a = insertelement <8 x i16> undef, i16 %i, i32 0 1381 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1382 ret <8 x i16> %shuffle 1383} 1384 1385define <8 x i16> @shuffle_v8i16_z8zzzzzz(i16 %i) { 1386; SSE-LABEL: shuffle_v8i16_z8zzzzzz: 1387; SSE: # BB#0: 1388; SSE-NEXT: pxor %xmm0, %xmm0 1389; SSE-NEXT: pinsrw $1, %edi, %xmm0 1390; SSE-NEXT: retq 1391; 1392; AVX-LABEL: shuffle_v8i16_z8zzzzzz: 1393; AVX: # BB#0: 1394; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0 1395; AVX-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 1396; AVX-NEXT: retq 1397 %a = insertelement <8 x i16> undef, i16 %i, i32 0 1398 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 2, i32 8, i32 3, i32 7, i32 6, i32 5, i32 4, i32 3> 1399 ret <8 x i16> %shuffle 1400} 1401 1402define <8 x i16> @shuffle_v8i16_zzzzz8zz(i16 %i) { 1403; SSE-LABEL: shuffle_v8i16_zzzzz8zz: 1404; SSE: # BB#0: 1405; SSE-NEXT: 
pxor %xmm0, %xmm0 1406; SSE-NEXT: pinsrw $5, %edi, %xmm0 1407; SSE-NEXT: retq 1408; 1409; AVX-LABEL: shuffle_v8i16_zzzzz8zz: 1410; AVX: # BB#0: 1411; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0 1412; AVX-NEXT: vpinsrw $5, %edi, %xmm0, %xmm0 1413; AVX-NEXT: retq 1414 %a = insertelement <8 x i16> undef, i16 %i, i32 0 1415 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0> 1416 ret <8 x i16> %shuffle 1417} 1418 1419define <8 x i16> @shuffle_v8i16_zuuzuuz8(i16 %i) { 1420; SSE-LABEL: shuffle_v8i16_zuuzuuz8: 1421; SSE: # BB#0: 1422; SSE-NEXT: pxor %xmm0, %xmm0 1423; SSE-NEXT: pinsrw $7, %edi, %xmm0 1424; SSE-NEXT: retq 1425; 1426; AVX-LABEL: shuffle_v8i16_zuuzuuz8: 1427; AVX: # BB#0: 1428; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0 1429; AVX-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 1430; AVX-NEXT: retq 1431 %a = insertelement <8 x i16> undef, i16 %i, i32 0 1432 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 8> 1433 ret <8 x i16> %shuffle 1434} 1435 1436define <8 x i16> @shuffle_v8i16_zzBzzzzz(i16 %i) { 1437; SSE-LABEL: shuffle_v8i16_zzBzzzzz: 1438; SSE: # BB#0: 1439; SSE-NEXT: pxor %xmm0, %xmm0 1440; SSE-NEXT: pinsrw $2, %edi, %xmm0 1441; SSE-NEXT: retq 1442; 1443; AVX-LABEL: shuffle_v8i16_zzBzzzzz: 1444; AVX: # BB#0: 1445; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0 1446; AVX-NEXT: vpinsrw $2, %edi, %xmm0, %xmm0 1447; AVX-NEXT: retq 1448 %a = insertelement <8 x i16> undef, i16 %i, i32 3 1449 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 11, i32 3, i32 4, i32 5, i32 6, i32 7> 1450 ret <8 x i16> %shuffle 1451} 1452 1453define <8 x i16> @shuffle_v8i16_def01234(<8 x i16> %a, <8 x i16> %b) { 1454; SSE2-LABEL: shuffle_v8i16_def01234: 1455; SSE2: # BB#0: 1456; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1457; 
SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9] 1458; SSE2-NEXT: por %xmm1, %xmm0 1459; SSE2-NEXT: retq 1460; 1461; SSSE3-LABEL: shuffle_v8i16_def01234: 1462; SSSE3: # BB#0: 1463; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9] 1464; SSSE3-NEXT: retq 1465; 1466; SSE41-LABEL: shuffle_v8i16_def01234: 1467; SSE41: # BB#0: 1468; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9] 1469; SSE41-NEXT: retq 1470; 1471; AVX-LABEL: shuffle_v8i16_def01234: 1472; AVX: # BB#0: 1473; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9] 1474; AVX-NEXT: retq 1475 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4> 1476 ret <8 x i16> %shuffle 1477} 1478 1479define <8 x i16> @shuffle_v8i16_ueuu123u(<8 x i16> %a, <8 x i16> %b) { 1480; SSE2-LABEL: shuffle_v8i16_ueuu123u: 1481; SSE2: # BB#0: 1482; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1483; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9] 1484; SSE2-NEXT: por %xmm1, %xmm0 1485; SSE2-NEXT: retq 1486; 1487; SSSE3-LABEL: shuffle_v8i16_ueuu123u: 1488; SSSE3: # BB#0: 1489; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9] 1490; SSSE3-NEXT: retq 1491; 1492; SSE41-LABEL: shuffle_v8i16_ueuu123u: 1493; SSE41: # BB#0: 1494; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9] 1495; SSE41-NEXT: retq 1496; 1497; AVX-LABEL: shuffle_v8i16_ueuu123u: 1498; AVX: # BB#0: 1499; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9] 1500; AVX-NEXT: retq 1501 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 14, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef> 1502 ret <8 x i16> %shuffle 1503} 1504 
1505define <8 x i16> @shuffle_v8i16_56701234(<8 x i16> %a, <8 x i16> %b) { 1506; SSE2-LABEL: shuffle_v8i16_56701234: 1507; SSE2: # BB#0: 1508; SSE2-NEXT: movdqa %xmm0, %xmm1 1509; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1510; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9] 1511; SSE2-NEXT: por %xmm1, %xmm0 1512; SSE2-NEXT: retq 1513; 1514; SSSE3-LABEL: shuffle_v8i16_56701234: 1515; SSSE3: # BB#0: 1516; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] 1517; SSSE3-NEXT: retq 1518; 1519; SSE41-LABEL: shuffle_v8i16_56701234: 1520; SSE41: # BB#0: 1521; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] 1522; SSE41-NEXT: retq 1523; 1524; AVX-LABEL: shuffle_v8i16_56701234: 1525; AVX: # BB#0: 1526; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] 1527; AVX-NEXT: retq 1528 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4> 1529 ret <8 x i16> %shuffle 1530} 1531 1532define <8 x i16> @shuffle_v8i16_u6uu123u(<8 x i16> %a, <8 x i16> %b) { 1533; SSE2-LABEL: shuffle_v8i16_u6uu123u: 1534; SSE2: # BB#0: 1535; SSE2-NEXT: movdqa %xmm0, %xmm1 1536; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1537; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9] 1538; SSE2-NEXT: por %xmm1, %xmm0 1539; SSE2-NEXT: retq 1540; 1541; SSSE3-LABEL: shuffle_v8i16_u6uu123u: 1542; SSSE3: # BB#0: 1543; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] 1544; SSSE3-NEXT: retq 1545; 1546; SSE41-LABEL: shuffle_v8i16_u6uu123u: 1547; SSE41: # BB#0: 1548; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] 1549; SSE41-NEXT: retq 1550; 1551; AVX-LABEL: shuffle_v8i16_u6uu123u: 1552; AVX: # BB#0: 
1553; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] 1554; AVX-NEXT: retq 1555 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 6, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef> 1556 ret <8 x i16> %shuffle 1557} 1558 1559define <8 x i16> @shuffle_v8i16_uuuu123u(<8 x i16> %a, <8 x i16> %b) { 1560; SSE-LABEL: shuffle_v8i16_uuuu123u: 1561; SSE: # BB#0: 1562; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9] 1563; SSE-NEXT: retq 1564; 1565; AVX-LABEL: shuffle_v8i16_uuuu123u: 1566; AVX: # BB#0: 1567; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9] 1568; AVX-NEXT: retq 1569 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef> 1570 ret <8 x i16> %shuffle 1571} 1572 1573define <8 x i16> @shuffle_v8i16_bcdef012(<8 x i16> %a, <8 x i16> %b) { 1574; SSE2-LABEL: shuffle_v8i16_bcdef012: 1575; SSE2: # BB#0: 1576; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 1577; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5] 1578; SSE2-NEXT: por %xmm1, %xmm0 1579; SSE2-NEXT: retq 1580; 1581; SSSE3-LABEL: shuffle_v8i16_bcdef012: 1582; SSSE3: # BB#0: 1583; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5] 1584; SSSE3-NEXT: retq 1585; 1586; SSE41-LABEL: shuffle_v8i16_bcdef012: 1587; SSE41: # BB#0: 1588; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5] 1589; SSE41-NEXT: retq 1590; 1591; AVX-LABEL: shuffle_v8i16_bcdef012: 1592; AVX: # BB#0: 1593; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5] 1594; AVX-NEXT: retq 1595 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2> 1596 ret <8 x 
i16> %shuffle 1597} 1598 1599define <8 x i16> @shuffle_v8i16_ucdeuu1u(<8 x i16> %a, <8 x i16> %b) { 1600; SSE2-LABEL: shuffle_v8i16_ucdeuu1u: 1601; SSE2: # BB#0: 1602; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 1603; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5] 1604; SSE2-NEXT: por %xmm1, %xmm0 1605; SSE2-NEXT: retq 1606; 1607; SSSE3-LABEL: shuffle_v8i16_ucdeuu1u: 1608; SSSE3: # BB#0: 1609; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5] 1610; SSSE3-NEXT: retq 1611; 1612; SSE41-LABEL: shuffle_v8i16_ucdeuu1u: 1613; SSE41: # BB#0: 1614; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5] 1615; SSE41-NEXT: retq 1616; 1617; AVX-LABEL: shuffle_v8i16_ucdeuu1u: 1618; AVX: # BB#0: 1619; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5] 1620; AVX-NEXT: retq 1621 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 12, i32 13, i32 14, i32 undef, i32 undef, i32 1, i32 undef> 1622 ret <8 x i16> %shuffle 1623} 1624 1625define <8 x i16> @shuffle_v8i16_34567012(<8 x i16> %a, <8 x i16> %b) { 1626; SSE2-LABEL: shuffle_v8i16_34567012: 1627; SSE2: # BB#0: 1628; SSE2-NEXT: movdqa %xmm0, %xmm1 1629; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 1630; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5] 1631; SSE2-NEXT: por %xmm1, %xmm0 1632; SSE2-NEXT: retq 1633; 1634; SSSE3-LABEL: shuffle_v8i16_34567012: 1635; SSSE3: # BB#0: 1636; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] 1637; SSSE3-NEXT: retq 1638; 1639; SSE41-LABEL: shuffle_v8i16_34567012: 1640; SSE41: # BB#0: 1641; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] 1642; SSE41-NEXT: retq 1643; 1644; AVX-LABEL: 
shuffle_v8i16_34567012: 1645; AVX: # BB#0: 1646; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] 1647; AVX-NEXT: retq 1648 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2> 1649 ret <8 x i16> %shuffle 1650} 1651 1652define <8 x i16> @shuffle_v8i16_u456uu1u(<8 x i16> %a, <8 x i16> %b) { 1653; SSE2-LABEL: shuffle_v8i16_u456uu1u: 1654; SSE2: # BB#0: 1655; SSE2-NEXT: movdqa %xmm0, %xmm1 1656; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 1657; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5] 1658; SSE2-NEXT: por %xmm1, %xmm0 1659; SSE2-NEXT: retq 1660; 1661; SSSE3-LABEL: shuffle_v8i16_u456uu1u: 1662; SSSE3: # BB#0: 1663; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] 1664; SSSE3-NEXT: retq 1665; 1666; SSE41-LABEL: shuffle_v8i16_u456uu1u: 1667; SSE41: # BB#0: 1668; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] 1669; SSE41-NEXT: retq 1670; 1671; AVX-LABEL: shuffle_v8i16_u456uu1u: 1672; AVX: # BB#0: 1673; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] 1674; AVX-NEXT: retq 1675 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 1, i32 undef> 1676 ret <8 x i16> %shuffle 1677} 1678 1679define <8 x i16> @shuffle_v8i16_u456uuuu(<8 x i16> %a, <8 x i16> %b) { 1680; SSE-LABEL: shuffle_v8i16_u456uuuu: 1681; SSE: # BB#0: 1682; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 1683; SSE-NEXT: retq 1684; 1685; AVX-LABEL: shuffle_v8i16_u456uuuu: 1686; AVX: # BB#0: 1687; AVX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 1688; AVX-NEXT: retq 1689 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 
5, i32 6, i32 undef, i32 undef, i32 undef, i32 undef> 1690 ret <8 x i16> %shuffle 1691} 1692 1693define <8 x i16> @shuffle_v8i16_3456789a(<8 x i16> %a, <8 x i16> %b) { 1694; SSE2-LABEL: shuffle_v8i16_3456789a: 1695; SSE2: # BB#0: 1696; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 1697; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5] 1698; SSE2-NEXT: por %xmm1, %xmm0 1699; SSE2-NEXT: retq 1700; 1701; SSSE3-LABEL: shuffle_v8i16_3456789a: 1702; SSSE3: # BB#0: 1703; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] 1704; SSSE3-NEXT: movdqa %xmm1, %xmm0 1705; SSSE3-NEXT: retq 1706; 1707; SSE41-LABEL: shuffle_v8i16_3456789a: 1708; SSE41: # BB#0: 1709; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] 1710; SSE41-NEXT: movdqa %xmm1, %xmm0 1711; SSE41-NEXT: retq 1712; 1713; AVX-LABEL: shuffle_v8i16_3456789a: 1714; AVX: # BB#0: 1715; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] 1716; AVX-NEXT: retq 1717 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10> 1718 ret <8 x i16> %shuffle 1719} 1720 1721define <8 x i16> @shuffle_v8i16_u456uu9u(<8 x i16> %a, <8 x i16> %b) { 1722; SSE2-LABEL: shuffle_v8i16_u456uu9u: 1723; SSE2: # BB#0: 1724; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 1725; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5] 1726; SSE2-NEXT: por %xmm1, %xmm0 1727; SSE2-NEXT: retq 1728; 1729; SSSE3-LABEL: shuffle_v8i16_u456uu9u: 1730; SSSE3: # BB#0: 1731; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] 1732; SSSE3-NEXT: movdqa %xmm1, %xmm0 1733; SSSE3-NEXT: retq 1734; 1735; SSE41-LABEL: shuffle_v8i16_u456uu9u: 1736; SSE41: # BB#0: 1737; SSE41-NEXT: 
palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] 1738; SSE41-NEXT: movdqa %xmm1, %xmm0 1739; SSE41-NEXT: retq 1740; 1741; AVX-LABEL: shuffle_v8i16_u456uu9u: 1742; AVX: # BB#0: 1743; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] 1744; AVX-NEXT: retq 1745 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 9, i32 undef> 1746 ret <8 x i16> %shuffle 1747} 1748 1749define <8 x i16> @shuffle_v8i16_56789abc(<8 x i16> %a, <8 x i16> %b) { 1750; SSE2-LABEL: shuffle_v8i16_56789abc: 1751; SSE2: # BB#0: 1752; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1753; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7,8,9] 1754; SSE2-NEXT: por %xmm1, %xmm0 1755; SSE2-NEXT: retq 1756; 1757; SSSE3-LABEL: shuffle_v8i16_56789abc: 1758; SSSE3: # BB#0: 1759; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9] 1760; SSSE3-NEXT: movdqa %xmm1, %xmm0 1761; SSSE3-NEXT: retq 1762; 1763; SSE41-LABEL: shuffle_v8i16_56789abc: 1764; SSE41: # BB#0: 1765; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9] 1766; SSE41-NEXT: movdqa %xmm1, %xmm0 1767; SSE41-NEXT: retq 1768; 1769; AVX-LABEL: shuffle_v8i16_56789abc: 1770; AVX: # BB#0: 1771; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9] 1772; AVX-NEXT: retq 1773 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12> 1774 ret <8 x i16> %shuffle 1775} 1776 1777define <8 x i16> @shuffle_v8i16_u6uu9abu(<8 x i16> %a, <8 x i16> %b) { 1778; SSE2-LABEL: shuffle_v8i16_u6uu9abu: 1779; SSE2: # BB#0: 1780; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1781; SSE2-NEXT: pslldq {{.*#+}} xmm1 = 
zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7,8,9] 1782; SSE2-NEXT: por %xmm1, %xmm0 1783; SSE2-NEXT: retq 1784; 1785; SSSE3-LABEL: shuffle_v8i16_u6uu9abu: 1786; SSSE3: # BB#0: 1787; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9] 1788; SSSE3-NEXT: movdqa %xmm1, %xmm0 1789; SSSE3-NEXT: retq 1790; 1791; SSE41-LABEL: shuffle_v8i16_u6uu9abu: 1792; SSE41: # BB#0: 1793; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9] 1794; SSE41-NEXT: movdqa %xmm1, %xmm0 1795; SSE41-NEXT: retq 1796; 1797; AVX-LABEL: shuffle_v8i16_u6uu9abu: 1798; AVX: # BB#0: 1799; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9] 1800; AVX-NEXT: retq 1801 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 6, i32 undef, i32 undef, i32 9, i32 10, i32 11, i32 undef> 1802 ret <8 x i16> %shuffle 1803} 1804 1805define <8 x i16> @shuffle_v8i16_0uuu1uuu(<8 x i16> %a) { 1806; SSE2-LABEL: shuffle_v8i16_0uuu1uuu: 1807; SSE2: # BB#0: 1808; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3] 1809; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7] 1810; SSE2-NEXT: retq 1811; 1812; SSSE3-LABEL: shuffle_v8i16_0uuu1uuu: 1813; SSSE3: # BB#0: 1814; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3] 1815; SSSE3-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7] 1816; SSSE3-NEXT: retq 1817; 1818; SSE41-LABEL: shuffle_v8i16_0uuu1uuu: 1819; SSE41: # BB#0: 1820; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 1821; SSE41-NEXT: retq 1822; 1823; AVX-LABEL: shuffle_v8i16_0uuu1uuu: 1824; AVX: # BB#0: 1825; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 1826; AVX-NEXT: retq 1827 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef> 1828 ret <8 x i16> %shuffle 1829} 1830 1831define <8 x i16> @shuffle_v8i16_0zzz1zzz(<8 x 
i16> %a) { 1832; SSE2-LABEL: shuffle_v8i16_0zzz1zzz: 1833; SSE2: # BB#0: 1834; SSE2-NEXT: pxor %xmm1, %xmm1 1835; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1836; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1837; SSE2-NEXT: retq 1838; 1839; SSSE3-LABEL: shuffle_v8i16_0zzz1zzz: 1840; SSSE3: # BB#0: 1841; SSSE3-NEXT: pxor %xmm1, %xmm1 1842; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1843; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1844; SSSE3-NEXT: retq 1845; 1846; SSE41-LABEL: shuffle_v8i16_0zzz1zzz: 1847; SSE41: # BB#0: 1848; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 1849; SSE41-NEXT: retq 1850; 1851; AVX-LABEL: shuffle_v8i16_0zzz1zzz: 1852; AVX: # BB#0: 1853; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 1854; AVX-NEXT: retq 1855 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15> 1856 ret <8 x i16> %shuffle 1857} 1858 1859define <8 x i16> @shuffle_v8i16_0u1u2u3u(<8 x i16> %a) { 1860; SSE2-LABEL: shuffle_v8i16_0u1u2u3u: 1861; SSE2: # BB#0: 1862; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 1863; SSE2-NEXT: retq 1864; 1865; SSSE3-LABEL: shuffle_v8i16_0u1u2u3u: 1866; SSSE3: # BB#0: 1867; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 1868; SSSE3-NEXT: retq 1869; 1870; SSE41-LABEL: shuffle_v8i16_0u1u2u3u: 1871; SSE41: # BB#0: 1872; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1873; SSE41-NEXT: retq 1874; 1875; AVX-LABEL: shuffle_v8i16_0u1u2u3u: 1876; AVX: # BB#0: 1877; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1878; AVX-NEXT: retq 1879 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 
undef, i32 1, i32 undef, i32 2, i32 undef, i32 3, i32 undef> 1880 ret <8 x i16> %shuffle 1881} 1882 1883define <8 x i16> @shuffle_v8i16_0z1z2z3z(<8 x i16> %a) { 1884; SSE2-LABEL: shuffle_v8i16_0z1z2z3z: 1885; SSE2: # BB#0: 1886; SSE2-NEXT: pxor %xmm1, %xmm1 1887; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1888; SSE2-NEXT: retq 1889; 1890; SSSE3-LABEL: shuffle_v8i16_0z1z2z3z: 1891; SSSE3: # BB#0: 1892; SSSE3-NEXT: pxor %xmm1, %xmm1 1893; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1894; SSSE3-NEXT: retq 1895; 1896; SSE41-LABEL: shuffle_v8i16_0z1z2z3z: 1897; SSE41: # BB#0: 1898; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1899; SSE41-NEXT: retq 1900; 1901; AVX-LABEL: shuffle_v8i16_0z1z2z3z: 1902; AVX: # BB#0: 1903; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1904; AVX-NEXT: retq 1905 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15> 1906 ret <8 x i16> %shuffle 1907} 1908 1909; 1910; Shuffle to logical bit shifts 1911; 1912define <8 x i16> @shuffle_v8i16_z0z2z4z6(<8 x i16> %a) { 1913; SSE-LABEL: shuffle_v8i16_z0z2z4z6: 1914; SSE: # BB#0: 1915; SSE-NEXT: pslld $16, %xmm0 1916; SSE-NEXT: retq 1917; 1918; AVX-LABEL: shuffle_v8i16_z0z2z4z6: 1919; AVX: # BB#0: 1920; AVX-NEXT: vpslld $16, %xmm0, %xmm0 1921; AVX-NEXT: retq 1922 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 0, i32 8, i32 2, i32 8, i32 4, i32 8, i32 6> 1923 ret <8 x i16> %shuffle 1924} 1925 1926define <8 x i16> @shuffle_v8i16_zzz0zzz4(<8 x i16> %a) { 1927; SSE-LABEL: shuffle_v8i16_zzz0zzz4: 1928; SSE: # BB#0: 1929; SSE-NEXT: psllq $48, %xmm0 1930; SSE-NEXT: retq 1931; 1932; AVX-LABEL: shuffle_v8i16_zzz0zzz4: 1933; AVX: # BB#0: 1934; AVX-NEXT: vpsllq $48, %xmm0, %xmm0 1935; AVX-NEXT: 
retq 1936 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 8, i32 8, i32 0, i32 8, i32 8, i32 8, i32 4> 1937 ret <8 x i16> %shuffle 1938} 1939 1940define <8 x i16> @shuffle_v8i16_zz01zX4X(<8 x i16> %a) { 1941; SSE-LABEL: shuffle_v8i16_zz01zX4X: 1942; SSE: # BB#0: 1943; SSE-NEXT: psllq $32, %xmm0 1944; SSE-NEXT: retq 1945; 1946; AVX-LABEL: shuffle_v8i16_zz01zX4X: 1947; AVX: # BB#0: 1948; AVX-NEXT: vpsllq $32, %xmm0, %xmm0 1949; AVX-NEXT: retq 1950 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 8, i32 0, i32 1, i32 8, i32 undef, i32 4, i32 undef> 1951 ret <8 x i16> %shuffle 1952} 1953 1954define <8 x i16> @shuffle_v8i16_z0X2z456(<8 x i16> %a) { 1955; SSE-LABEL: shuffle_v8i16_z0X2z456: 1956; SSE: # BB#0: 1957; SSE-NEXT: psllq $16, %xmm0 1958; SSE-NEXT: retq 1959; 1960; AVX-LABEL: shuffle_v8i16_z0X2z456: 1961; AVX: # BB#0: 1962; AVX-NEXT: vpsllq $16, %xmm0, %xmm0 1963; AVX-NEXT: retq 1964 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 0, i32 undef, i32 2, i32 8, i32 4, i32 5, i32 6> 1965 ret <8 x i16> %shuffle 1966} 1967 1968define <8 x i16> @shuffle_v8i16_1z3zXz7z(<8 x i16> %a) { 1969; SSE-LABEL: shuffle_v8i16_1z3zXz7z: 1970; SSE: # BB#0: 1971; SSE-NEXT: psrld $16, %xmm0 1972; SSE-NEXT: retq 1973; 1974; AVX-LABEL: shuffle_v8i16_1z3zXz7z: 1975; AVX: # BB#0: 1976; AVX-NEXT: vpsrld $16, %xmm0, %xmm0 1977; AVX-NEXT: retq 1978 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 1, i32 8, i32 3, i32 8, i32 undef, i32 8, i32 7, i32 8> 1979 ret <8 x i16> %shuffle 1980} 1981 1982define <8 x i16> @shuffle_v8i16_1X3z567z(<8 x i16> %a) { 1983; SSE-LABEL: shuffle_v8i16_1X3z567z: 1984; SSE: # BB#0: 1985; SSE-NEXT: psrlq $16, %xmm0 1986; SSE-NEXT: retq 1987; 1988; AVX-LABEL: shuffle_v8i16_1X3z567z: 1989; AVX: # BB#0: 1990; AVX-NEXT: vpsrlq $16, %xmm0, %xmm0 1991; AVX-NEXT: retq 1992 %shuffle = shufflevector <8 x i16> %a, <8 x i16> 
zeroinitializer, <8 x i32><i32 1, i32 undef, i32 3, i32 8, i32 5, i32 6, i32 7, i32 8> 1993 ret <8 x i16> %shuffle 1994} 1995 1996define <8 x i16> @shuffle_v8i16_23zz67zz(<8 x i16> %a) { 1997; SSE-LABEL: shuffle_v8i16_23zz67zz: 1998; SSE: # BB#0: 1999; SSE-NEXT: psrlq $32, %xmm0 2000; SSE-NEXT: retq 2001; 2002; AVX-LABEL: shuffle_v8i16_23zz67zz: 2003; AVX: # BB#0: 2004; AVX-NEXT: vpsrlq $32, %xmm0, %xmm0 2005; AVX-NEXT: retq 2006 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 2, i32 3, i32 8, i32 8, i32 6, i32 7, i32 8, i32 8> 2007 ret <8 x i16> %shuffle 2008} 2009 2010define <8 x i16> @shuffle_v8i16_3zXXXzzz(<8 x i16> %a) { 2011; SSE-LABEL: shuffle_v8i16_3zXXXzzz: 2012; SSE: # BB#0: 2013; SSE-NEXT: psrlq $48, %xmm0 2014; SSE-NEXT: retq 2015; 2016; AVX-LABEL: shuffle_v8i16_3zXXXzzz: 2017; AVX: # BB#0: 2018; AVX-NEXT: vpsrlq $48, %xmm0, %xmm0 2019; AVX-NEXT: retq 2020 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 3, i32 8, i32 undef, i32 undef, i32 undef, i32 8, i32 8, i32 8> 2021 ret <8 x i16> %shuffle 2022} 2023 2024define <8 x i16> @shuffle_v8i16_01u3zzuz(<8 x i16> %a) { 2025; SSE-LABEL: shuffle_v8i16_01u3zzuz: 2026; SSE: # BB#0: 2027; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero 2028; SSE-NEXT: retq 2029; 2030; AVX-LABEL: shuffle_v8i16_01u3zzuz: 2031; AVX: # BB#0: 2032; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero 2033; AVX-NEXT: retq 2034 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 undef, i32 3, i32 8, i32 8, i32 undef, i32 8> 2035 ret <8 x i16> %shuffle 2036} 2037 2038define <8 x i16> @shuffle_v8i16_0z234567(<8 x i16> %a) { 2039; SSE2-LABEL: shuffle_v8i16_0z234567: 2040; SSE2: # BB#0: 2041; SSE2-NEXT: andps {{.*}}(%rip), %xmm0 2042; SSE2-NEXT: retq 2043; 2044; SSSE3-LABEL: shuffle_v8i16_0z234567: 2045; SSSE3: # BB#0: 2046; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0 2047; SSSE3-NEXT: retq 2048; 2049; SSE41-LABEL: shuffle_v8i16_0z234567: 2050; SSE41: # 
BB#0: 2051; SSE41-NEXT: pxor %xmm1, %xmm1 2052; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7] 2053; SSE41-NEXT: retq 2054; 2055; AVX-LABEL: shuffle_v8i16_0z234567: 2056; AVX: # BB#0: 2057; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 2058; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7] 2059; AVX-NEXT: retq 2060 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 2061 ret <8 x i16> %shuffle 2062} 2063 2064define <8 x i16> @shuffle_v8i16_0zzzz5z7(<8 x i16> %a) { 2065; SSE2-LABEL: shuffle_v8i16_0zzzz5z7: 2066; SSE2: # BB#0: 2067; SSE2-NEXT: andps {{.*}}(%rip), %xmm0 2068; SSE2-NEXT: retq 2069; 2070; SSSE3-LABEL: shuffle_v8i16_0zzzz5z7: 2071; SSSE3: # BB#0: 2072; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0 2073; SSSE3-NEXT: retq 2074; 2075; SSE41-LABEL: shuffle_v8i16_0zzzz5z7: 2076; SSE41: # BB#0: 2077; SSE41-NEXT: pxor %xmm1, %xmm1 2078; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4],xmm0[5],xmm1[6],xmm0[7] 2079; SSE41-NEXT: retq 2080; 2081; AVX-LABEL: shuffle_v8i16_0zzzz5z7: 2082; AVX: # BB#0: 2083; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 2084; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4],xmm0[5],xmm1[6],xmm0[7] 2085; AVX-NEXT: retq 2086 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 8, i32 5, i32 8, i32 7> 2087 ret <8 x i16> %shuffle 2088} 2089; Keep elements 0-6 of %a and zero only element 7 (mask index 15 reads the zeroinitializer operand). 2090define <8 x i16> @shuffle_v8i16_0123456z(<8 x i16> %a) { 2091; SSE2-LABEL: shuffle_v8i16_0123456z: 2092; SSE2: # BB#0: 2093; SSE2-NEXT: andps {{.*}}(%rip), %xmm0 2094; SSE2-NEXT: retq 2095; 2096; SSSE3-LABEL: shuffle_v8i16_0123456z: 2097; SSSE3: # BB#0: 2098; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0 2099; SSSE3-NEXT: retq 2100; 2101; SSE41-LABEL: shuffle_v8i16_0123456z: 2102; SSE41: # BB#0: 2103; SSE41-NEXT: pxor %xmm1, %xmm1 2104; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7] 2105; SSE41-NEXT: retq 2106; 
2107; AVX-LABEL: shuffle_v8i16_0123456z: 2108; AVX: # BB#0: 2109; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 2110; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7] 2111; AVX-NEXT: retq 2112 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15> 2113 ret <8 x i16> %shuffle 2114} 2115 2116define <8 x i16> @shuffle_v8i16_fu3ucc5u(<8 x i16> %a, <8 x i16> %b) { 2117; SSE-LABEL: shuffle_v8i16_fu3ucc5u: 2118; SSE: # BB#0: 2119; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] 2120; SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,4,4] 2121; SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3] 2122; SSE-NEXT: movdqa %xmm1, %xmm0 2123; SSE-NEXT: retq 2124; 2125; AVX-LABEL: shuffle_v8i16_fu3ucc5u: 2126; AVX: # BB#0: 2127; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] 2128; AVX-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,4,4] 2129; AVX-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm1[2],xmm0[2],xmm1[3],xmm0[3] 2130; AVX-NEXT: retq 2131 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 15, i32 undef, i32 3, i32 undef, i32 12, i32 12, i32 5, i32 undef> 2132 ret <8 x i16> %shuffle 2133} 2134 2135define <8 x i16> @shuffle_v8i16_8012345u(<8 x i16> %a) { 2136; SSE-LABEL: shuffle_v8i16_8012345u: 2137; SSE: # BB#0: 2138; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] 2139; SSE-NEXT: retq 2140; 2141; AVX-LABEL: shuffle_v8i16_8012345u: 2142; AVX: # BB#0: 2143; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] 2144; AVX-NEXT: retq 2145 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 8, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 undef> 2146 2147 ret <8 x i16> %shuffle 2148} 2149 2150define <8 x i16> @insert_dup_mem_v8i16_i32(i32* %ptr) { 2151; SSE2-LABEL: insert_dup_mem_v8i16_i32: 2152; SSE2: #
BB#0: 2153; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2154; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3] 2155; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 2156; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] 2157; SSE2-NEXT: retq 2158; 2159; SSSE3-LABEL: insert_dup_mem_v8i16_i32: 2160; SSSE3: # BB#0: 2161; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2162; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] 2163; SSSE3-NEXT: retq 2164; 2165; SSE41-LABEL: insert_dup_mem_v8i16_i32: 2166; SSE41: # BB#0: 2167; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2168; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] 2169; SSE41-NEXT: retq 2170; 2171; AVX1-LABEL: insert_dup_mem_v8i16_i32: 2172; AVX1: # BB#0: 2173; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2174; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] 2175; AVX1-NEXT: retq 2176; 2177; AVX2-LABEL: insert_dup_mem_v8i16_i32: 2178; AVX2: # BB#0: 2179; AVX2-NEXT: vpbroadcastw (%rdi), %xmm0 2180; AVX2-NEXT: retq 2181 %tmp = load i32, i32* %ptr, align 4 2182 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0 2183 %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16> 2184 %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <8 x i32> zeroinitializer 2185 ret <8 x i16> %tmp3 2186} 2187 2188define <8 x i16> @insert_dup_mem_v8i16_sext_i16(i16* %ptr) { 2189; SSE2-LABEL: insert_dup_mem_v8i16_sext_i16: 2190; SSE2: # BB#0: 2191; SSE2-NEXT: movswl (%rdi), %eax 2192; SSE2-NEXT: movd %eax, %xmm0 2193; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3] 2194; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 2195; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] 2196; SSE2-NEXT: retq 2197; 2198; SSSE3-LABEL: insert_dup_mem_v8i16_sext_i16: 2199; SSSE3: # BB#0: 2200; SSSE3-NEXT: movswl (%rdi), %eax 2201; SSSE3-NEXT: movd %eax, %xmm0 2202; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = 
xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] 2203; SSSE3-NEXT: retq 2204; 2205; SSE41-LABEL: insert_dup_mem_v8i16_sext_i16: 2206; SSE41: # BB#0: 2207; SSE41-NEXT: movswl (%rdi), %eax 2208; SSE41-NEXT: movd %eax, %xmm0 2209; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] 2210; SSE41-NEXT: retq 2211; 2212; AVX1-LABEL: insert_dup_mem_v8i16_sext_i16: 2213; AVX1: # BB#0: 2214; AVX1-NEXT: movswl (%rdi), %eax 2215; AVX1-NEXT: vmovd %eax, %xmm0 2216; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] 2217; AVX1-NEXT: retq 2218; 2219; AVX2-LABEL: insert_dup_mem_v8i16_sext_i16: 2220; AVX2: # BB#0: 2221; AVX2-NEXT: movswl (%rdi), %eax 2222; AVX2-NEXT: vmovd %eax, %xmm0 2223; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0 2224; AVX2-NEXT: retq 2225 %tmp = load i16, i16* %ptr, align 2 2226 %tmp1 = sext i16 %tmp to i32 2227 %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0 2228 %tmp3 = bitcast <4 x i32> %tmp2 to <8 x i16> 2229 %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <8 x i32> zeroinitializer 2230 ret <8 x i16> %tmp4 2231} 2232 2233define <8 x i16> @insert_dup_elt1_mem_v8i16_i32(i32* %ptr) { 2234; SSE2-LABEL: insert_dup_elt1_mem_v8i16_i32: 2235; SSE2: # BB#0: 2236; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2237; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3] 2238; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7] 2239; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5] 2240; SSE2-NEXT: retq 2241; 2242; SSSE3-LABEL: insert_dup_elt1_mem_v8i16_i32: 2243; SSSE3: # BB#0: 2244; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2245; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3] 2246; SSSE3-NEXT: retq 2247; 2248; SSE41-LABEL: insert_dup_elt1_mem_v8i16_i32: 2249; SSE41: # BB#0: 2250; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2251; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3] 2252; SSE41-NEXT: retq 2253; 2254; AVX1-LABEL: 
insert_dup_elt1_mem_v8i16_i32: 2255; AVX1: # BB#0: 2256; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2257; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3] 2258; AVX1-NEXT: retq 2259; 2260; AVX2-LABEL: insert_dup_elt1_mem_v8i16_i32: 2261; AVX2: # BB#0: 2262; AVX2-NEXT: vpbroadcastw 2(%rdi), %xmm0 2263; AVX2-NEXT: retq 2264 %tmp = load i32, i32* %ptr, align 4 2265 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0 2266 %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16> 2267 %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 2268 ret <8 x i16> %tmp3 2269} 2270 2271define <8 x i16> @insert_dup_elt3_mem_v8i16_i32(i32* %ptr) { 2272; SSE2-LABEL: insert_dup_elt3_mem_v8i16_i32: 2273; SSE2: # BB#0: 2274; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2275; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,1,0] 2276; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7] 2277; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7] 2278; SSE2-NEXT: retq 2279; 2280; SSSE3-LABEL: insert_dup_elt3_mem_v8i16_i32: 2281; SSSE3: # BB#0: 2282; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2283; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3] 2284; SSSE3-NEXT: retq 2285; 2286; SSE41-LABEL: insert_dup_elt3_mem_v8i16_i32: 2287; SSE41: # BB#0: 2288; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2289; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3] 2290; SSE41-NEXT: retq 2291; 2292; AVX1-LABEL: insert_dup_elt3_mem_v8i16_i32: 2293; AVX1: # BB#0: 2294; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2295; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3] 2296; AVX1-NEXT: retq 2297; 2298; AVX2-LABEL: insert_dup_elt3_mem_v8i16_i32: 2299; AVX2: # BB#0: 2300; AVX2-NEXT: vpbroadcastw 2(%rdi), %xmm0 2301; AVX2-NEXT: retq 2302 %tmp = load i32, i32* %ptr, align 4 2303 %tmp1 = 
insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 1 2304 %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16> 2305 %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> 2306 ret <8 x i16> %tmp3 2307} 2308 2309define <8 x i16> @insert_dup_elt1_mem_v8i16_sext_i16(i16* %ptr) { 2310; SSE2-LABEL: insert_dup_elt1_mem_v8i16_sext_i16: 2311; SSE2: # BB#0: 2312; SSE2-NEXT: movswl (%rdi), %eax 2313; SSE2-NEXT: movd %eax, %xmm0 2314; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3] 2315; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7] 2316; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5] 2317; SSE2-NEXT: retq 2318; 2319; SSSE3-LABEL: insert_dup_elt1_mem_v8i16_sext_i16: 2320; SSSE3: # BB#0: 2321; SSSE3-NEXT: movswl (%rdi), %eax 2322; SSSE3-NEXT: movd %eax, %xmm0 2323; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3] 2324; SSSE3-NEXT: retq 2325; 2326; SSE41-LABEL: insert_dup_elt1_mem_v8i16_sext_i16: 2327; SSE41: # BB#0: 2328; SSE41-NEXT: movswl (%rdi), %eax 2329; SSE41-NEXT: movd %eax, %xmm0 2330; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3] 2331; SSE41-NEXT: retq 2332; 2333; AVX1-LABEL: insert_dup_elt1_mem_v8i16_sext_i16: 2334; AVX1: # BB#0: 2335; AVX1-NEXT: movswl (%rdi), %eax 2336; AVX1-NEXT: vmovd %eax, %xmm0 2337; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3] 2338; AVX1-NEXT: retq 2339; 2340; AVX2-LABEL: insert_dup_elt1_mem_v8i16_sext_i16: 2341; AVX2: # BB#0: 2342; AVX2-NEXT: movswl (%rdi), %eax 2343; AVX2-NEXT: shrl $16, %eax 2344; AVX2-NEXT: vmovd %eax, %xmm0 2345; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0 2346; AVX2-NEXT: retq 2347 %tmp = load i16, i16* %ptr, align 2 2348 %tmp1 = sext i16 %tmp to i32 2349 %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0 2350 %tmp3 = bitcast <4 x i32> %tmp2 to <8 x i16> 2351 %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, 
i32 1, i32 1, i32 1, i32 1> 2352 ret <8 x i16> %tmp4 2353} 2354 2355define <8 x i16> @insert_dup_elt3_mem_v8i16_sext_i16(i16* %ptr) { 2356; SSE2-LABEL: insert_dup_elt3_mem_v8i16_sext_i16: 2357; SSE2: # BB#0: 2358; SSE2-NEXT: movswl (%rdi), %eax 2359; SSE2-NEXT: movd %eax, %xmm0 2360; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,1,0] 2361; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7] 2362; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7] 2363; SSE2-NEXT: retq 2364; 2365; SSSE3-LABEL: insert_dup_elt3_mem_v8i16_sext_i16: 2366; SSSE3: # BB#0: 2367; SSSE3-NEXT: movswl (%rdi), %eax 2368; SSSE3-NEXT: movd %eax, %xmm0 2369; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3] 2370; SSSE3-NEXT: retq 2371; 2372; SSE41-LABEL: insert_dup_elt3_mem_v8i16_sext_i16: 2373; SSE41: # BB#0: 2374; SSE41-NEXT: movswl (%rdi), %eax 2375; SSE41-NEXT: movd %eax, %xmm0 2376; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3] 2377; SSE41-NEXT: retq 2378; 2379; AVX1-LABEL: insert_dup_elt3_mem_v8i16_sext_i16: 2380; AVX1: # BB#0: 2381; AVX1-NEXT: movswl (%rdi), %eax 2382; AVX1-NEXT: vmovd %eax, %xmm0 2383; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3] 2384; AVX1-NEXT: retq 2385; 2386; AVX2-LABEL: insert_dup_elt3_mem_v8i16_sext_i16: 2387; AVX2: # BB#0: 2388; AVX2-NEXT: movswl (%rdi), %eax 2389; AVX2-NEXT: shrl $16, %eax 2390; AVX2-NEXT: vmovd %eax, %xmm0 2391; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0 2392; AVX2-NEXT: retq 2393 %tmp = load i16, i16* %ptr, align 2 2394 %tmp1 = sext i16 %tmp to i32 2395 %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 1 2396 %tmp3 = bitcast <4 x i32> %tmp2 to <8 x i16> 2397 %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> 2398 ret <8 x i16> %tmp4 2399} 2400