; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2

; Codegen expectations for <8 x i16> shufflevector masks on x86-64, compiled
; once per feature level listed in the llc invocations above (baseline SSE2,
; then SSSE3, SSE4.1, AVX and AVX2).
;
; Each function is named after its shuffle mask, one hex digit per result
; lane: digits 0-7 select a lane of %a, digits 8-f select a lane of %b, and
; a 'u' or 'X' marks an undef lane.
;
; The assertion blocks are machine generated; regenerate them with the script
; named in the first line of this file instead of editing them by hand.

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"

define <8 x i16> @shuffle_v8i16_01012323(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_01012323:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_01012323:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_67452301(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_67452301:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_67452301:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 1>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_456789AB(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_456789AB:
; SSE2: # BB#0:
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_456789AB:
; SSSE3: # BB#0:
; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_456789AB:
; SSE41: # BB#0:
; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_456789AB:
; AVX: # BB#0:
; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_00000000(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_00000000:
; SSE: # BB#0:
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE-NEXT: retq
;
; AVX1-LABEL: shuffle_v8i16_00000000:
; AVX1: # BB#0:
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v8i16_00000000:
; AVX2: # BB#0:
; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
; AVX2-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_00004444(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_00004444:
; SSE: # BB#0:
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_00004444:
; AVX: # BB#0:
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_u0u1u2u3(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_u0u1u2u3:
; SSE: # BB#0:
; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_u0u1u2u3:
; AVX: # BB#0:
; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_u4u5u6u7(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_u4u5u6u7:
; SSE: # BB#0:
; SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_u4u5u6u7:
; AVX: # BB#0:
; AVX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 undef, i32 5, i32 undef, i32 6, i32 undef, i32 7>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_31206745(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_31206745:
; SSE: # BB#0:
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_31206745:
; AVX: # BB#0:
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 1, i32 2, i32 0, i32 6, i32 7, i32 4, i32 5>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_44440000(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_44440000:
; SSE2: # BB#0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_44440000:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_44440000:
; SSE41: # BB#0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_44440000:
; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_23016745(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_23016745:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_23016745:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_23026745(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_23026745:
; SSE: # BB#0:
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_23026745:
; AVX: # BB#0:
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 3, i32 0, i32 2, i32 6, i32 7, i32 4, i32 5>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_23016747(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_23016747:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_23016747:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
; AVX-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 7>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_75643120(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_75643120:
; SSE2: # BB#0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_75643120:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_75643120:
; SSE41: # BB#0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_75643120:
; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 7, i32 5, i32 6, i32 4, i32 3, i32 1, i32 2, i32 0>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_10545410(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_10545410:
; SSE2: # BB#0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_10545410:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_10545410:
; SSE41: # BB#0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_10545410:
; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 0, i32 5, i32 4, i32 5, i32 4, i32 1, i32 0>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_54105410(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_54105410:
; SSE2: # BB#0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_54105410:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_54105410:
; SSE41: # BB#0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_54105410:
; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 4, i32 1, i32 0, i32 5, i32 4, i32 1, i32 0>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_54101054(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_54101054:
; SSE2: # BB#0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_54101054:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_54101054:
; SSE41: # BB#0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_54101054:
; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 4, i32 1, i32 0, i32 1, i32 0, i32 5, i32 4>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_04400440(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_04400440:
; SSE2: # BB#0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,4,6]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_04400440:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_04400440:
; SSE41: # BB#0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_04400440:
; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 0>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_40044004(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_40044004:
; SSE2: # BB#0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,0,0,2,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,4]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_40044004:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_40044004:
; SSE41: # BB#0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_40044004:
; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 0, i32 0, i32 4, i32 4, i32 0, i32 0, i32 4>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_26405173(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_26405173:
; SSE2: # BB#0:
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,4,7]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_26405173:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_26405173:
; SSE41: # BB#0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_26405173:
; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 6, i32 4, i32 0, i32 5, i32 1, i32 7, i32 3>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_20645173(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_20645173:
; SSE2: # BB#0:
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,0,2,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,4,7]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_20645173:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_20645173:
; SSE41: # BB#0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_20645173:
; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 0, i32 6, i32 4, i32 5, i32 1, i32 7, i32 3>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_26401375(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_26401375:
; SSE2: # BB#0:
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,1,2]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_26401375:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_26401375:
; SSE41: # BB#0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_26401375:
; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 6, i32 4, i32 0, i32 1, i32 3, i32 7, i32 5>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_66751643(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_66751643:
; SSE2: # BB#0:
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,5,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,0]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,3,2,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,4,6]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_66751643:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_66751643:
; SSE41: # BB#0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_66751643:
; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 6, i32 6, i32 7, i32 5, i32 1, i32 6, i32 4, i32 3>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_60514754(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_60514754:
; SSE2: # BB#0:
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,4,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,5,6]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_60514754:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,0,1,10,11,2,3,8,9,14,15,10,11,8,9]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_60514754:
; SSE41: # BB#0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,0,1,10,11,2,3,8,9,14,15,10,11,8,9]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_60514754:
; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,0,1,10,11,2,3,8,9,14,15,10,11,8,9]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 6, i32 0, i32 5, i32 1, i32 4, i32 7, i32 5, i32 4>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_00444444(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_00444444:
; SSE2: # BB#0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_00444444:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_00444444:
; SSE41: # BB#0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_00444444:
; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_44004444(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_44004444:
; SSE2: # BB#0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,2,0,0,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_44004444:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_44004444:
; SSE41: # BB#0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_44004444:
; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_04404444(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_04404444:
; SSE2: # BB#0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_04404444:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_04404444:
; SSE41: # BB#0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_04404444:
; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_04400000(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_04400000:
; SSE2: # BB#0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,0,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_04400000:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_04400000:
; SSE41: # BB#0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_04400000:
; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_04404567(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_04404567:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_04404567:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_0X444444(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_0X444444:
; SSE2: # BB#0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,2,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_0X444444:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_0X444444:
; SSE41: # BB#0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_0X444444:
; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 undef, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_44X04444(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_44X04444:
; SSE2: # BB#0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,2,2,0,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_44X04444:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_44X04444:
; SSE41: # BB#0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_44X04444:
; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 undef, i32 0, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_X4404444(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_X4404444:
; SSE2: # BB#0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_X4404444:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_X4404444:
; SSE41: # BB#0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_X4404444:
; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_0127XXXX(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_0127XXXX:
; SSE2: # BB#0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,7,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_0127XXXX:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_0127XXXX:
; SSE41: # BB#0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_0127XXXX:
; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_XXXX4563(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_XXXX4563:
; SSE2: # BB#0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,2,0]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_XXXX4563:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_XXXX4563:
; SSE41: # BB#0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_XXXX4563:
; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 4, i32 5, i32 6, i32 3>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_4563XXXX(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_4563XXXX:
; SSE2: # BB#0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,0,2,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_4563XXXX:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_4563XXXX:
; SSE41: # BB#0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_4563XXXX:
; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_01274563(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_01274563:
; SSE2: # BB#0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,4,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,1,2]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_01274563:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_01274563:
; SSE41: # BB#0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_01274563:
; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6, i32 3>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_45630127(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_45630127:
; SSE2: # BB#0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,0,3,1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_45630127:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_45630127:
; SSE41: # BB#0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_45630127:
; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 3, i32 0, i32 1, i32 2, i32 7>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_37102735(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_37102735:
; SSE2: # BB#0:
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,5,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,4,5,6]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_37102735:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_37102735:
; SSE41: # BB#0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_37102735:
; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 7, i32 1, i32 0, i32 2, i32 7, i32 3, i32 5>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_08192a3b(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_08192a3b:
; SSE: # BB#0:
; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_08192a3b:
; AVX: # BB#0:
; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_0c1d2e3f(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_0c1d2e3f:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_0c1d2e3f:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 12, i32 1, i32 13, i32 2, i32 14, i32 3, i32 15>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_4c5d6e7f(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_4c5d6e7f:
; SSE: # BB#0:
; SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_4c5d6e7f:
; AVX: # BB#0:
; AVX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_48596a7b(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_48596a7b:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE-NEXT: punpcklwd {{.*#+}} xmm0
= xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 895; SSE-NEXT: retq 896; 897; AVX-LABEL: shuffle_v8i16_48596a7b: 898; AVX: # BB#0: 899; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 900; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 901; AVX-NEXT: retq 902 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 8, i32 5, i32 9, i32 6, i32 10, i32 7, i32 11> 903 ret <8 x i16> %shuffle 904} 905 906define <8 x i16> @shuffle_v8i16_08196e7f(<8 x i16> %a, <8 x i16> %b) { 907; SSE-LABEL: shuffle_v8i16_08196e7f: 908; SSE: # BB#0: 909; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,3,2,3] 910; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3] 911; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 912; SSE-NEXT: retq 913; 914; AVX-LABEL: shuffle_v8i16_08196e7f: 915; AVX: # BB#0: 916; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,3,2,3] 917; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3] 918; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 919; AVX-NEXT: retq 920 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 6, i32 14, i32 7, i32 15> 921 ret <8 x i16> %shuffle 922} 923 924define <8 x i16> @shuffle_v8i16_0c1d6879(<8 x i16> %a, <8 x i16> %b) { 925; SSE-LABEL: shuffle_v8i16_0c1d6879: 926; SSE: # BB#0: 927; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,0,2,3] 928; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3] 929; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 930; SSE-NEXT: retq 931; 932; AVX-LABEL: shuffle_v8i16_0c1d6879: 933; AVX: # BB#0: 934; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,0,2,3] 935; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3] 936; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 937; AVX-NEXT: retq 938 %shuffle = shufflevector <8 x 
i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 12, i32 1, i32 13, i32 6, i32 8, i32 7, i32 9> 939 ret <8 x i16> %shuffle 940} 941 942define <8 x i16> @shuffle_v8i16_109832ba(<8 x i16> %a, <8 x i16> %b) { 943; SSE-LABEL: shuffle_v8i16_109832ba: 944; SSE: # BB#0: 945; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 946; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7] 947; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5] 948; SSE-NEXT: retq 949; 950; AVX-LABEL: shuffle_v8i16_109832ba: 951; AVX: # BB#0: 952; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 953; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7] 954; AVX-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5] 955; AVX-NEXT: retq 956 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 0, i32 9, i32 8, i32 3, i32 2, i32 11, i32 10> 957 ret <8 x i16> %shuffle 958} 959 960define <8 x i16> @shuffle_v8i16_8091a2b3(<8 x i16> %a, <8 x i16> %b) { 961; SSE-LABEL: shuffle_v8i16_8091a2b3: 962; SSE: # BB#0: 963; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 964; SSE-NEXT: movdqa %xmm1, %xmm0 965; SSE-NEXT: retq 966; 967; AVX-LABEL: shuffle_v8i16_8091a2b3: 968; AVX: # BB#0: 969; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 970; AVX-NEXT: retq 971 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3> 972 ret <8 x i16> %shuffle 973} 974define <8 x i16> @shuffle_v8i16_c4d5e6f7(<8 x i16> %a, <8 x i16> %b) { 975; SSE-LABEL: shuffle_v8i16_c4d5e6f7: 976; SSE: # BB#0: 977; SSE-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 978; SSE-NEXT: movdqa %xmm1, %xmm0 979; SSE-NEXT: retq 980; 981; AVX-LABEL: shuffle_v8i16_c4d5e6f7: 982; AVX: # BB#0: 983; AVX-NEXT: 
vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 984; AVX-NEXT: retq 985 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 7> 986 ret <8 x i16> %shuffle 987} 988 989define <8 x i16> @shuffle_v8i16_0213cedf(<8 x i16> %a, <8 x i16> %b) { 990; SSE-LABEL: shuffle_v8i16_0213cedf: 991; SSE: # BB#0: 992; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7] 993; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 994; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,1,3,4,5,6,7] 995; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 996; SSE-NEXT: retq 997; 998; AVX-LABEL: shuffle_v8i16_0213cedf: 999; AVX: # BB#0: 1000; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7] 1001; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 1002; AVX-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,1,3,4,5,6,7] 1003; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1004; AVX-NEXT: retq 1005 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 1, i32 3, i32 12, i32 14, i32 13, i32 15> 1006 ret <8 x i16> %shuffle 1007} 1008 1009define <8 x i16> @shuffle_v8i16_443aXXXX(<8 x i16> %a, <8 x i16> %b) { 1010; SSE2-LABEL: shuffle_v8i16_443aXXXX: 1011; SSE2: # BB#0: 1012; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,0,65535,65535,65535,65535,65535] 1013; SSE2-NEXT: pand %xmm2, %xmm0 1014; SSE2-NEXT: pandn %xmm1, %xmm2 1015; SSE2-NEXT: por %xmm0, %xmm2 1016; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,1,2,3] 1017; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7] 1018; SSE2-NEXT: retq 1019; 1020; SSSE3-LABEL: shuffle_v8i16_443aXXXX: 1021; SSSE3: # BB#0: 1022; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[4,5,u,u,u,u,u,u,u,u] 1023; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,6,7],zero,zero,xmm0[u,u,u,u,u,u,u,u] 1024; SSSE3-NEXT: por %xmm1, %xmm0 1025; SSSE3-NEXT: retq 1026; 1027; SSE41-LABEL: shuffle_v8i16_443aXXXX: 1028; SSE41: # 
BB#0: 1029; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7] 1030; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3] 1031; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7] 1032; SSE41-NEXT: retq 1033; 1034; AVX-LABEL: shuffle_v8i16_443aXXXX: 1035; AVX: # BB#0: 1036; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7] 1037; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,3] 1038; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7] 1039; AVX-NEXT: retq 1040 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 3, i32 10, i32 undef, i32 undef, i32 undef, i32 undef> 1041 ret <8 x i16> %shuffle 1042} 1043 1044define <8 x i16> @shuffle_v8i16_032dXXXX(<8 x i16> %a, <8 x i16> %b) { 1045; SSE2-LABEL: shuffle_v8i16_032dXXXX: 1046; SSE2: # BB#0: 1047; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1] 1048; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[3,1,2,0] 1049; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,6,7] 1050; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3] 1051; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7] 1052; SSE2-NEXT: retq 1053; 1054; SSSE3-LABEL: shuffle_v8i16_032dXXXX: 1055; SSSE3: # BB#0: 1056; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[10,11,u,u,u,u,u,u,u,u] 1057; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5],zero,zero,xmm0[u,u,u,u,u,u,u,u] 1058; SSSE3-NEXT: por %xmm1, %xmm0 1059; SSSE3-NEXT: retq 1060; 1061; SSE41-LABEL: shuffle_v8i16_032dXXXX: 1062; SSE41: # BB#0: 1063; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] 1064; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3] 1065; SSE41-NEXT: retq 1066; 1067; AVX1-LABEL: shuffle_v8i16_032dXXXX: 1068; AVX1: # BB#0: 1069; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] 1070; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3] 1071; AVX1-NEXT: retq 1072; 1073; AVX2-LABEL: shuffle_v8i16_032dXXXX: 1074; AVX2: 
# BB#0: 1075; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] 1076; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3] 1077; AVX2-NEXT: retq 1078 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 3, i32 2, i32 13, i32 undef, i32 undef, i32 undef, i32 undef> 1079 ret <8 x i16> %shuffle 1080} 1081define <8 x i16> @shuffle_v8i16_XXXdXXXX(<8 x i16> %a, <8 x i16> %b) { 1082; SSE-LABEL: shuffle_v8i16_XXXdXXXX: 1083; SSE: # BB#0: 1084; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,2,3,3] 1085; SSE-NEXT: retq 1086; 1087; AVX-LABEL: shuffle_v8i16_XXXdXXXX: 1088; AVX: # BB#0: 1089; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[2,2,3,3] 1090; AVX-NEXT: retq 1091 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 13, i32 undef, i32 undef, i32 undef, i32 undef> 1092 ret <8 x i16> %shuffle 1093} 1094 1095define <8 x i16> @shuffle_v8i16_012dXXXX(<8 x i16> %a, <8 x i16> %b) { 1096; SSE2-LABEL: shuffle_v8i16_012dXXXX: 1097; SSE2: # BB#0: 1098; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,0,65535,65535,65535,65535] 1099; SSE2-NEXT: pand %xmm2, %xmm0 1100; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3] 1101; SSE2-NEXT: pandn %xmm1, %xmm2 1102; SSE2-NEXT: por %xmm2, %xmm0 1103; SSE2-NEXT: retq 1104; 1105; SSSE3-LABEL: shuffle_v8i16_012dXXXX: 1106; SSSE3: # BB#0: 1107; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[10,11,u,u,u,u,u,u,u,u] 1108; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],zero,zero,xmm0[u,u,u,u,u,u,u,u] 1109; SSSE3-NEXT: por %xmm1, %xmm0 1110; SSSE3-NEXT: retq 1111; 1112; SSE41-LABEL: shuffle_v8i16_012dXXXX: 1113; SSE41: # BB#0: 1114; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3] 1115; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7] 1116; SSE41-NEXT: retq 1117; 1118; AVX-LABEL: shuffle_v8i16_012dXXXX: 1119; AVX: # BB#0: 1120; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3] 1121; AVX-NEXT: vpblendw {{.*#+}} xmm0 
= xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7] 1122; AVX-NEXT: retq 1123 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 13, i32 undef, i32 undef, i32 undef, i32 undef> 1124 ret <8 x i16> %shuffle 1125} 1126 1127define <8 x i16> @shuffle_v8i16_XXXXcde3(<8 x i16> %a, <8 x i16> %b) { 1128; SSE2-LABEL: shuffle_v8i16_XXXXcde3: 1129; SSE2: # BB#0: 1130; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,65535,65535,65535,65535,0] 1131; SSE2-NEXT: pand %xmm2, %xmm1 1132; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] 1133; SSE2-NEXT: pandn %xmm0, %xmm2 1134; SSE2-NEXT: por %xmm1, %xmm2 1135; SSE2-NEXT: movdqa %xmm2, %xmm0 1136; SSE2-NEXT: retq 1137; 1138; SSSE3-LABEL: shuffle_v8i16_XXXXcde3: 1139; SSSE3: # BB#0: 1140; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,u,u],zero,zero,zero,zero,zero,zero,xmm0[6,7] 1141; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,u,u,u,8,9,10,11,12,13],zero,zero 1142; SSSE3-NEXT: por %xmm1, %xmm0 1143; SSSE3-NEXT: retq 1144; 1145; SSE41-LABEL: shuffle_v8i16_XXXXcde3: 1146; SSE41: # BB#0: 1147; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] 1148; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7] 1149; SSE41-NEXT: retq 1150; 1151; AVX1-LABEL: shuffle_v8i16_XXXXcde3: 1152; AVX1: # BB#0: 1153; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] 1154; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7] 1155; AVX1-NEXT: retq 1156; 1157; AVX2-LABEL: shuffle_v8i16_XXXXcde3: 1158; AVX2: # BB#0: 1159; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0 1160; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7] 1161; AVX2-NEXT: retq 1162 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 3> 1163 ret <8 x i16> %shuffle 1164} 1165 1166define <8 x i16> @shuffle_v8i16_cde3XXXX(<8 x i16> %a, <8 x i16> %b) { 1167; SSE2-LABEL: shuffle_v8i16_cde3XXXX: 1168; SSE2: # BB#0: 1169; SSE2-NEXT: movdqa {{.*#+}} xmm2 = 
[65535,65535,65535,0,65535,65535,65535,65535] 1170; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] 1171; SSE2-NEXT: pand %xmm2, %xmm1 1172; SSE2-NEXT: pandn %xmm0, %xmm2 1173; SSE2-NEXT: por %xmm1, %xmm2 1174; SSE2-NEXT: movdqa %xmm2, %xmm0 1175; SSE2-NEXT: retq 1176; 1177; SSSE3-LABEL: shuffle_v8i16_cde3XXXX: 1178; SSSE3: # BB#0: 1179; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[6,7,u,u,u,u,u,u,u,u] 1180; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[8,9,10,11,12,13],zero,zero,xmm1[u,u,u,u,u,u,u,u] 1181; SSSE3-NEXT: por %xmm1, %xmm0 1182; SSSE3-NEXT: retq 1183; 1184; SSE41-LABEL: shuffle_v8i16_cde3XXXX: 1185; SSE41: # BB#0: 1186; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] 1187; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7] 1188; SSE41-NEXT: retq 1189; 1190; AVX-LABEL: shuffle_v8i16_cde3XXXX: 1191; AVX: # BB#0: 1192; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] 1193; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7] 1194; AVX-NEXT: retq 1195 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 12, i32 13, i32 14, i32 3, i32 undef, i32 undef, i32 undef, i32 undef> 1196 ret <8 x i16> %shuffle 1197} 1198 1199define <8 x i16> @shuffle_v8i16_012dcde3(<8 x i16> %a, <8 x i16> %b) { 1200; SSE2-LABEL: shuffle_v8i16_012dcde3: 1201; SSE2: # BB#0: 1202; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1] 1203; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,3,2,1] 1204; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7] 1205; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0] 1206; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7] 1207; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,5,7] 1208; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1] 1209; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,3,0,2,4,5,6,7] 1210; SSE2-NEXT: retq 1211; 1212; SSSE3-LABEL: shuffle_v8i16_012dcde3: 1213; SSSE3: # BB#0: 1214; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = 
zero,zero,zero,zero,zero,zero,xmm1[10,11,8,9,10,11,12,13],zero,zero 1215; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],zero,zero,zero,zero,zero,zero,zero,zero,xmm0[6,7] 1216; SSSE3-NEXT: por %xmm1, %xmm0 1217; SSSE3-NEXT: retq 1218; 1219; SSE41-LABEL: shuffle_v8i16_012dcde3: 1220; SSE41: # BB#0: 1221; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] 1222; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7] 1223; SSE41-NEXT: retq 1224; 1225; AVX1-LABEL: shuffle_v8i16_012dcde3: 1226; AVX1: # BB#0: 1227; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] 1228; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7] 1229; AVX1-NEXT: retq 1230; 1231; AVX2-LABEL: shuffle_v8i16_012dcde3: 1232; AVX2: # BB#0: 1233; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] 1234; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7] 1235; AVX2-NEXT: retq 1236 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 13, i32 12, i32 13, i32 14, i32 3> 1237 ret <8 x i16> %shuffle 1238} 1239 1240define <8 x i16> @shuffle_v8i16_0923cde7(<8 x i16> %a, <8 x i16> %b) { 1241; SSE2-LABEL: shuffle_v8i16_0923cde7: 1242; SSE2: # BB#0: 1243; SSE2-NEXT: movaps {{.*#+}} xmm2 = [65535,0,65535,65535,0,0,0,65535] 1244; SSE2-NEXT: andps %xmm2, %xmm0 1245; SSE2-NEXT: andnps %xmm1, %xmm2 1246; SSE2-NEXT: orps %xmm2, %xmm0 1247; SSE2-NEXT: retq 1248; 1249; SSSE3-LABEL: shuffle_v8i16_0923cde7: 1250; SSSE3: # BB#0: 1251; SSSE3-NEXT: movaps {{.*#+}} xmm2 = [65535,0,65535,65535,0,0,0,65535] 1252; SSSE3-NEXT: andps %xmm2, %xmm0 1253; SSSE3-NEXT: andnps %xmm1, %xmm2 1254; SSSE3-NEXT: orps %xmm2, %xmm0 1255; SSSE3-NEXT: retq 1256; 1257; SSE41-LABEL: shuffle_v8i16_0923cde7: 1258; SSE41: # BB#0: 1259; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4,5,6],xmm0[7] 1260; SSE41-NEXT: retq 1261; 1262; AVX-LABEL: shuffle_v8i16_0923cde7: 1263; AVX: # BB#0: 
1264; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4,5,6],xmm0[7] 1265; AVX-NEXT: retq 1266 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 9, i32 2, i32 3, i32 12, i32 13, i32 14, i32 7> 1267 ret <8 x i16> %shuffle 1268} 1269 1270define <8 x i16> @shuffle_v8i16_XXX1X579(<8 x i16> %a, <8 x i16> %b) { 1271; SSE2-LABEL: shuffle_v8i16_XXX1X579: 1272; SSE2: # BB#0: 1273; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,1,2,0] 1274; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535,65535,65535,65535,0] 1275; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7] 1276; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7] 1277; SSE2-NEXT: pand %xmm1, %xmm0 1278; SSE2-NEXT: pandn %xmm2, %xmm1 1279; SSE2-NEXT: por %xmm0, %xmm1 1280; SSE2-NEXT: movdqa %xmm1, %xmm0 1281; SSE2-NEXT: retq 1282; 1283; SSSE3-LABEL: shuffle_v8i16_XXX1X579: 1284; SSSE3: # BB#0: 1285; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,u],zero,zero,xmm1[u,u],zero,zero,zero,zero,xmm1[2,3] 1286; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,2,3,u,u,10,11,14,15],zero,zero 1287; SSSE3-NEXT: por %xmm1, %xmm0 1288; SSSE3-NEXT: retq 1289; 1290; SSE41-LABEL: shuffle_v8i16_XXX1X579: 1291; SSE41: # BB#0: 1292; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,2,0] 1293; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7] 1294; SSE41-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7] 1295; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7] 1296; SSE41-NEXT: retq 1297; 1298; AVX1-LABEL: shuffle_v8i16_XXX1X579: 1299; AVX1: # BB#0: 1300; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,2,0] 1301; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7] 1302; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7] 1303; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7] 1304; AVX1-NEXT: retq 1305; 1306; AVX2-LABEL: shuffle_v8i16_XXX1X579: 1307; AVX2: # BB#0: 1308; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1 1309; AVX2-NEXT: vpshuflw 
{{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7] 1310; AVX2-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7] 1311; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7] 1312; AVX2-NEXT: retq 1313 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 5, i32 7, i32 9> 1314 ret <8 x i16> %shuffle 1315} 1316 1317define <8 x i16> @shuffle_v8i16_XX4X8acX(<8 x i16> %a, <8 x i16> %b) { 1318; SSE2-LABEL: shuffle_v8i16_XX4X8acX: 1319; SSE2: # BB#0: 1320; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,2,3,3] 1321; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7] 1322; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,2,0] 1323; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7] 1324; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1] 1325; SSE2-NEXT: retq 1326; 1327; SSSE3-LABEL: shuffle_v8i16_XX4X8acX: 1328; SSSE3: # BB#0: 1329; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,8,9,u,u],zero,zero,zero,zero,zero,zero,xmm0[u,u] 1330; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u],zero,zero,xmm1[u,u,0,1,4,5,8,9,u,u] 1331; SSSE3-NEXT: por %xmm1, %xmm0 1332; SSSE3-NEXT: retq 1333; 1334; SSE41-LABEL: shuffle_v8i16_XX4X8acX: 1335; SSE41: # BB#0: 1336; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5] 1337; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3] 1338; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] 1339; SSE41-NEXT: retq 1340; 1341; AVX1-LABEL: shuffle_v8i16_XX4X8acX: 1342; AVX1: # BB#0: 1343; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5] 1344; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3] 1345; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] 1346; AVX1-NEXT: retq 1347; 1348; AVX2-LABEL: shuffle_v8i16_XX4X8acX: 1349; AVX2: # BB#0: 1350; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5] 1351; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3] 1352; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] 
1353; AVX2-NEXT: retq 1354 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 4, i32 undef, i32 8, i32 10, i32 12, i32 undef> 1355 ret <8 x i16> %shuffle 1356} 1357 1358define <8 x i16> @shuffle_v8i16_8zzzzzzz(i16 %i) { 1359; SSE-LABEL: shuffle_v8i16_8zzzzzzz: 1360; SSE: # BB#0: 1361; SSE-NEXT: movzwl %di, %eax 1362; SSE-NEXT: movd %eax, %xmm0 1363; SSE-NEXT: retq 1364; 1365; AVX-LABEL: shuffle_v8i16_8zzzzzzz: 1366; AVX: # BB#0: 1367; AVX-NEXT: movzwl %di, %eax 1368; AVX-NEXT: vmovd %eax, %xmm0 1369; AVX-NEXT: retq 1370 %a = insertelement <8 x i16> undef, i16 %i, i32 0 1371 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1372 ret <8 x i16> %shuffle 1373} 1374 1375define <8 x i16> @shuffle_v8i16_z8zzzzzz(i16 %i) { 1376; SSE-LABEL: shuffle_v8i16_z8zzzzzz: 1377; SSE: # BB#0: 1378; SSE-NEXT: pxor %xmm0, %xmm0 1379; SSE-NEXT: pinsrw $1, %edi, %xmm0 1380; SSE-NEXT: retq 1381; 1382; AVX-LABEL: shuffle_v8i16_z8zzzzzz: 1383; AVX: # BB#0: 1384; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0 1385; AVX-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 1386; AVX-NEXT: retq 1387 %a = insertelement <8 x i16> undef, i16 %i, i32 0 1388 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 2, i32 8, i32 3, i32 7, i32 6, i32 5, i32 4, i32 3> 1389 ret <8 x i16> %shuffle 1390} 1391 1392define <8 x i16> @shuffle_v8i16_zzzzz8zz(i16 %i) { 1393; SSE-LABEL: shuffle_v8i16_zzzzz8zz: 1394; SSE: # BB#0: 1395; SSE-NEXT: pxor %xmm0, %xmm0 1396; SSE-NEXT: pinsrw $5, %edi, %xmm0 1397; SSE-NEXT: retq 1398; 1399; AVX-LABEL: shuffle_v8i16_zzzzz8zz: 1400; AVX: # BB#0: 1401; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0 1402; AVX-NEXT: vpinsrw $5, %edi, %xmm0, %xmm0 1403; AVX-NEXT: retq 1404 %a = insertelement <8 x i16> undef, i16 %i, i32 0 1405 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0> 1406 ret <8 
x i16> %shuffle 1407} 1408 1409define <8 x i16> @shuffle_v8i16_zuuzuuz8(i16 %i) { 1410; SSE-LABEL: shuffle_v8i16_zuuzuuz8: 1411; SSE: # BB#0: 1412; SSE-NEXT: pxor %xmm0, %xmm0 1413; SSE-NEXT: pinsrw $7, %edi, %xmm0 1414; SSE-NEXT: retq 1415; 1416; AVX-LABEL: shuffle_v8i16_zuuzuuz8: 1417; AVX: # BB#0: 1418; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0 1419; AVX-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 1420; AVX-NEXT: retq 1421 %a = insertelement <8 x i16> undef, i16 %i, i32 0 1422 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 8> 1423 ret <8 x i16> %shuffle 1424} 1425 1426define <8 x i16> @shuffle_v8i16_zzBzzzzz(i16 %i) { 1427; SSE-LABEL: shuffle_v8i16_zzBzzzzz: 1428; SSE: # BB#0: 1429; SSE-NEXT: pxor %xmm0, %xmm0 1430; SSE-NEXT: pinsrw $2, %edi, %xmm0 1431; SSE-NEXT: retq 1432; 1433; AVX-LABEL: shuffle_v8i16_zzBzzzzz: 1434; AVX: # BB#0: 1435; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0 1436; AVX-NEXT: vpinsrw $2, %edi, %xmm0, %xmm0 1437; AVX-NEXT: retq 1438 %a = insertelement <8 x i16> undef, i16 %i, i32 3 1439 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 11, i32 3, i32 4, i32 5, i32 6, i32 7> 1440 ret <8 x i16> %shuffle 1441} 1442 1443define <8 x i16> @shuffle_v8i16_def01234(<8 x i16> %a, <8 x i16> %b) { 1444; SSE2-LABEL: shuffle_v8i16_def01234: 1445; SSE2: # BB#0: 1446; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1447; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9] 1448; SSE2-NEXT: por %xmm1, %xmm0 1449; SSE2-NEXT: retq 1450; 1451; SSSE3-LABEL: shuffle_v8i16_def01234: 1452; SSSE3: # BB#0: 1453; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9] 1454; SSSE3-NEXT: retq 1455; 1456; SSE41-LABEL: shuffle_v8i16_def01234: 1457; SSE41: # BB#0: 1458; SSE41-NEXT: palignr {{.*#+}} xmm0 = 
xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9] 1459; SSE41-NEXT: retq 1460; 1461; AVX-LABEL: shuffle_v8i16_def01234: 1462; AVX: # BB#0: 1463; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9] 1464; AVX-NEXT: retq 1465 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4> 1466 ret <8 x i16> %shuffle 1467} 1468 1469define <8 x i16> @shuffle_v8i16_ueuu123u(<8 x i16> %a, <8 x i16> %b) { 1470; SSE2-LABEL: shuffle_v8i16_ueuu123u: 1471; SSE2: # BB#0: 1472; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1473; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9] 1474; SSE2-NEXT: por %xmm1, %xmm0 1475; SSE2-NEXT: retq 1476; 1477; SSSE3-LABEL: shuffle_v8i16_ueuu123u: 1478; SSSE3: # BB#0: 1479; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9] 1480; SSSE3-NEXT: retq 1481; 1482; SSE41-LABEL: shuffle_v8i16_ueuu123u: 1483; SSE41: # BB#0: 1484; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9] 1485; SSE41-NEXT: retq 1486; 1487; AVX-LABEL: shuffle_v8i16_ueuu123u: 1488; AVX: # BB#0: 1489; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9] 1490; AVX-NEXT: retq 1491 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 14, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef> 1492 ret <8 x i16> %shuffle 1493} 1494 1495define <8 x i16> @shuffle_v8i16_56701234(<8 x i16> %a, <8 x i16> %b) { 1496; SSE2-LABEL: shuffle_v8i16_56701234: 1497; SSE2: # BB#0: 1498; SSE2-NEXT: movdqa %xmm0, %xmm1 1499; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1500; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9] 1501; SSE2-NEXT: por %xmm1, %xmm0 1502; SSE2-NEXT: retq 1503; 
1504; SSSE3-LABEL: shuffle_v8i16_56701234: 1505; SSSE3: # BB#0: 1506; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] 1507; SSSE3-NEXT: retq 1508; 1509; SSE41-LABEL: shuffle_v8i16_56701234: 1510; SSE41: # BB#0: 1511; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] 1512; SSE41-NEXT: retq 1513; 1514; AVX-LABEL: shuffle_v8i16_56701234: 1515; AVX: # BB#0: 1516; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] 1517; AVX-NEXT: retq 1518 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4> 1519 ret <8 x i16> %shuffle 1520} 1521 1522define <8 x i16> @shuffle_v8i16_u6uu123u(<8 x i16> %a, <8 x i16> %b) { 1523; SSE2-LABEL: shuffle_v8i16_u6uu123u: 1524; SSE2: # BB#0: 1525; SSE2-NEXT: movdqa %xmm0, %xmm1 1526; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1527; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9] 1528; SSE2-NEXT: por %xmm1, %xmm0 1529; SSE2-NEXT: retq 1530; 1531; SSSE3-LABEL: shuffle_v8i16_u6uu123u: 1532; SSSE3: # BB#0: 1533; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] 1534; SSSE3-NEXT: retq 1535; 1536; SSE41-LABEL: shuffle_v8i16_u6uu123u: 1537; SSE41: # BB#0: 1538; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] 1539; SSE41-NEXT: retq 1540; 1541; AVX-LABEL: shuffle_v8i16_u6uu123u: 1542; AVX: # BB#0: 1543; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] 1544; AVX-NEXT: retq 1545 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 6, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef> 1546 ret <8 x i16> %shuffle 1547} 1548 1549define <8 x i16> @shuffle_v8i16_uuuu123u(<8 x i16> %a, <8 x i16> %b) { 1550; SSE-LABEL: shuffle_v8i16_uuuu123u: 1551; SSE: # BB#0: 1552; SSE-NEXT: pslldq 
{{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_uuuu123u:
; AVX: # BB#0:
; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef>
  ret <8 x i16> %shuffle
}

; Two-input rotation: result lanes 0-4 take elements 3-7 of %b (mask indices
; 11-15) and lanes 5-7 take elements 0-2 of %a.
define <8 x i16> @shuffle_v8i16_bcdef012(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_bcdef012:
; SSE2: # BB#0:
; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_bcdef012:
; SSSE3: # BB#0:
; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_bcdef012:
; SSE41: # BB#0:
; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_bcdef012:
; AVX: # BB#0:
; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2>
  ret <8 x i16> %shuffle
}

; Same rotation as the bcdef012 mask, but only lanes 1-3 and 6 are defined;
; the remaining lanes are undef.
define <8 x i16> @shuffle_v8i16_ucdeuu1u(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_ucdeuu1u:
; SSE2: # BB#0:
; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_ucdeuu1u:
; SSSE3: # BB#0:
; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_ucdeuu1u:
; SSE41: # BB#0:
; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_ucdeuu1u:
; AVX: # BB#0:
; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 12, i32 13, i32 14, i32 undef, i32 undef, i32 1, i32 undef>
  ret <8 x i16> %shuffle
}

; Single-input rotation of %a by three lanes (elements 3-7 followed by 0-2);
; %b is not referenced by the mask.
define <8 x i16> @shuffle_v8i16_34567012(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_34567012:
; SSE2: # BB#0:
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_34567012:
; SSSE3: # BB#0:
; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_34567012:
; SSE41: # BB#0:
; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_34567012:
; AVX: # BB#0:
; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2>
  ret <8 x i16> %shuffle
}

; Same single-input rotation as the 34567012 mask, with only lanes 1-3 and 6
; defined.
define <8 x i16> @shuffle_v8i16_u456uu1u(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_u456uu1u:
; SSE2: # BB#0:
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_u456uu1u:
; SSSE3: # BB#0:
; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_u456uu1u:
; SSE41: # BB#0:
; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_u456uu1u:
; AVX: # BB#0:
; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 1, i32 undef>
  ret <8 x i16> %shuffle
}

; Only lanes 1-3 are defined (elements 4-6 of %a); the expected lowering is a
; single whole-register byte shift right.
define <8 x i16> @shuffle_v8i16_u456uuuu(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_u456uuuu:
; SSE: # BB#0:
; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_u456uuuu:
; AVX: # BB#0:
; AVX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %shuffle
}

; Two-input rotation: elements 3-7 of %a followed by elements 0-2 of %b
; (mask indices 8-10).
define <8 x i16> @shuffle_v8i16_3456789a(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_3456789a:
; SSE2: # BB#0:
; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5]
; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_3456789a:
; SSSE3: # BB#0:
; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_3456789a:
; SSE41: # BB#0:
; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_3456789a:
; AVX: # BB#0:
; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
  ret <8 x i16> %shuffle
}

; Same rotation as the 3456789a mask, with only lanes 1-3 and 6 defined.
define <8 x i16> @shuffle_v8i16_u456uu9u(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_u456uu9u:
; SSE2: # BB#0:
; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5]
; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_u456uu9u:
; SSSE3: # BB#0:
; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_u456uu9u:
; SSE41: # BB#0:
; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_u456uu9u:
; AVX: # BB#0:
; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 9, i32 undef>
  ret <8 x i16> %shuffle
}

; Two-input rotation: elements 5-7 of %a followed by elements 0-4 of %b
; (mask indices 8-12).
define <8 x i16> @shuffle_v8i16_56789abc(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_56789abc:
; SSE2: # BB#0:
; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7,8,9]
; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_56789abc:
; SSSE3: # BB#0:
; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_56789abc:
; SSE41: # BB#0:
; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_56789abc:
; AVX: # BB#0:
; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12>
  ret <8 x i16> %shuffle
}

; Same rotation as the 56789abc mask, with only lanes 1 and 4-6 defined.
define <8 x i16> @shuffle_v8i16_u6uu9abu(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_u6uu9abu:
; SSE2: # BB#0:
; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7,8,9]
; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_u6uu9abu:
; SSSE3: # BB#0:
; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_u6uu9abu:
; SSE41: # BB#0:
; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_u6uu9abu:
; AVX: # BB#0:
; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 6, i32 undef, i32 undef, i32 9, i32 10, i32 11, i32 undef>
  ret <8 x i16> %shuffle
}

; Elements 0 and 1 of %a are placed in lanes 0 and 4; every other lane is
; undef, so the zero second operand is never selected.
define <8 x i16> @shuffle_v8i16_0uuu1uuu(<8 x i16> %a) {
; SSE2-LABEL: shuffle_v8i16_0uuu1uuu:
; SSE2: # BB#0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_0uuu1uuu:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
; SSSE3-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_0uuu1uuu:
; SSE41: # BB#0:
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_0uuu1uuu:
; AVX: # BB#0:
; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %shuffle
}

; Elements 0 and 1 of %a in lanes 0 and 4, with all other lanes taken from the
; zero vector: a zero-extension of the two low words to 64 bits each.
define <8 x i16> @shuffle_v8i16_0zzz1zzz(<8 x i16> %a) {
; SSE2-LABEL: shuffle_v8i16_0zzz1zzz:
; SSE2: # BB#0:
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_0zzz1zzz:
; SSSE3: # BB#0:
; SSSE3-NEXT: pxor %xmm1, %xmm1
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_0zzz1zzz:
; SSE41: # BB#0:
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_0zzz1zzz:
; AVX: # BB#0:
; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
  ret <8 x i16> %shuffle
}

; Elements 0-3 of %a are spread to the even lanes; the odd lanes are undef.
define <8 x i16> @shuffle_v8i16_0u1u2u3u(<8 x i16> %a) {
; SSE2-LABEL: shuffle_v8i16_0u1u2u3u:
; SSE2: # BB#0:
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_0u1u2u3u:
; SSSE3: # BB#0:
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_0u1u2u3u:
; SSE41: # BB#0:
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_0u1u2u3u:
; AVX: # BB#0:
; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3, i32 undef>
  ret <8 x i16> %shuffle
}

; Elements 0-3 of %a in the even lanes with zeros in the odd lanes: a
; zero-extension of the four low words to 32 bits each.
define <8 x i16> @shuffle_v8i16_0z1z2z3z(<8 x i16> %a) {
; SSE2-LABEL: shuffle_v8i16_0z1z2z3z:
; SSE2: # BB#0:
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_0z1z2z3z:
; SSSE3: # BB#0:
; SSSE3-NEXT: pxor %xmm1, %xmm1
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_0z1z2z3z:
; SSE41: # BB#0:
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_0z1z2z3z:
; AVX: # BB#0:
; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
  ret <8 x i16> %shuffle
}

;
; Shuffle to logical bit shifts
;
; Zero in every even lane and the preceding even element of %a in every odd
; lane; expected to lower to a 32-bit left shift by 16.
define <8 x i16> @shuffle_v8i16_z0z2z4z6(<8 x i16> %a) {
; SSE-LABEL: shuffle_v8i16_z0z2z4z6:
; SSE: # BB#0:
; SSE-NEXT: pslld $16, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_z0z2z4z6:
; AVX: # BB#0:
; AVX-NEXT: vpslld $16, %xmm0, %xmm0
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 0, i32 8, i32 2, i32 8, i32 4, i32 8, i32 6>
  ret <8 x i16> %shuffle
}

; Elements 0 and 4 of %a moved to the top word of each 64-bit half, zeros
; elsewhere; expected to lower to a 64-bit left shift by 48.
define <8 x i16> @shuffle_v8i16_zzz0zzz4(<8 x i16> %a) {
; SSE-LABEL: shuffle_v8i16_zzz0zzz4:
; SSE: # BB#0:
; SSE-NEXT: psllq $48, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_zzz0zzz4:
; AVX: # BB#0:
; AVX-NEXT: vpsllq $48, %xmm0, %xmm0
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 8, i32 8, i32 0, i32 8, i32 8, i32 8, i32 4>
  ret <8 x i16> %shuffle
}

; Left-shift pattern with undef lanes (5 and 7); expected to lower to a 64-bit
; left shift by 32.
define <8 x i16> @shuffle_v8i16_zz01zX4X(<8 x i16> %a) {
; SSE-LABEL: shuffle_v8i16_zz01zX4X:
; SSE: # BB#0:
; SSE-NEXT: psllq $32, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_zz01zX4X:
; AVX: # BB#0:
; AVX-NEXT: vpsllq $32, %xmm0, %xmm0
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 8, i32 0, i32 1, i32 8, i32 undef, i32 4, i32 undef>
  ret <8 x i16> %shuffle
}

; Left-shift pattern with an undef lane (2); expected to lower to a 64-bit
; left shift by 16.
define <8 x i16> @shuffle_v8i16_z0X2z456(<8 x i16> %a) {
; SSE-LABEL: shuffle_v8i16_z0X2z456:
; SSE: # BB#0:
; SSE-NEXT: psllq $16, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_z0X2z456:
; AVX: # BB#0:
; AVX-NEXT: vpsllq $16, %xmm0, %xmm0
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 0, i32 undef, i32 2, i32 8, i32 4, i32 5, i32 6>
  ret <8 x i16> %shuffle
}

; Odd elements of %a moved down one lane with zeros in the odd lanes (lane 4
; undef); expected to lower to a 32-bit right shift by 16.
define <8 x i16> @shuffle_v8i16_1z3zXz7z(<8 x i16> %a) {
; SSE-LABEL: shuffle_v8i16_1z3zXz7z:
; SSE: # BB#0:
; SSE-NEXT: psrld $16, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_1z3zXz7z:
; AVX: # BB#0:
; AVX-NEXT: vpsrld $16, %xmm0, %xmm0
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 1, i32 8, i32 3, i32 8, i32 undef, i32 8, i32 7, i32 8>
  ret <8 x i16> %shuffle
}

; Right-shift pattern with an undef lane (1); expected to lower to a 64-bit
; right shift by 16.
define <8 x i16> @shuffle_v8i16_1X3z567z(<8 x i16> %a) {
; SSE-LABEL: shuffle_v8i16_1X3z567z:
; SSE: # BB#0:
; SSE-NEXT: psrlq $16, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_1X3z567z:
; AVX: # BB#0:
; AVX-NEXT: vpsrlq $16, %xmm0, %xmm0
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 1, i32 undef, i32 3, i32 8, i32 5, i32 6, i32 7, i32 8>
  ret <8 x i16> %shuffle
}

; Upper two words of each 64-bit half moved to the lower two, zeros above;
; expected to lower to a 64-bit right shift by 32.
define <8 x i16> @shuffle_v8i16_23zz67zz(<8 x i16> %a) {
; SSE-LABEL: shuffle_v8i16_23zz67zz:
; SSE: # BB#0:
; SSE-NEXT: psrlq $32, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_23zz67zz:
; AVX: # BB#0:
; AVX-NEXT: vpsrlq $32, %xmm0, %xmm0
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 2, i32 3, i32 8, i32 8, i32 6, i32 7, i32 8, i32 8>
  ret <8 x i16> %shuffle
}

; Right-shift pattern with undef lanes (2-4); expected to lower to a 64-bit
; right shift by 48.
define <8 x i16> @shuffle_v8i16_3zXXXzzz(<8 x i16> %a) {
; SSE-LABEL: shuffle_v8i16_3zXXXzzz:
; SSE: # BB#0:
; SSE-NEXT: psrlq $48, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_3zXXXzzz:
; AVX: # BB#0:
; AVX-NEXT: vpsrlq $48, %xmm0, %xmm0
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 3, i32 8, i32 undef, i32 undef, i32 undef, i32 8, i32 8, i32 8>
  ret <8 x i16> %shuffle
}

; Low four lanes kept in place (lane 2 undef) and the high four lanes zero or
; undef; expected to lower to a move that zeroes the upper 64 bits.
define <8 x i16> @shuffle_v8i16_01u3zzuz(<8 x i16> %a) {
; SSE-LABEL: shuffle_v8i16_01u3zzuz:
; SSE: # BB#0:
; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_01u3zzuz:
; AVX: # BB#0:
; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 undef, i32 3, i32 8, i32 8, i32 undef, i32 8>
  ret <8 x i16> %shuffle
}

; Identity shuffle of %a except lane 1, which is taken from the zero vector.
define <8 x i16> @shuffle_v8i16_0z234567(<8 x i16> %a) {
; SSE2-LABEL: shuffle_v8i16_0z234567:
; SSE2: # BB#0:
; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_0z234567:
; SSSE3: # BB#0:
; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_0z234567:
; SSE41: # BB#0:
; SSE41-NEXT: pxor %xmm1, %xmm1
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_0z234567:
; AVX: # BB#0:
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %shuffle
}

; Lanes 0, 5 and 7 of %a kept in place; lanes 1-4 and 6 are zeroed.
define <8 x i16> @shuffle_v8i16_0zzzz5z7(<8 x i16> %a) {
; SSE2-LABEL: shuffle_v8i16_0zzzz5z7:
; SSE2: # BB#0:
; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_0zzzz5z7:
; SSSE3: # BB#0:
; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_0zzzz5z7:
; SSE41: # BB#0:
; SSE41-NEXT: pxor %xmm1, %xmm1
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4],xmm0[5],xmm1[6],xmm0[7]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_0zzzz5z7:
; AVX: # BB#0:
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4],xmm0[5],xmm1[6],xmm0[7]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 8, i32 5, i32 8, i32 7>
  ret <8 x i16> %shuffle
}

; Identity shuffle of %a with lane 7 taken from the zero vector.
; NOTE(review): the mask also takes lane 1 from the zero vector (index 9),
; which the function name does not reflect; the assertions below match the
; mask as written - confirm whether index 9 is intended.
define <8 x i16> @shuffle_v8i16_0123456z(<8 x i16> %a) {
; SSE2-LABEL: shuffle_v8i16_0123456z:
; SSE2: # BB#0:
; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_0123456z:
; SSSE3: # BB#0:
; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_0123456z:
; SSE41: # BB#0:
; SSE41-NEXT: pxor %xmm1, %xmm1
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6],xmm1[7]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_0123456z:
; AVX: # BB#0:
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6],xmm1[7]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15>
  ret <8 x i16> %shuffle
}

; Irregular two-input shuffle with undef lanes: element 7 of %b, element 3 of
; %a, element 4 of %b twice, then element 5 of %a.
define <8 x i16> @shuffle_v8i16_fu3ucc5u(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_fu3ucc5u:
; SSE: # BB#0:
; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
; SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,4,4]
; SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_fu3ucc5u:
; AVX: # BB#0:
; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
; AVX-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,4,4]
; AVX-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 15, i32 undef, i32 3, i32 undef, i32 12, i32 12, i32 5, i32 undef>
  ret <8 x i16> %shuffle
}

; Zero in lane 0 followed by elements 0-5 of %a (lane 7 undef); expected to
; lower to a whole-register byte shift left by one word.
define <8 x i16> @shuffle_v8i16_8012345u(<8 x i16> %a) {
; SSE-LABEL: shuffle_v8i16_8012345u:
; SSE: # BB#0:
; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_8012345u:
; AVX: # BB#0:
; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 8, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 undef>

  ret <8 x i16> %shuffle
}

; Blend written as and/or bit masks on <2 x i64> rather than a shufflevector:
; %a keeps only the upper 32 bits of its high quadword (word lanes 6-7), %b
; keeps everything else (word lanes 0-5), and the two are or'd together.
define <8 x i16> @mask_v8i16_012345ef(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: mask_v8i16_012345ef:
; SSE2: # BB#0:
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: mask_v8i16_012345ef:
; SSSE3: # BB#0:
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: mask_v8i16_012345ef:
; SSE41: # BB#0:
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: mask_v8i16_012345ef:
; AVX1: # BB#0:
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: mask_v8i16_012345ef:
; AVX2: # BB#0:
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
; AVX2-NEXT: retq
  %1 = bitcast <8 x i16> %a to <2 x i64>
  %2 = bitcast <8 x i16> %b to <2 x i64>
  %3 = and <2 x i64> %1, <i64 0, i64 -4294967296>
  %4 = and <2 x i64> %2, <i64 -1, i64 4294967295>
  %5 = or <2 x i64> %4, %3
  %6 = bitcast <2 x i64> %5 to <8 x i16>
  ret <8 x i16> %6
}

; Loads an i32, inserts it into lane 0 of a zero <4 x i32>, reinterprets the
; vector as <8 x i16> and splats word element 0 (the low half of the load).
define <8 x i16> @insert_dup_mem_v8i16_i32(i32* %ptr) {
; SSE2-LABEL: insert_dup_mem_v8i16_i32:
; SSE2: # BB#0:
; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: insert_dup_mem_v8i16_i32:
; SSSE3: # BB#0:
; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: insert_dup_mem_v8i16_i32:
; SSE41: # BB#0:
; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; SSE41-NEXT: retq
;
; AVX1-LABEL: insert_dup_mem_v8i16_i32:
; AVX1: # BB#0:
; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_dup_mem_v8i16_i32:
; AVX2: # BB#0:
; AVX2-NEXT: vpbroadcastw (%rdi), %xmm0
; AVX2-NEXT: retq
  %tmp = load i32, i32* %ptr, align 4
  %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
  %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
  %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %tmp3
}

; Loads an i16, sign-extends it to i32, inserts it into lane 0 of a zero
; <4 x i32>, reinterprets as <8 x i16> and splats word element 0.
define <8 x i16> @insert_dup_mem_v8i16_sext_i16(i16* %ptr) {
; SSE2-LABEL: insert_dup_mem_v8i16_sext_i16:
; SSE2: # BB#0:
; SSE2-NEXT: movswl (%rdi), %eax
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: insert_dup_mem_v8i16_sext_i16:
; SSSE3: # BB#0:
; SSSE3-NEXT: movswl (%rdi), %eax
; SSSE3-NEXT: movd %eax, %xmm0
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: insert_dup_mem_v8i16_sext_i16:
; SSE41: # BB#0:
; SSE41-NEXT: movswl (%rdi), %eax
; SSE41-NEXT: movd %eax, %xmm0
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; SSE41-NEXT: retq
;
; AVX1-LABEL: insert_dup_mem_v8i16_sext_i16:
; AVX1: # BB#0:
; AVX1-NEXT: movswl (%rdi), %eax
; AVX1-NEXT: vmovd %eax, %xmm0
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_dup_mem_v8i16_sext_i16:
; AVX2: # BB#0:
; AVX2-NEXT: movswl (%rdi), %eax
; AVX2-NEXT: vmovd %eax, %xmm0
; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
; AVX2-NEXT: retq
  %tmp = load i16, i16* %ptr, align 2
  %tmp1 = sext i16 %tmp to i32
  %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
  %tmp3 = bitcast <4 x i32> %tmp2 to <8 x i16>
  %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %tmp4
}

; Like insert_dup_mem_v8i16_i32, but splats word element 1, i.e. the upper
; half of the loaded i32 on this little-endian target.
define <8 x i16> @insert_dup_elt1_mem_v8i16_i32(i32* %ptr) {
; SSE2-LABEL: insert_dup_elt1_mem_v8i16_i32:
; SSE2: # BB#0:
; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: insert_dup_elt1_mem_v8i16_i32:
; SSSE3: # BB#0:
; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: insert_dup_elt1_mem_v8i16_i32:
; SSE41: # BB#0:
; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
; SSE41-NEXT: retq
;
; AVX1-LABEL: insert_dup_elt1_mem_v8i16_i32:
; AVX1: # BB#0:
; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_dup_elt1_mem_v8i16_i32:
; AVX2: # BB#0:
; AVX2-NEXT: vpbroadcastw 2(%rdi), %xmm0
; AVX2-NEXT: retq
  %tmp = load i32, i32* %ptr, align 4
  %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
  %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
  %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i16> %tmp3
}

; The loaded i32 is inserted into dword lane 1 and word element 3 (the upper
; half of that dword) is splatted.
define <8 x i16> @insert_dup_elt3_mem_v8i16_i32(i32* %ptr) {
; SSE2-LABEL: insert_dup_elt3_mem_v8i16_i32:
; SSE2: # BB#0:
; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: insert_dup_elt3_mem_v8i16_i32:
; SSSE3: # BB#0:
; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: insert_dup_elt3_mem_v8i16_i32:
; SSE41: # BB#0:
; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
; SSE41-NEXT: retq
;
; AVX1-LABEL: insert_dup_elt3_mem_v8i16_i32:
; AVX1: # BB#0:
; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_dup_elt3_mem_v8i16_i32:
; AVX2: # BB#0:
; AVX2-NEXT: vpbroadcastw 2(%rdi), %xmm0
; AVX2-NEXT: retq
  %tmp = load i32, i32* %ptr, align 4
  %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 1
  %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
  %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <8 x i16> %tmp3
}

; Like insert_dup_mem_v8i16_sext_i16, but splats word element 1, i.e. the
; upper half of the sign-extended value.
define <8 x i16> @insert_dup_elt1_mem_v8i16_sext_i16(i16* %ptr) {
; SSE2-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
; SSE2: # BB#0:
; SSE2-NEXT: movswl (%rdi), %eax
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
; SSSE3: # BB#0:
; SSSE3-NEXT: movswl (%rdi), %eax
; SSSE3-NEXT: movd %eax, %xmm0
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
; SSE41: # BB#0:
; SSE41-NEXT: movswl (%rdi), %eax
; SSE41-NEXT: movd %eax, %xmm0
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
; SSE41-NEXT: retq
;
; AVX1-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
; AVX1: # BB#0:
; AVX1-NEXT: movswl (%rdi), %eax
; AVX1-NEXT: vmovd %eax, %xmm0
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
; AVX2: # BB#0:
; AVX2-NEXT: movswl (%rdi), %eax
; AVX2-NEXT: shrl $16, %eax
; AVX2-NEXT: vmovd %eax, %xmm0
; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
; AVX2-NEXT: retq
  %tmp = load i16, i16* %ptr, align 2
  %tmp1 = sext i16 %tmp to i32
  %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
  %tmp3 = bitcast <4 x i32> %tmp2 to <8 x i16>
  %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i16> %tmp4
}

; The sign-extended i16 load is inserted into dword lane 1 and word element 3
; (the upper half of that dword) is splatted.
define <8 x i16> @insert_dup_elt3_mem_v8i16_sext_i16(i16* %ptr) {
; SSE2-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
; SSE2: # BB#0:
; SSE2-NEXT: movswl (%rdi), %eax
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
; SSSE3: # BB#0:
; SSSE3-NEXT: movswl (%rdi), %eax
; SSSE3-NEXT: movd %eax, %xmm0
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
; SSE41: # BB#0:
; SSE41-NEXT: movswl (%rdi), %eax
; SSE41-NEXT: movd %eax, %xmm0
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
; SSE41-NEXT: retq
;
; AVX1-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
; AVX1: # BB#0:
; AVX1-NEXT: movswl (%rdi), %eax
; AVX1-NEXT: vmovd %eax, %xmm0
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
; AVX2: # BB#0:
; AVX2-NEXT: movswl (%rdi), %eax
; AVX2-NEXT: shrl $16, %eax
; AVX2-NEXT: vmovd %eax, %xmm0
; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
; AVX2-NEXT: retq
  %tmp = load i16, i16* %ptr, align 2
  %tmp1 = sext i16 %tmp to i32
  %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 1
  %tmp3 = bitcast <4 x i32> %tmp2 to <8 x i16>
  %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <8 x i16> %tmp4
}