; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2

; Codegen checks for four-element, 128-bit shufflevector lowering on x86-64.
; Every function wraps exactly one shufflevector and returns its result.
; The last four characters of a function name spell the result lanes in
; order: a digit is the mask index used for that lane, 'z' is a lane that
; reads from an all-zero operand, and 'u' is an undef mask element.
; The check lines are tool-generated (see the first line of this file);
; presumably they should be regenerated with that script, not hand-edited.

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"

; Single-input <4 x i32> shuffles: every mask index is below 4, so only %a is
; read. Each one is expected to become a single pshufd (vpshufd with AVX).

define <4 x i32> @shuffle_v4i32_0001(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: shuffle_v4i32_0001:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,1]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v4i32_0001:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,1]
; AVX-NEXT: retq
  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
  ret <4 x i32> %shuffle
}
define <4 x i32> @shuffle_v4i32_0020(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: shuffle_v4i32_0020:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,0]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v4i32_0020:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,2,0]
; AVX-NEXT: retq
  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
  ret <4 x i32> %shuffle
}
define <4 x i32> @shuffle_v4i32_0112(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: shuffle_v4i32_0112:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,2]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v4i32_0112:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,1,2]
; AVX-NEXT: retq
  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 1, i32 2>
  ret <4 x i32> %shuffle
}
define <4 x i32> @shuffle_v4i32_0300(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: shuffle_v4i32_0300:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,0,0]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v4i32_0300:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,0,0]
; AVX-NEXT: retq
  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
  ret <4 x i32> %shuffle
}
define <4 x i32> @shuffle_v4i32_1000(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: shuffle_v4i32_1000:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,0,0]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v4i32_1000:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,0,0]
; AVX-NEXT: retq
  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
  ret <4 x i32> %shuffle
}
define <4 x i32> @shuffle_v4i32_2200(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: shuffle_v4i32_2200:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,0,0]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v4i32_2200:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,0,0]
; AVX-NEXT: retq
  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
  ret <4 x i32> %shuffle
}
define <4 x i32> @shuffle_v4i32_3330(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: shuffle_v4i32_3330:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,0]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v4i32_3330:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,0]
; AVX-NEXT: retq
  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0>
  ret <4 x i32> %shuffle
}
define <4 x i32> @shuffle_v4i32_3210(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: shuffle_v4i32_3210:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v4i32_3210:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
; AVX-NEXT: retq
  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i32> %shuffle
}

define <4 x i32> @shuffle_v4i32_2121(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: shuffle_v4i32_2121:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,2,1]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v4i32_2121:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,1]
; AVX-NEXT: retq
  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 1, i32 2, i32 1>
  ret <4 x i32> %shuffle
}

; Single-input <4 x float> shuffles. The expected lowering is one shufps, or
; one vpermilps when AVX is enabled.

define <4 x float> @shuffle_v4f32_0001(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: shuffle_v4f32_0001:
; SSE: # BB#0:
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,1]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v4f32_0001:
; AVX: # BB#0:
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,1]
; AVX-NEXT: retq
  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
  ret <4 x float> %shuffle
}
define <4 x float> @shuffle_v4f32_0020(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: shuffle_v4f32_0020:
; SSE: # BB#0:
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,2,0]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v4f32_0020:
; AVX: # BB#0:
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,2,0]
; AVX-NEXT: retq
  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
  ret <4 x float> %shuffle
}
define <4 x float> @shuffle_v4f32_0300(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: shuffle_v4f32_0300:
; SSE: # BB#0:
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3,0,0]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v4f32_0300:
; AVX: # BB#0:
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,3,0,0]
; AVX-NEXT: retq
  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
  ret <4 x float> %shuffle
}
define <4 x float> @shuffle_v4f32_1000(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: shuffle_v4f32_1000:
; SSE: # BB#0:
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0,0,0]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v4f32_1000:
; AVX: # BB#0:
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,0,0,0]
; AVX-NEXT: retq
  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
  ret <4 x float> %shuffle
}
define <4 x float> @shuffle_v4f32_2200(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: shuffle_v4f32_2200:
; SSE: # BB#0:
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,2,0,0]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v4f32_2200:
; AVX: # BB#0:
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,0,0]
; AVX-NEXT: retq
  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
  ret <4 x float> %shuffle
}
define <4 x float> @shuffle_v4f32_3330(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: shuffle_v4f32_3330:
; SSE: # BB#0:
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,0]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v4f32_3330:
; AVX: # BB#0:
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,0]
; AVX-NEXT: retq
  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0>
  ret <4 x float> %shuffle
}
define <4 x float> @shuffle_v4f32_3210(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: shuffle_v4f32_3210:
; SSE: # BB#0:
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,2,1,0]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v4f32_3210:
; AVX: # BB#0:
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
; AVX-NEXT: retq
  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x float> %shuffle
}

; Single-input <4 x float> masks that duplicate lanes pairwise. Without AVX,
; 0011 and 2233 are checked as unpcklps/unpckhps. 0022 and 1133 are checked as
; movsldup/movshdup in every run except the one with no -mattr, which expects
; a plain shufps.

define <4 x float> @shuffle_v4f32_0011(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: shuffle_v4f32_0011:
; SSE: # BB#0:
; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v4f32_0011:
; AVX: # BB#0:
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX-NEXT: retq
  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
  ret <4 x float> %shuffle
}
define <4 x float> @shuffle_v4f32_2233(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: shuffle_v4f32_2233:
; SSE: # BB#0:
; SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v4f32_2233:
; AVX: # BB#0:
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; AVX-NEXT: retq
  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
  ret <4 x float> %shuffle
}
define <4 x float> @shuffle_v4f32_0022(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: shuffle_v4f32_0022:
; SSE2: # BB#0:
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,2,2]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v4f32_0022:
; SSE3: # BB#0:
; SSE3-NEXT: movsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v4f32_0022:
; SSSE3: # BB#0:
; SSSE3-NEXT: movsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v4f32_0022:
; SSE41: # BB#0:
; SSE41-NEXT: movsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v4f32_0022:
; AVX: # BB#0:
; AVX-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
; AVX-NEXT: retq
  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  ret <4 x float> %shuffle
}
define <4 x float> @shuffle_v4f32_1133(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: shuffle_v4f32_1133:
; SSE2: # BB#0:
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v4f32_1133:
; SSE3: # BB#0:
; SSE3-NEXT: movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v4f32_1133:
; SSSE3: # BB#0:
; SSSE3-NEXT: movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v4f32_1133:
; SSE41: # BB#0:
; SSE41-NEXT: movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v4f32_1133:
; AVX: # BB#0:
; AVX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX-NEXT: retq
  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
  ret <4 x float> %shuffle
}

; Two-input <4 x float> shuffles that move adjacent lane pairs as a unit;
; both are checked as a single unpcklpd/unpckhpd (plus a register copy for
; 6723 without AVX).

define <4 x float> @shuffle_v4f32_0145(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: shuffle_v4f32_0145:
; SSE: # BB#0:
; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v4f32_0145:
; AVX: # BB#0:
; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x float> %shuffle
}

define <4 x float> @shuffle_v4f32_6723(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: shuffle_v4f32_6723:
; SSE: # BB#0:
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v4f32_6723:
; AVX: # BB#0:
; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; AVX-NEXT: retq
  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 6, i32 7, i32 2, i32 3>
  ret <4 x float> %shuffle
}

; Two-input <4 x i32> shuffles: mask indices 0-3 read %a and 4-7 read %b.
; Most of these have a separate expectation per feature level.

define <4 x i32> @shuffle_v4i32_0124(<4 x i32> %a, <4 x i32> %b) {
; SSE2-LABEL: shuffle_v4i32_0124:
; SSE2: # BB#0:
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v4i32_0124:
; SSE3: # BB#0:
; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v4i32_0124:
; SSSE3: # BB#0:
; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v4i32_0124:
; SSE41: # BB#0:
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,2,0]
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuffle_v4i32_0124:
; AVX1: # BB#0:
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,2,0]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i32_0124:
; AVX2: # BB#0:
; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
; AVX2-NEXT: retq
  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
  ret <4 x i32> %shuffle
}
define <4 x i32> @shuffle_v4i32_0142(<4 x i32> %a, <4 x i32> %b) {
; SSE2-LABEL: shuffle_v4i32_0142:
; SSE2: # BB#0:
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v4i32_0142:
; SSE3: # BB#0:
; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v4i32_0142:
; SSSE3: # BB#0:
; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v4i32_0142:
; SSE41: # BB#0:
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,2,2]
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuffle_v4i32_0142:
; AVX1: # BB#0:
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,2,2]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i32_0142:
; AVX2: # BB#0:
; AVX2-NEXT: vpbroadcastq %xmm1, %xmm1
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,2,2]
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
; AVX2-NEXT: retq
  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 2>
  ret <4 x i32> %shuffle
}
define <4 x i32> @shuffle_v4i32_0412(<4 x i32> %a, <4 x i32> %b) {
; SSE2-LABEL: shuffle_v4i32_0412:
; SSE2: # BB#0:
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[1,2]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v4i32_0412:
; SSE3: # BB#0:
; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[1,2]
; SSE3-NEXT: movaps %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v4i32_0412:
; SSSE3: # BB#0:
; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[1,2]
; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v4i32_0412:
; SSE41: # BB#0:
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,2]
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuffle_v4i32_0412:
; AVX1: # BB#0:
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,1,2]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i32_0412:
; AVX2: # BB#0:
; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,1,2]
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
; AVX2-NEXT: retq
  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 2>
  ret <4 x i32> %shuffle
}
define <4 x i32> @shuffle_v4i32_4012(<4 x i32> %a, <4 x i32> %b) {
; SSE2-LABEL: shuffle_v4i32_4012:
; SSE2: # BB#0:
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,2]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v4i32_4012:
; SSE3: # BB#0:
; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,2]
; SSE3-NEXT: movaps %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v4i32_4012:
; SSSE3: # BB#0:
; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,2]
; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v4i32_4012:
; SSE41: # BB#0:
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,2]
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuffle_v4i32_4012:
; AVX1: # BB#0:
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,2]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i32_4012:
; AVX2: # BB#0:
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,2]
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX2-NEXT: retq
  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 2>
  ret <4 x i32> %shuffle
}
define <4 x i32> @shuffle_v4i32_0145(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: shuffle_v4i32_0145:
; SSE: # BB#0:
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v4i32_0145:
; AVX: # BB#0:
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x i32> %shuffle
}
define <4 x i32> @shuffle_v4i32_0451(<4 x i32> %a, <4 x i32> %b) {
; SSE2-LABEL: shuffle_v4i32_0451:
; SSE2: # BB#0:
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v4i32_0451:
; SSE3: # BB#0:
; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v4i32_0451:
; SSSE3: # BB#0:
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v4i32_0451:
; SSE41: # BB#0:
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5],xmm0[6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuffle_v4i32_0451:
; AVX1: # BB#0:
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5],xmm0[6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i32_0451:
; AVX2: # BB#0:
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3]
; AVX2-NEXT: retq
  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 5, i32 1>
  ret <4 x i32> %shuffle
}
define <4 x i32> @shuffle_v4i32_4501(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: shuffle_v4i32_4501:
; SSE: # BB#0:
; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v4i32_4501:
; AVX: # BB#0:
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-NEXT: retq
  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
  ret <4 x i32> %shuffle
}
define <4 x i32> @shuffle_v4i32_4015(<4 x i32> %a, <4 x i32> %b) {
; SSE2-LABEL: shuffle_v4i32_4015:
; SSE2: # BB#0:
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v4i32_4015:
; SSE3: # BB#0:
; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v4i32_4015:
; SSSE3: # BB#0:
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v4i32_4015:
; SSE41: # BB#0:
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5],xmm1[6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuffle_v4i32_4015:
; AVX1: # BB#0:
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5],xmm1[6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i32_4015:
; AVX2: # BB#0:
; AVX2-NEXT: vpbroadcastq %xmm1, %xmm1
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3]
; AVX2-NEXT: retq
  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 5>
  ret <4 x i32> %shuffle
}

; <4 x float> shuffles of %a against an all-zero vector. zeroinitializer is
; the FIRST operand in this group, so mask indices 0-3 produce zero lanes and
; indices 4-7 read elements 0-3 of %a.

define <4 x float> @shuffle_v4f32_4zzz(<4 x float> %a) {
; SSE2-LABEL: shuffle_v4f32_4zzz:
; SSE2: # BB#0:
; SSE2-NEXT: xorps %xmm1, %xmm1
; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v4f32_4zzz:
; SSE3: # BB#0:
; SSE3-NEXT: xorps %xmm1, %xmm1
; SSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSE3-NEXT: movaps %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v4f32_4zzz:
; SSSE3: # BB#0:
; SSSE3-NEXT: xorps %xmm1, %xmm1
; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v4f32_4zzz:
; SSE41: # BB#0:
; SSE41-NEXT: xorps %xmm1, %xmm1
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v4f32_4zzz:
; AVX: # BB#0:
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT: retq
  %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  ret <4 x float> %shuffle
}

define <4 x float> @shuffle_v4f32_z4zz(<4 x float> %a) {
; SSE2-LABEL: shuffle_v4f32_z4zz:
; SSE2: # BB#0:
; SSE2-NEXT: xorps %xmm1, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v4f32_z4zz:
; SSE3: # BB#0:
; SSE3-NEXT: xorps %xmm1, %xmm1
; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v4f32_z4zz:
; SSSE3: # BB#0:
; SSSE3-NEXT: xorps %xmm1, %xmm1
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v4f32_z4zz:
; SSE41: # BB#0:
; SSE41-NEXT: insertps {{.*#+}} xmm0 = zero,xmm0[0],zero,zero
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v4f32_z4zz:
; AVX: # BB#0:
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm0[0],zero,zero
; AVX-NEXT: retq
  %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 2, i32 4, i32 3, i32 0>
  ret <4 x float> %shuffle
}

define <4 x float> @shuffle_v4f32_zz4z(<4 x float> %a) {
; SSE2-LABEL: shuffle_v4f32_zz4z:
; SSE2: # BB#0:
; SSE2-NEXT: xorps %xmm1, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,2]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v4f32_zz4z:
; SSE3: # BB#0:
; SSE3-NEXT: xorps %xmm1, %xmm1
; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,2]
; SSE3-NEXT: movaps %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v4f32_zz4z:
; SSSE3: # BB#0:
; SSSE3-NEXT: xorps %xmm1, %xmm1
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,2]
; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v4f32_zz4z:
; SSE41: # BB#0:
; SSE41-NEXT: insertps {{.*#+}} xmm0 = zero,zero,xmm0[0],zero
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v4f32_zz4z:
; AVX: # BB#0:
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = zero,zero,xmm0[0],zero
; AVX-NEXT: retq
  %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 0, i32 0, i32 4, i32 0>
  ret <4 x float> %shuffle
}

define <4 x float> @shuffle_v4f32_zuu4(<4 x float> %a) {
; SSE2-LABEL: shuffle_v4f32_zuu4:
; SSE2: # BB#0:
; SSE2-NEXT: xorps %xmm1, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v4f32_zuu4:
; SSE3: # BB#0:
; SSE3-NEXT: xorps %xmm1, %xmm1
; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
; SSE3-NEXT: movaps %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v4f32_zuu4:
; SSSE3: # BB#0:
; SSSE3-NEXT: xorps %xmm1, %xmm1
; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v4f32_zuu4:
; SSE41: # BB#0:
; SSE41-NEXT: insertps {{.*#+}} xmm0 = zero,zero,zero,xmm0[0]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v4f32_zuu4:
; AVX: # BB#0:
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = zero,zero,zero,xmm0[0]
; AVX-NEXT: retq
  %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 0, i32 undef, i32 undef, i32 4>
  ret <4 x float> %shuffle
}

define <4 x float> @shuffle_v4f32_zzz7(<4 x float> %a) {
; SSE2-LABEL: shuffle_v4f32_zzz7:
; SSE2: # BB#0:
; SSE2-NEXT: xorps %xmm1, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v4f32_zzz7:
; SSE3: # BB#0:
; SSE3-NEXT: xorps %xmm1, %xmm1
; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
; SSE3-NEXT: movaps %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v4f32_zzz7:
; SSSE3: # BB#0:
; SSSE3-NEXT: xorps %xmm1, %xmm1
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v4f32_zzz7:
; SSE41: # BB#0:
; SSE41-NEXT: xorps %xmm1, %xmm1
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v4f32_zzz7:
; AVX: # BB#0:
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
; AVX-NEXT: retq
  %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
  ret <4 x float> %shuffle
}

define <4 x float> @shuffle_v4f32_z6zz(<4 x float> %a) {
; SSE2-LABEL: shuffle_v4f32_z6zz:
; SSE2: # BB#0:
; SSE2-NEXT: xorps %xmm1, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v4f32_z6zz:
; SSE3: # BB#0:
; SSE3-NEXT: xorps %xmm1, %xmm1
; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0]
; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v4f32_z6zz:
; SSSE3: # BB#0:
; SSSE3-NEXT: xorps %xmm1, %xmm1
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0]
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v4f32_z6zz:
; SSE41: # BB#0:
; SSE41-NEXT: insertps {{.*#+}} xmm0 = zero,xmm0[2],zero,zero
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v4f32_z6zz:
; AVX: # BB#0:
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm0[2],zero,zero
; AVX-NEXT: retq
  %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 0, i32 6, i32 2, i32 3>
  ret <4 x float> %shuffle
}

; <4 x float> shuffles of %a against an all-zero vector. zeroinitializer is
; the SECOND operand in this group, so mask indices 0-3 read %a and indices
; 4-7 produce zero lanes. In the function names, 'z' marks such a zero lane.

define <4 x float> @shuffle_v4f32_0z23(<4 x float> %a) {
; SSE2-LABEL: shuffle_v4f32_0z23:
; SSE2: # BB#0:
; SSE2-NEXT: xorps %xmm1, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,0]
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v4f32_0z23:
; SSE3: # BB#0:
; SSE3-NEXT: xorps %xmm1, %xmm1
; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,0]
; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3]
; SSE3-NEXT: movaps %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v4f32_0z23:
; SSSE3: # BB#0:
; SSSE3-NEXT: xorps %xmm1, %xmm1
; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,0]
; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3]
; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v4f32_0z23:
; SSE41: # BB#0:
; SSE41-NEXT: xorps %xmm1, %xmm1
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v4f32_0z23:
; AVX: # BB#0:
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
; AVX-NEXT: retq
  %shuffle = shufflevector <4 x float> %a, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 2, i32 3>
  ret <4 x float> %shuffle
}

define <4 x float> @shuffle_v4f32_01z3(<4 x float> %a) {
; SSE2-LABEL: shuffle_v4f32_01z3:
; SSE2: # BB#0:
; SSE2-NEXT: xorps %xmm1, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v4f32_01z3:
; SSE3: # BB#0:
; SSE3-NEXT: xorps %xmm1, %xmm1
; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v4f32_01z3:
; SSSE3: # BB#0:
; SSSE3-NEXT: xorps %xmm1, %xmm1
; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v4f32_01z3:
; SSE41: # BB#0:
; SSE41-NEXT: xorps %xmm1, %xmm1
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v4f32_01z3:
; AVX: # BB#0:
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
; AVX-NEXT: retq
  %shuffle = shufflevector <4 x float> %a, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
  ret <4 x float> %shuffle
}

define <4 x float> @shuffle_v4f32_012z(<4 x float> %a) {
; SSE2-LABEL: shuffle_v4f32_012z:
; SSE2: # BB#0:
; SSE2-NEXT: xorps %xmm1, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,0],xmm0[2,0]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v4f32_012z:
; SSE3: # BB#0:
; SSE3-NEXT: xorps %xmm1, %xmm1
; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,0],xmm0[2,0]
; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v4f32_012z:
; SSSE3: # BB#0:
; SSSE3-NEXT: xorps %xmm1, %xmm1
; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,0],xmm0[2,0]
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v4f32_012z:
; SSE41: # BB#0:
; SSE41-NEXT: xorps %xmm1, %xmm1
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v4f32_012z:
; AVX: # BB#0:
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
; AVX-NEXT: retq
  %shuffle = shufflevector <4 x float> %a, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
  ret <4 x float> %shuffle
}

define <4 x float> @shuffle_v4f32_0zz3(<4 x float> %a) {
; SSE2-LABEL: shuffle_v4f32_0zz3:
; SSE2: # BB#0:
; SSE2-NEXT: xorps %xmm1, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3],xmm1[1,2]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,3,1]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v4f32_0zz3:
; SSE3: # BB#0:
; SSE3-NEXT: xorps %xmm1, %xmm1
; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3],xmm1[1,2]
; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,3,1]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v4f32_0zz3:
; SSSE3: # BB#0:
; SSSE3-NEXT: xorps %xmm1, %xmm1
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3],xmm1[1,2]
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,3,1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v4f32_0zz3:
; SSE41: # BB#0:
; SSE41-NEXT: xorps %xmm1, %xmm1
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v4f32_0zz3:
; AVX: # BB#0:
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3]
; AVX-NEXT: retq
  %shuffle = shufflevector <4 x float> %a, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 4, i32 3>
  ret <4 x float> %shuffle
}

; Here the second operand is a constant whose element 0 is 0.0 and whose other
; elements are undef; the mask reads only that element 0 (index 4), twice.

define <4 x float> @shuffle_v4f32_0z2z(<4 x float> %v) {
; SSE2-LABEL: shuffle_v4f32_0z2z:
; SSE2: # BB#0:
; SSE2-NEXT: xorps %xmm1, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,0]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v4f32_0z2z:
; SSE3: # BB#0:
; SSE3-NEXT: xorps %xmm1, %xmm1
; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,0]
; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v4f32_0z2z:
; SSSE3: # BB#0:
; SSSE3-NEXT: xorps %xmm1, %xmm1
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,0]
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v4f32_0z2z:
; SSE41: # BB#0:
; SSE41-NEXT: xorps %xmm1, %xmm1
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v4f32_0z2z:
; AVX: # BB#0:
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX-NEXT: retq
  %shuffle = shufflevector <4 x float> %v, <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <4 x i32> <i32 0, i32 4, i32 2, i32 4>
  ret <4 x float> %shuffle
}

; Two-input shuffle whose first mask element is undef ('u' in the name); the
; remaining lanes are <0, 5, 1>. Checked as a single unpcklps with the
; operands swapped.

define <4 x float> @shuffle_v4f32_u051(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: shuffle_v4f32_u051:
; SSE: # BB#0:
; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v4f32_u051:
; AVX: # BB#0:
; AVX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; AVX-NEXT: retq
  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 undef, i32 0, i32 5, i32 1>
  ret <4 x float> %shuffle
}

; NOTE(review): the definition of shuffle_v4f32_0zz4 on the next line runs past the end of the portion of the file under review, so its text is carried through unmodified; re-split it together with its continuation.
1037define <4 x float> @shuffle_v4f32_0zz4(<4 x float> %a, <4 x float> %b) { 1038; SSE2-LABEL: shuffle_v4f32_0zz4: 1039; SSE2: # BB#0: 1040; SSE2-NEXT: xorps %xmm2, %xmm2 1041; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm2[2,0] 1042; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,0] 1043; SSE2-NEXT: movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3] 1044; SSE2-NEXT: movaps %xmm2, %xmm0 1045; SSE2-NEXT: retq 1046; 1047; SSE3-LABEL: shuffle_v4f32_0zz4: 1048; SSE3: # BB#0: 1049; SSE3-NEXT: xorps %xmm2, %xmm2 1050; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm2[2,0] 1051; SSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,0] 1052; SSE3-NEXT: movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3] 1053; SSE3-NEXT: movaps %xmm2, %xmm0 1054; SSE3-NEXT: retq 1055; 1056; SSSE3-LABEL: shuffle_v4f32_0zz4: 1057; SSSE3: # BB#0: 1058; 
SSSE3-NEXT: xorps %xmm2, %xmm2 1059; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm2[2,0] 1060; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,0] 1061; SSSE3-NEXT: movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3] 1062; SSSE3-NEXT: movaps %xmm2, %xmm0 1063; SSSE3-NEXT: retq 1064; 1065; SSE41-LABEL: shuffle_v4f32_0zz4: 1066; SSE41: # BB#0: 1067; SSE41-NEXT: insertps {{.*#+}} xmm1 = zero,zero,zero,xmm1[0] 1068; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] 1069; SSE41-NEXT: retq 1070; 1071; AVX-LABEL: shuffle_v4f32_0zz4: 1072; AVX: # BB#0: 1073; AVX-NEXT: vinsertps {{.*#+}} xmm1 = zero,zero,zero,xmm1[0] 1074; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] 1075; AVX-NEXT: retq 1076 %shuffle = shufflevector <4 x float> %b, <4 x float> zeroinitializer, <4 x i32> <i32 undef, i32 5, i32 6, i32 0> 1077 %shuffle1 = shufflevector <4 x float> %a, <4 x float> %shuffle, <4 x i32> <i32 0, i32 5, i32 6, i32 7> 1078 ret <4 x float> %shuffle1 1079} 1080 1081define <4 x float> @shuffle_v4f32_0zz6(<4 x float> %a, <4 x float> %b) { 1082; SSE2-LABEL: shuffle_v4f32_0zz6: 1083; SSE2: # BB#0: 1084; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,2] 1085; SSE2-NEXT: xorps %xmm1, %xmm1 1086; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,2],xmm0[0,3] 1087; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0,1,3] 1088; SSE2-NEXT: movaps %xmm1, %xmm0 1089; SSE2-NEXT: retq 1090; 1091; SSE3-LABEL: shuffle_v4f32_0zz6: 1092; SSE3: # BB#0: 1093; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,2] 1094; SSE3-NEXT: xorps %xmm1, %xmm1 1095; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,2],xmm0[0,3] 1096; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0,1,3] 1097; SSE3-NEXT: movaps %xmm1, %xmm0 1098; SSE3-NEXT: retq 1099; 1100; SSSE3-LABEL: shuffle_v4f32_0zz6: 1101; SSSE3: # BB#0: 1102; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,2] 1103; SSSE3-NEXT: xorps %xmm1, %xmm1 1104; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,2],xmm0[0,3] 1105; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0,1,3] 1106; 
SSSE3-NEXT: movaps %xmm1, %xmm0 1107; SSSE3-NEXT: retq 1108; 1109; SSE41-LABEL: shuffle_v4f32_0zz6: 1110; SSE41: # BB#0: 1111; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,zero,xmm1[2] 1112; SSE41-NEXT: retq 1113; 1114; AVX-LABEL: shuffle_v4f32_0zz6: 1115; AVX: # BB#0: 1116; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],zero,zero,xmm1[2] 1117; AVX-NEXT: retq 1118 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 undef, i32 undef, i32 6> 1119 %shuffle1 = shufflevector <4 x float> zeroinitializer, <4 x float> %shuffle, <4 x i32> <i32 4, i32 1, i32 2, i32 7> 1120 ret <4 x float> %shuffle1 1121} 1122 1123define <4 x float> @shuffle_v4f32_0z24(<4 x float> %a, <4 x float> %b) { 1124; SSE2-LABEL: shuffle_v4f32_0z24: 1125; SSE2: # BB#0: 1126; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0] 1127; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0] 1128; SSE2-NEXT: xorps %xmm1, %xmm1 1129; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,0] 1130; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3] 1131; SSE2-NEXT: movaps %xmm1, %xmm0 1132; SSE2-NEXT: retq 1133; 1134; SSE3-LABEL: shuffle_v4f32_0z24: 1135; SSE3: # BB#0: 1136; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0] 1137; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0] 1138; SSE3-NEXT: xorps %xmm1, %xmm1 1139; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,0] 1140; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3] 1141; SSE3-NEXT: movaps %xmm1, %xmm0 1142; SSE3-NEXT: retq 1143; 1144; SSSE3-LABEL: shuffle_v4f32_0z24: 1145; SSSE3: # BB#0: 1146; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0] 1147; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0] 1148; SSSE3-NEXT: xorps %xmm1, %xmm1 1149; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,0] 1150; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3] 1151; SSSE3-NEXT: movaps %xmm1, %xmm0 1152; SSSE3-NEXT: retq 1153; 1154; SSE41-LABEL: shuffle_v4f32_0z24: 1155; SSE41: # BB#0: 1156; SSE41-NEXT: 
insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[2],xmm1[0] 1157; SSE41-NEXT: retq 1158; 1159; AVX-LABEL: shuffle_v4f32_0z24: 1160; AVX: # BB#0: 1161; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[2],xmm1[0] 1162; AVX-NEXT: retq 1163 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 undef, i32 2, i32 4> 1164 %shuffle1 = shufflevector <4 x float> zeroinitializer, <4 x float> %shuffle, <4 x i32> <i32 4, i32 1, i32 6, i32 7> 1165 ret <4 x float> %shuffle1 1166} 1167 1168define <4 x i32> @shuffle_v4i32_4zzz(<4 x i32> %a) { 1169; SSE2-LABEL: shuffle_v4i32_4zzz: 1170; SSE2: # BB#0: 1171; SSE2-NEXT: xorps %xmm1, %xmm1 1172; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] 1173; SSE2-NEXT: movaps %xmm1, %xmm0 1174; SSE2-NEXT: retq 1175; 1176; SSE3-LABEL: shuffle_v4i32_4zzz: 1177; SSE3: # BB#0: 1178; SSE3-NEXT: xorps %xmm1, %xmm1 1179; SSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] 1180; SSE3-NEXT: movaps %xmm1, %xmm0 1181; SSE3-NEXT: retq 1182; 1183; SSSE3-LABEL: shuffle_v4i32_4zzz: 1184; SSSE3: # BB#0: 1185; SSSE3-NEXT: xorps %xmm1, %xmm1 1186; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] 1187; SSSE3-NEXT: movaps %xmm1, %xmm0 1188; SSSE3-NEXT: retq 1189; 1190; SSE41-LABEL: shuffle_v4i32_4zzz: 1191; SSE41: # BB#0: 1192; SSE41-NEXT: pxor %xmm1, %xmm1 1193; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7] 1194; SSE41-NEXT: retq 1195; 1196; AVX-LABEL: shuffle_v4i32_4zzz: 1197; AVX: # BB#0: 1198; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 1199; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7] 1200; AVX-NEXT: retq 1201 %shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 4, i32 1, i32 2, i32 3> 1202 ret <4 x i32> %shuffle 1203} 1204 1205define <4 x i32> @shuffle_v4i32_z4zz(<4 x i32> %a) { 1206; SSE2-LABEL: shuffle_v4i32_z4zz: 1207; SSE2: # BB#0: 1208; SSE2-NEXT: xorps %xmm1, %xmm1 1209; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] 1210; SSE2-NEXT: pshufd {{.*#+}} xmm0 = 
xmm1[1,0,1,1] 1211; SSE2-NEXT: retq 1212; 1213; SSE3-LABEL: shuffle_v4i32_z4zz: 1214; SSE3: # BB#0: 1215; SSE3-NEXT: xorps %xmm1, %xmm1 1216; SSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] 1217; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,1,1] 1218; SSE3-NEXT: retq 1219; 1220; SSSE3-LABEL: shuffle_v4i32_z4zz: 1221; SSSE3: # BB#0: 1222; SSSE3-NEXT: xorps %xmm1, %xmm1 1223; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] 1224; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,1,1] 1225; SSSE3-NEXT: retq 1226; 1227; SSE41-LABEL: shuffle_v4i32_z4zz: 1228; SSE41: # BB#0: 1229; SSE41-NEXT: pxor %xmm1, %xmm1 1230; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3,4,5,6,7] 1231; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,1,1] 1232; SSE41-NEXT: retq 1233; 1234; AVX-LABEL: shuffle_v4i32_z4zz: 1235; AVX: # BB#0: 1236; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 1237; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7] 1238; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,1,1] 1239; AVX-NEXT: retq 1240 %shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 2, i32 4, i32 3, i32 0> 1241 ret <4 x i32> %shuffle 1242} 1243 1244define <4 x i32> @shuffle_v4i32_zz4z(<4 x i32> %a) { 1245; SSE2-LABEL: shuffle_v4i32_zz4z: 1246; SSE2: # BB#0: 1247; SSE2-NEXT: xorps %xmm1, %xmm1 1248; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] 1249; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,0,1] 1250; SSE2-NEXT: retq 1251; 1252; SSE3-LABEL: shuffle_v4i32_zz4z: 1253; SSE3: # BB#0: 1254; SSE3-NEXT: xorps %xmm1, %xmm1 1255; SSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] 1256; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,0,1] 1257; SSE3-NEXT: retq 1258; 1259; SSSE3-LABEL: shuffle_v4i32_zz4z: 1260; SSSE3: # BB#0: 1261; SSSE3-NEXT: xorps %xmm1, %xmm1 1262; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] 1263; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,0,1] 1264; SSSE3-NEXT: retq 1265; 1266; SSE41-LABEL: shuffle_v4i32_zz4z: 1267; SSE41: # BB#0: 1268; 
SSE41-NEXT: pxor %xmm1, %xmm1 1269; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3,4,5,6,7] 1270; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,0,1] 1271; SSE41-NEXT: retq 1272; 1273; AVX-LABEL: shuffle_v4i32_zz4z: 1274; AVX: # BB#0: 1275; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 1276; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7] 1277; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,0,1] 1278; AVX-NEXT: retq 1279 %shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 0, i32 0, i32 4, i32 0> 1280 ret <4 x i32> %shuffle 1281} 1282 1283define <4 x i32> @shuffle_v4i32_zuu4(<4 x i32> %a) { 1284; SSE-LABEL: shuffle_v4i32_zuu4: 1285; SSE: # BB#0: 1286; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3] 1287; SSE-NEXT: retq 1288; 1289; AVX-LABEL: shuffle_v4i32_zuu4: 1290; AVX: # BB#0: 1291; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3] 1292; AVX-NEXT: retq 1293 %shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 0, i32 undef, i32 undef, i32 4> 1294 ret <4 x i32> %shuffle 1295} 1296 1297define <4 x i32> @shuffle_v4i32_z6zz(<4 x i32> %a) { 1298; SSE2-LABEL: shuffle_v4i32_z6zz: 1299; SSE2: # BB#0: 1300; SSE2-NEXT: xorps %xmm1, %xmm1 1301; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0] 1302; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3] 1303; SSE2-NEXT: retq 1304; 1305; SSE3-LABEL: shuffle_v4i32_z6zz: 1306; SSE3: # BB#0: 1307; SSE3-NEXT: xorps %xmm1, %xmm1 1308; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0] 1309; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3] 1310; SSE3-NEXT: retq 1311; 1312; SSSE3-LABEL: shuffle_v4i32_z6zz: 1313; SSSE3: # BB#0: 1314; SSSE3-NEXT: xorps %xmm1, %xmm1 1315; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0] 1316; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3] 1317; SSSE3-NEXT: retq 1318; 1319; SSE41-LABEL: 
shuffle_v4i32_z6zz: 1320; SSE41: # BB#0: 1321; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,2,3,3] 1322; SSE41-NEXT: pxor %xmm0, %xmm0 1323; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7] 1324; SSE41-NEXT: retq 1325; 1326; AVX1-LABEL: shuffle_v4i32_z6zz: 1327; AVX1: # BB#0: 1328; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3] 1329; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 1330; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7] 1331; AVX1-NEXT: retq 1332; 1333; AVX2-LABEL: shuffle_v4i32_z6zz: 1334; AVX2: # BB#0: 1335; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3] 1336; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 1337; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3] 1338; AVX2-NEXT: retq 1339 %shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 0, i32 6, i32 2, i32 3> 1340 ret <4 x i32> %shuffle 1341} 1342 1343define <4 x i32> @shuffle_v4i32_7012(<4 x i32> %a, <4 x i32> %b) { 1344; SSE2-LABEL: shuffle_v4i32_7012: 1345; SSE2: # BB#0: 1346; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,0],xmm0[0,0] 1347; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,2] 1348; SSE2-NEXT: movaps %xmm1, %xmm0 1349; SSE2-NEXT: retq 1350; 1351; SSE3-LABEL: shuffle_v4i32_7012: 1352; SSE3: # BB#0: 1353; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,0],xmm0[0,0] 1354; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,2] 1355; SSE3-NEXT: movaps %xmm1, %xmm0 1356; SSE3-NEXT: retq 1357; 1358; SSSE3-LABEL: shuffle_v4i32_7012: 1359; SSSE3: # BB#0: 1360; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11] 1361; SSSE3-NEXT: retq 1362; 1363; SSE41-LABEL: shuffle_v4i32_7012: 1364; SSE41: # BB#0: 1365; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11] 1366; SSE41-NEXT: retq 1367; 1368; AVX-LABEL: shuffle_v4i32_7012: 1369; AVX: # BB#0: 1370; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11] 1371; AVX-NEXT: retq 1372 %shuffle = 
shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 7, i32 0, i32 1, i32 2> 1373 ret <4 x i32> %shuffle 1374} 1375 1376define <4 x i32> @shuffle_v4i32_6701(<4 x i32> %a, <4 x i32> %b) { 1377; SSE2-LABEL: shuffle_v4i32_6701: 1378; SSE2: # BB#0: 1379; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0] 1380; SSE2-NEXT: movapd %xmm1, %xmm0 1381; SSE2-NEXT: retq 1382; 1383; SSE3-LABEL: shuffle_v4i32_6701: 1384; SSE3: # BB#0: 1385; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0] 1386; SSE3-NEXT: movapd %xmm1, %xmm0 1387; SSE3-NEXT: retq 1388; 1389; SSSE3-LABEL: shuffle_v4i32_6701: 1390; SSSE3: # BB#0: 1391; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7] 1392; SSSE3-NEXT: retq 1393; 1394; SSE41-LABEL: shuffle_v4i32_6701: 1395; SSE41: # BB#0: 1396; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7] 1397; SSE41-NEXT: retq 1398; 1399; AVX-LABEL: shuffle_v4i32_6701: 1400; AVX: # BB#0: 1401; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7] 1402; AVX-NEXT: retq 1403 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1> 1404 ret <4 x i32> %shuffle 1405} 1406 1407define <4 x i32> @shuffle_v4i32_5670(<4 x i32> %a, <4 x i32> %b) { 1408; SSE2-LABEL: shuffle_v4i32_5670: 1409; SSE2: # BB#0: 1410; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0] 1411; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,2],xmm0[2,0] 1412; SSE2-NEXT: movaps %xmm1, %xmm0 1413; SSE2-NEXT: retq 1414; 1415; SSE3-LABEL: shuffle_v4i32_5670: 1416; SSE3: # BB#0: 1417; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0] 1418; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,2],xmm0[2,0] 1419; SSE3-NEXT: movaps %xmm1, %xmm0 1420; SSE3-NEXT: retq 1421; 1422; SSSE3-LABEL: shuffle_v4i32_5670: 1423; SSSE3: # BB#0: 1424; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3] 1425; SSSE3-NEXT: retq 1426; 1427; SSE41-LABEL: shuffle_v4i32_5670: 1428; 
SSE41: # BB#0: 1429; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3] 1430; SSE41-NEXT: retq 1431; 1432; AVX-LABEL: shuffle_v4i32_5670: 1433; AVX: # BB#0: 1434; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3] 1435; AVX-NEXT: retq 1436 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 5, i32 6, i32 7, i32 0> 1437 ret <4 x i32> %shuffle 1438} 1439 1440define <4 x i32> @shuffle_v4i32_1234(<4 x i32> %a, <4 x i32> %b) { 1441; SSE2-LABEL: shuffle_v4i32_1234: 1442; SSE2: # BB#0: 1443; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0] 1444; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,2],xmm1[2,0] 1445; SSE2-NEXT: retq 1446; 1447; SSE3-LABEL: shuffle_v4i32_1234: 1448; SSE3: # BB#0: 1449; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0] 1450; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,2],xmm1[2,0] 1451; SSE3-NEXT: retq 1452; 1453; SSSE3-LABEL: shuffle_v4i32_1234: 1454; SSSE3: # BB#0: 1455; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3] 1456; SSSE3-NEXT: movdqa %xmm1, %xmm0 1457; SSSE3-NEXT: retq 1458; 1459; SSE41-LABEL: shuffle_v4i32_1234: 1460; SSE41: # BB#0: 1461; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3] 1462; SSE41-NEXT: movdqa %xmm1, %xmm0 1463; SSE41-NEXT: retq 1464; 1465; AVX-LABEL: shuffle_v4i32_1234: 1466; AVX: # BB#0: 1467; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3] 1468; AVX-NEXT: retq 1469 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 2, i32 3, i32 4> 1470 ret <4 x i32> %shuffle 1471} 1472 1473define <4 x i32> @shuffle_v4i32_2345(<4 x i32> %a, <4 x i32> %b) { 1474; SSE2-LABEL: shuffle_v4i32_2345: 1475; SSE2: # BB#0: 1476; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] 1477; SSE2-NEXT: retq 1478; 1479; SSE3-LABEL: shuffle_v4i32_2345: 1480; SSE3: # BB#0: 1481; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] 1482; 
SSE3-NEXT: retq 1483; 1484; SSSE3-LABEL: shuffle_v4i32_2345: 1485; SSSE3: # BB#0: 1486; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7] 1487; SSSE3-NEXT: movdqa %xmm1, %xmm0 1488; SSSE3-NEXT: retq 1489; 1490; SSE41-LABEL: shuffle_v4i32_2345: 1491; SSE41: # BB#0: 1492; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7] 1493; SSE41-NEXT: movdqa %xmm1, %xmm0 1494; SSE41-NEXT: retq 1495; 1496; AVX-LABEL: shuffle_v4i32_2345: 1497; AVX: # BB#0: 1498; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7] 1499; AVX-NEXT: retq 1500 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 3, i32 4, i32 5> 1501 ret <4 x i32> %shuffle 1502} 1503 1504define <4 x i32> @shuffle_v4i32_40u1(<4 x i32> %a, <4 x i32> %b) { 1505; SSE-LABEL: shuffle_v4i32_40u1: 1506; SSE: # BB#0: 1507; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 1508; SSE-NEXT: movdqa %xmm1, %xmm0 1509; SSE-NEXT: retq 1510; 1511; AVX-LABEL: shuffle_v4i32_40u1: 1512; AVX: # BB#0: 1513; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 1514; AVX-NEXT: retq 1515 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 0, i32 undef, i32 1> 1516 ret <4 x i32> %shuffle 1517} 1518 1519define <4 x i32> @shuffle_v4i32_3456(<4 x i32> %a, <4 x i32> %b) { 1520; SSE2-LABEL: shuffle_v4i32_3456: 1521; SSE2: # BB#0: 1522; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[0,0] 1523; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,2] 1524; SSE2-NEXT: retq 1525; 1526; SSE3-LABEL: shuffle_v4i32_3456: 1527; SSE3: # BB#0: 1528; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[0,0] 1529; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,2] 1530; SSE3-NEXT: retq 1531; 1532; SSSE3-LABEL: shuffle_v4i32_3456: 1533; SSSE3: # BB#0: 1534; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11] 1535; SSSE3-NEXT: movdqa %xmm1, %xmm0 1536; 
SSSE3-NEXT: retq 1537; 1538; SSE41-LABEL: shuffle_v4i32_3456: 1539; SSE41: # BB#0: 1540; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11] 1541; SSE41-NEXT: movdqa %xmm1, %xmm0 1542; SSE41-NEXT: retq 1543; 1544; AVX-LABEL: shuffle_v4i32_3456: 1545; AVX: # BB#0: 1546; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11] 1547; AVX-NEXT: retq 1548 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 4, i32 5, i32 6> 1549 ret <4 x i32> %shuffle 1550} 1551 1552define <4 x i32> @shuffle_v4i32_0u1u(<4 x i32> %a, <4 x i32> %b) { 1553; SSE2-LABEL: shuffle_v4i32_0u1u: 1554; SSE2: # BB#0: 1555; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3] 1556; SSE2-NEXT: retq 1557; 1558; SSE3-LABEL: shuffle_v4i32_0u1u: 1559; SSE3: # BB#0: 1560; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3] 1561; SSE3-NEXT: retq 1562; 1563; SSSE3-LABEL: shuffle_v4i32_0u1u: 1564; SSSE3: # BB#0: 1565; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3] 1566; SSSE3-NEXT: retq 1567; 1568; SSE41-LABEL: shuffle_v4i32_0u1u: 1569; SSE41: # BB#0: 1570; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 1571; SSE41-NEXT: retq 1572; 1573; AVX-LABEL: shuffle_v4i32_0u1u: 1574; AVX: # BB#0: 1575; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 1576; AVX-NEXT: retq 1577 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 undef, i32 1, i32 undef> 1578 ret <4 x i32> %shuffle 1579} 1580 1581define <4 x i32> @shuffle_v4i32_0z1z(<4 x i32> %a) { 1582; SSE2-LABEL: shuffle_v4i32_0z1z: 1583; SSE2: # BB#0: 1584; SSE2-NEXT: pxor %xmm1, %xmm1 1585; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1586; SSE2-NEXT: retq 1587; 1588; SSE3-LABEL: shuffle_v4i32_0z1z: 1589; SSE3: # BB#0: 1590; SSE3-NEXT: pxor %xmm1, %xmm1 1591; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1592; SSE3-NEXT: retq 1593; 1594; SSSE3-LABEL: shuffle_v4i32_0z1z: 1595; SSSE3: # BB#0: 
1596; SSSE3-NEXT: pxor %xmm1, %xmm1 1597; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1598; SSSE3-NEXT: retq 1599; 1600; SSE41-LABEL: shuffle_v4i32_0z1z: 1601; SSE41: # BB#0: 1602; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 1603; SSE41-NEXT: retq 1604; 1605; AVX-LABEL: shuffle_v4i32_0z1z: 1606; AVX: # BB#0: 1607; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 1608; AVX-NEXT: retq 1609 %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 1, i32 7> 1610 ret <4 x i32> %shuffle 1611} 1612 1613define <4 x i32> @shuffle_v4i32_01zu(<4 x i32> %a) { 1614; SSE-LABEL: shuffle_v4i32_01zu: 1615; SSE: # BB#0: 1616; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero 1617; SSE-NEXT: retq 1618; 1619; AVX-LABEL: shuffle_v4i32_01zu: 1620; AVX: # BB#0: 1621; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero 1622; AVX-NEXT: retq 1623 %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 7, i32 undef> 1624 ret <4 x i32> %shuffle 1625} 1626 1627define <4 x i32> @shuffle_v4i32_0z23(<4 x i32> %a) { 1628; SSE2-LABEL: shuffle_v4i32_0z23: 1629; SSE2: # BB#0: 1630; SSE2-NEXT: andps {{.*}}(%rip), %xmm0 1631; SSE2-NEXT: retq 1632; 1633; SSE3-LABEL: shuffle_v4i32_0z23: 1634; SSE3: # BB#0: 1635; SSE3-NEXT: andps {{.*}}(%rip), %xmm0 1636; SSE3-NEXT: retq 1637; 1638; SSSE3-LABEL: shuffle_v4i32_0z23: 1639; SSSE3: # BB#0: 1640; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0 1641; SSSE3-NEXT: retq 1642; 1643; SSE41-LABEL: shuffle_v4i32_0z23: 1644; SSE41: # BB#0: 1645; SSE41-NEXT: pxor %xmm1, %xmm1 1646; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7] 1647; SSE41-NEXT: retq 1648; 1649; AVX1-LABEL: shuffle_v4i32_0z23: 1650; AVX1: # BB#0: 1651; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 1652; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7] 1653; AVX1-NEXT: retq 1654; 1655; AVX2-LABEL: shuffle_v4i32_0z23: 1656; AVX2: # BB#0: 1657; AVX2-NEXT: vpxor 
%xmm1, %xmm1, %xmm1 1658; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3] 1659; AVX2-NEXT: retq 1660 %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 2, i32 3> 1661 ret <4 x i32> %shuffle 1662} 1663 1664define <4 x i32> @shuffle_v4i32_01z3(<4 x i32> %a) { 1665; SSE2-LABEL: shuffle_v4i32_01z3: 1666; SSE2: # BB#0: 1667; SSE2-NEXT: andps {{.*}}(%rip), %xmm0 1668; SSE2-NEXT: retq 1669; 1670; SSE3-LABEL: shuffle_v4i32_01z3: 1671; SSE3: # BB#0: 1672; SSE3-NEXT: andps {{.*}}(%rip), %xmm0 1673; SSE3-NEXT: retq 1674; 1675; SSSE3-LABEL: shuffle_v4i32_01z3: 1676; SSSE3: # BB#0: 1677; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0 1678; SSSE3-NEXT: retq 1679; 1680; SSE41-LABEL: shuffle_v4i32_01z3: 1681; SSE41: # BB#0: 1682; SSE41-NEXT: pxor %xmm1, %xmm1 1683; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7] 1684; SSE41-NEXT: retq 1685; 1686; AVX1-LABEL: shuffle_v4i32_01z3: 1687; AVX1: # BB#0: 1688; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 1689; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7] 1690; AVX1-NEXT: retq 1691; 1692; AVX2-LABEL: shuffle_v4i32_01z3: 1693; AVX2: # BB#0: 1694; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 1695; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3] 1696; AVX2-NEXT: retq 1697 %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 3> 1698 ret <4 x i32> %shuffle 1699} 1700 1701define <4 x i32> @shuffle_v4i32_012z(<4 x i32> %a) { 1702; SSE2-LABEL: shuffle_v4i32_012z: 1703; SSE2: # BB#0: 1704; SSE2-NEXT: andps {{.*}}(%rip), %xmm0 1705; SSE2-NEXT: retq 1706; 1707; SSE3-LABEL: shuffle_v4i32_012z: 1708; SSE3: # BB#0: 1709; SSE3-NEXT: andps {{.*}}(%rip), %xmm0 1710; SSE3-NEXT: retq 1711; 1712; SSSE3-LABEL: shuffle_v4i32_012z: 1713; SSSE3: # BB#0: 1714; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0 1715; SSSE3-NEXT: retq 1716; 1717; SSE41-LABEL: shuffle_v4i32_012z: 1718; SSE41: # BB#0: 1719; SSE41-NEXT: pxor %xmm1, %xmm1 
1720; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7] 1721; SSE41-NEXT: retq 1722; 1723; AVX1-LABEL: shuffle_v4i32_012z: 1724; AVX1: # BB#0: 1725; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 1726; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7] 1727; AVX1-NEXT: retq 1728; 1729; AVX2-LABEL: shuffle_v4i32_012z: 1730; AVX2: # BB#0: 1731; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 1732; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3] 1733; AVX2-NEXT: retq 1734 %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 7> 1735 ret <4 x i32> %shuffle 1736} 1737 1738define <4 x i32> @shuffle_v4i32_0zz3(<4 x i32> %a) { 1739; SSE2-LABEL: shuffle_v4i32_0zz3: 1740; SSE2: # BB#0: 1741; SSE2-NEXT: andps {{.*}}(%rip), %xmm0 1742; SSE2-NEXT: retq 1743; 1744; SSE3-LABEL: shuffle_v4i32_0zz3: 1745; SSE3: # BB#0: 1746; SSE3-NEXT: andps {{.*}}(%rip), %xmm0 1747; SSE3-NEXT: retq 1748; 1749; SSSE3-LABEL: shuffle_v4i32_0zz3: 1750; SSSE3: # BB#0: 1751; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0 1752; SSSE3-NEXT: retq 1753; 1754; SSE41-LABEL: shuffle_v4i32_0zz3: 1755; SSE41: # BB#0: 1756; SSE41-NEXT: pxor %xmm1, %xmm1 1757; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5],xmm0[6,7] 1758; SSE41-NEXT: retq 1759; 1760; AVX1-LABEL: shuffle_v4i32_0zz3: 1761; AVX1: # BB#0: 1762; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 1763; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5],xmm0[6,7] 1764; AVX1-NEXT: retq 1765; 1766; AVX2-LABEL: shuffle_v4i32_0zz3: 1767; AVX2: # BB#0: 1768; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 1769; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] 1770; AVX2-NEXT: retq 1771 %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 4, i32 3> 1772 ret <4 x i32> %shuffle 1773} 1774 1775define <4 x i32> @shuffle_v4i32_bitcast_0415(<4 x i32> %a, <4 x i32> %b) { 1776; SSE-LABEL: shuffle_v4i32_bitcast_0415: 1777; SSE: # BB#0: 1778; SSE-NEXT: punpckldq 
{{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1779; SSE-NEXT: retq 1780; 1781; AVX-LABEL: shuffle_v4i32_bitcast_0415: 1782; AVX: # BB#0: 1783; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1784; AVX-NEXT: retq 1785 %shuffle32 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 0, i32 4> 1786 %bitcast64 = bitcast <4 x i32> %shuffle32 to <2 x double> 1787 %shuffle64 = shufflevector <2 x double> %bitcast64, <2 x double> undef, <2 x i32> <i32 1, i32 0> 1788 %bitcast32 = bitcast <2 x double> %shuffle64 to <4 x i32> 1789 ret <4 x i32> %bitcast32 1790} 1791; Duplicates each of the low two i32 lanes of %b (mask <0,0,1,1>), then a <2 x double> shuffle joins that value's low half with the low half of %a, giving lanes <b0,b0,a0,a1>. 1792define <4 x float> @shuffle_v4f32_bitcast_4401(<4 x float> %a, <4 x i32> %b) { 1793; SSE-LABEL: shuffle_v4f32_bitcast_4401: 1794; SSE: # BB#0: 1795; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] 1796; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0] 1797; SSE-NEXT: movapd %xmm1, %xmm0 1798; SSE-NEXT: retq 1799; 1800; AVX-LABEL: shuffle_v4f32_bitcast_4401: 1801; AVX: # BB#0: 1802; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] 1803; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] 1804; AVX-NEXT: retq 1805 %1 = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 1, i32 1> 1806 %2 = bitcast <4 x i32> %1 to <2 x double> 1807 %3 = bitcast <4 x float> %a to <2 x double> 1808 %4 = shufflevector <2 x double> %2, <2 x double> %3, <2 x i32> <i32 0, i32 2> 1809 %5 = bitcast <2 x double> %4 to <4 x float> 1810 ret <4 x float> %5 1811} 1812; Shuffles %a with mask <0,0,1,1>, then shuffles that with %b (bitcast from <4 x i32> to <4 x float>) using mask <1,0,4,5>, giving lanes <a0,a0,b0,b1>. 1813define <4 x float> @shuffle_v4f32_bitcast_0045(<4 x float> %a, <4 x i32> %b) { 1814; SSE-LABEL: shuffle_v4f32_bitcast_0045: 1815; SSE: # BB#0: 1816; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,1] 1817; SSE-NEXT: retq 1818; 1819; AVX-LABEL: shuffle_v4f32_bitcast_0045: 1820; AVX: # BB#0: 1821; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,1] 1822; AVX-NEXT: retq 1823 %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 1, i32 1> 1824 %2 = bitcast <4 x i32> %b to <4 x float> 
1825 %3 = shufflevector <4 x float> %1, <4 x float> %2, <4 x i32> <i32 1, i32 0, i32 4, i32 5> 1826 ret <4 x float> %3 1827} 1828; Lane blend written as and/or with <4 x i32> constant masks - lanes 1-2 are kept from %a and lanes 0 and 3 from %b. 1829define <4 x float> @mask_v4f32_4127(<4 x float> %a, <4 x float> %b) { 1830; SSE2-LABEL: mask_v4f32_4127: 1831; SSE2: # BB#0: 1832; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],xmm0[1,2] 1833; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2,3,1] 1834; SSE2-NEXT: movaps %xmm1, %xmm0 1835; SSE2-NEXT: retq 1836; 1837; SSE3-LABEL: mask_v4f32_4127: 1838; SSE3: # BB#0: 1839; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],xmm0[1,2] 1840; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2,3,1] 1841; SSE3-NEXT: movaps %xmm1, %xmm0 1842; SSE3-NEXT: retq 1843; 1844; SSSE3-LABEL: mask_v4f32_4127: 1845; SSSE3: # BB#0: 1846; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],xmm0[1,2] 1847; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2,3,1] 1848; SSSE3-NEXT: movaps %xmm1, %xmm0 1849; SSSE3-NEXT: retq 1850; 1851; SSE41-LABEL: mask_v4f32_4127: 1852; SSE41: # BB#0: 1853; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3] 1854; SSE41-NEXT: retq 1855; 1856; AVX-LABEL: mask_v4f32_4127: 1857; AVX: # BB#0: 1858; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3] 1859; AVX-NEXT: retq 1860 %1 = bitcast <4 x float> %a to <4 x i32> 1861 %2 = bitcast <4 x float> %b to <4 x i32> 1862 %3 = and <4 x i32> %1, <i32 0, i32 -1, i32 -1, i32 0> 1863 %4 = and <4 x i32> %2, <i32 -1, i32 0, i32 0, i32 -1> 1864 %5 = or <4 x i32> %4, %3 1865 %6 = bitcast <4 x i32> %5 to <4 x float> 1866 ret <4 x float> %6 1867} 1868; The and/or blend again, with <2 x i64> masks - only the upper 32 bits of element 1 (f32 lane 3) are kept from %a, the remaining lanes come from %b, so the result is <b0,b1,b2,a3>. 1869define <4 x float> @mask_v4f32_0127(<4 x float> %a, <4 x float> %b) { 1870; SSE2-LABEL: mask_v4f32_0127: 1871; SSE2: # BB#0: 1872; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0] 1873; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0] 1874; SSE2-NEXT: movaps %xmm1, %xmm0 1875; SSE2-NEXT: retq 1876; 1877; SSE3-LABEL: mask_v4f32_0127: 1878; SSE3: # BB#0: 1879; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0] 1880; SSE3-NEXT: shufps {{.*#+}} xmm1 = 
xmm1[0,1],xmm0[2,0] 1881; SSE3-NEXT: movaps %xmm1, %xmm0 1882; SSE3-NEXT: retq 1883; 1884; SSSE3-LABEL: mask_v4f32_0127: 1885; SSSE3: # BB#0: 1886; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0] 1887; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0] 1888; SSSE3-NEXT: movaps %xmm1, %xmm0 1889; SSSE3-NEXT: retq 1890; 1891; SSE41-LABEL: mask_v4f32_0127: 1892; SSE41: # BB#0: 1893; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3] 1894; SSE41-NEXT: retq 1895; 1896; AVX-LABEL: mask_v4f32_0127: 1897; AVX: # BB#0: 1898; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3] 1899; AVX-NEXT: retq 1900 %1 = bitcast <4 x float> %a to <2 x i64> 1901 %2 = bitcast <4 x float> %b to <2 x i64> 1902 %3 = and <2 x i64> %1, <i64 0, i64 -4294967296> 1903 %4 = and <2 x i64> %2, <i64 -1, i64 4294967295> 1904 %5 = or <2 x i64> %4, %3 1905 %6 = bitcast <2 x i64> %5 to <4 x float> 1906 ret <4 x float> %6 1907} 1908; Integer version of the <2 x i64> and/or blend - the result lanes are <b0,b1,b2,a3>. 1909define <4 x i32> @mask_v4i32_0127(<4 x i32> %a, <4 x i32> %b) { 1910; SSE2-LABEL: mask_v4i32_0127: 1911; SSE2: # BB#0: 1912; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0] 1913; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0] 1914; SSE2-NEXT: movaps %xmm1, %xmm0 1915; SSE2-NEXT: retq 1916; 1917; SSE3-LABEL: mask_v4i32_0127: 1918; SSE3: # BB#0: 1919; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0] 1920; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0] 1921; SSE3-NEXT: movaps %xmm1, %xmm0 1922; SSE3-NEXT: retq 1923; 1924; SSSE3-LABEL: mask_v4i32_0127: 1925; SSSE3: # BB#0: 1926; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0] 1927; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0] 1928; SSSE3-NEXT: movaps %xmm1, %xmm0 1929; SSSE3-NEXT: retq 1930; 1931; SSE41-LABEL: mask_v4i32_0127: 1932; SSE41: # BB#0: 1933; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7] 1934; SSE41-NEXT: retq 1935; 1936; AVX1-LABEL: mask_v4i32_0127: 1937; AVX1: # BB#0: 1938; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7] 
1939; AVX1-NEXT: retq 1940; 1941; AVX2-LABEL: mask_v4i32_0127: 1942; AVX2: # BB#0: 1943; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3] 1944; AVX2-NEXT: retq 1945 %1 = bitcast <4 x i32> %a to <2 x i64> 1946 %2 = bitcast <4 x i32> %b to <2 x i64> 1947 %3 = and <2 x i64> %1, <i64 0, i64 -4294967296> 1948 %4 = and <2 x i64> %2, <i64 -1, i64 4294967295> 1949 %5 = or <2 x i64> %4, %3 1950 %6 = bitcast <2 x i64> %5 to <4 x i32> 1951 ret <4 x i32> %6 1952} 1953; Loads a <2 x float> with align 1 and repeats the pair to fill a <4 x float> (mask <0,1,0,1>). 1954define <4 x float> @broadcast_v4f32_0101_from_v2f32(<2 x float>* %x) { 1955; SSE2-LABEL: broadcast_v4f32_0101_from_v2f32: 1956; SSE2: # BB#0: 1957; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 1958; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] 1959; SSE2-NEXT: retq 1960; 1961; SSE3-LABEL: broadcast_v4f32_0101_from_v2f32: 1962; SSE3: # BB#0: 1963; SSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] 1964; SSE3-NEXT: retq 1965; 1966; SSSE3-LABEL: broadcast_v4f32_0101_from_v2f32: 1967; SSSE3: # BB#0: 1968; SSSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] 1969; SSSE3-NEXT: retq 1970; 1971; SSE41-LABEL: broadcast_v4f32_0101_from_v2f32: 1972; SSE41: # BB#0: 1973; SSE41-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] 1974; SSE41-NEXT: retq 1975; 1976; AVX-LABEL: broadcast_v4f32_0101_from_v2f32: 1977; AVX: # BB#0: 1978; AVX-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0] 1979; AVX-NEXT: retq 1980 %1 = load <2 x float>, <2 x float>* %x, align 1 1981 %2 = shufflevector <2 x float> %1, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> 1982 ret <4 x float> %2 1983} 1984; Inserts the i32 argument into lane 0 and takes lanes 1-3 from zeroinitializer. 1985define <4 x i32> @insert_reg_and_zero_v4i32(i32 %a) { 1986; SSE-LABEL: insert_reg_and_zero_v4i32: 1987; SSE: # BB#0: 1988; SSE-NEXT: movd %edi, %xmm0 1989; SSE-NEXT: retq 1990; 1991; AVX-LABEL: insert_reg_and_zero_v4i32: 1992; AVX: # BB#0: 1993; AVX-NEXT: vmovd %edi, %xmm0 1994; AVX-NEXT: retq 1995 %v = insertelement <4 x i32> undef, i32 %a, i32 0 1996 %shuffle = shufflevector <4 x i32> %v, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7> 1997 ret 
<4 x i32> %shuffle 1998} 1999; Loads an i32 from %ptr, inserts it into lane 0 and takes lanes 1-3 from zeroinitializer. 2000define <4 x i32> @insert_mem_and_zero_v4i32(i32* %ptr) { 2001; SSE-LABEL: insert_mem_and_zero_v4i32: 2002; SSE: # BB#0: 2003; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2004; SSE-NEXT: retq 2005; 2006; AVX-LABEL: insert_mem_and_zero_v4i32: 2007; AVX: # BB#0: 2008; AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2009; AVX-NEXT: retq 2010 %a = load i32, i32* %ptr 2011 %v = insertelement <4 x i32> undef, i32 %a, i32 0 2012 %shuffle = shufflevector <4 x i32> %v, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7> 2013 ret <4 x i32> %shuffle 2014} 2015; Inserts the float argument into lane 0 and takes lanes 1-3 from zeroinitializer. 2016define <4 x float> @insert_reg_and_zero_v4f32(float %a) { 2017; SSE2-LABEL: insert_reg_and_zero_v4f32: 2018; SSE2: # BB#0: 2019; SSE2-NEXT: xorps %xmm1, %xmm1 2020; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] 2021; SSE2-NEXT: movaps %xmm1, %xmm0 2022; SSE2-NEXT: retq 2023; 2024; SSE3-LABEL: insert_reg_and_zero_v4f32: 2025; SSE3: # BB#0: 2026; SSE3-NEXT: xorps %xmm1, %xmm1 2027; SSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] 2028; SSE3-NEXT: movaps %xmm1, %xmm0 2029; SSE3-NEXT: retq 2030; 2031; SSSE3-LABEL: insert_reg_and_zero_v4f32: 2032; SSSE3: # BB#0: 2033; SSSE3-NEXT: xorps %xmm1, %xmm1 2034; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] 2035; SSSE3-NEXT: movaps %xmm1, %xmm0 2036; SSSE3-NEXT: retq 2037; 2038; SSE41-LABEL: insert_reg_and_zero_v4f32: 2039; SSE41: # BB#0: 2040; SSE41-NEXT: xorps %xmm1, %xmm1 2041; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] 2042; SSE41-NEXT: retq 2043; 2044; AVX-LABEL: insert_reg_and_zero_v4f32: 2045; AVX: # BB#0: 2046; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 2047; AVX-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] 2048; AVX-NEXT: retq 2049 %v = insertelement <4 x float> undef, float %a, i32 0 2050 %shuffle = shufflevector <4 x float> %v, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7> 2051 ret <4 x float> %shuffle 2052} 2053; Loads a float from %ptr, inserts it into lane 0 and takes lanes 1-3 from zeroinitializer. 2054define <4 x float> 
@insert_mem_and_zero_v4f32(float* %ptr) { 2055; SSE-LABEL: insert_mem_and_zero_v4f32: 2056; SSE: # BB#0: 2057; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 2058; SSE-NEXT: retq 2059; 2060; AVX-LABEL: insert_mem_and_zero_v4f32: 2061; AVX: # BB#0: 2062; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 2063; AVX-NEXT: retq 2064 %a = load float, float* %ptr 2065 %v = insertelement <4 x float> undef, float %a, i32 0 2066 %shuffle = shufflevector <4 x float> %v, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7> 2067 ret <4 x float> %shuffle 2068} 2069; Bitcasts the i64 argument to <2 x i32> and puts it in lanes 0-1, keeping lanes 2-3 of %b. 2070define <4 x i32> @insert_reg_lo_v4i32(i64 %a, <4 x i32> %b) { 2071; SSE2-LABEL: insert_reg_lo_v4i32: 2072; SSE2: # BB#0: 2073; SSE2-NEXT: movd %rdi, %xmm1 2074; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] 2075; SSE2-NEXT: retq 2076; 2077; SSE3-LABEL: insert_reg_lo_v4i32: 2078; SSE3: # BB#0: 2079; SSE3-NEXT: movd %rdi, %xmm1 2080; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] 2081; SSE3-NEXT: retq 2082; 2083; SSSE3-LABEL: insert_reg_lo_v4i32: 2084; SSSE3: # BB#0: 2085; SSSE3-NEXT: movd %rdi, %xmm1 2086; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] 2087; SSSE3-NEXT: retq 2088; 2089; SSE41-LABEL: insert_reg_lo_v4i32: 2090; SSE41: # BB#0: 2091; SSE41-NEXT: movd %rdi, %xmm1 2092; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7] 2093; SSE41-NEXT: retq 2094; 2095; AVX1-LABEL: insert_reg_lo_v4i32: 2096; AVX1: # BB#0: 2097; AVX1-NEXT: vmovq %rdi, %xmm1 2098; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7] 2099; AVX1-NEXT: retq 2100; 2101; AVX2-LABEL: insert_reg_lo_v4i32: 2102; AVX2: # BB#0: 2103; AVX2-NEXT: vmovq %rdi, %xmm1 2104; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] 2105; AVX2-NEXT: retq 2106 %a.cast = bitcast i64 %a to <2 x i32> 2107 %v = shufflevector <2 x i32> %a.cast, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> 2108 %shuffle = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7> 
2109 ret <4 x i32> %shuffle 2110} 2111; Loads a <2 x i32> from %ptr into lanes 0-1, keeping lanes 2-3 of %b. 2112define <4 x i32> @insert_mem_lo_v4i32(<2 x i32>* %ptr, <4 x i32> %b) { 2113; SSE2-LABEL: insert_mem_lo_v4i32: 2114; SSE2: # BB#0: 2115; SSE2-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1] 2116; SSE2-NEXT: retq 2117; 2118; SSE3-LABEL: insert_mem_lo_v4i32: 2119; SSE3: # BB#0: 2120; SSE3-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1] 2121; SSE3-NEXT: retq 2122; 2123; SSSE3-LABEL: insert_mem_lo_v4i32: 2124; SSSE3: # BB#0: 2125; SSSE3-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1] 2126; SSSE3-NEXT: retq 2127; 2128; SSE41-LABEL: insert_mem_lo_v4i32: 2129; SSE41: # BB#0: 2130; SSE41-NEXT: movq {{.*#+}} xmm1 = mem[0],zero 2131; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7] 2132; SSE41-NEXT: retq 2133; 2134; AVX1-LABEL: insert_mem_lo_v4i32: 2135; AVX1: # BB#0: 2136; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero 2137; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7] 2138; AVX1-NEXT: retq 2139; 2140; AVX2-LABEL: insert_mem_lo_v4i32: 2141; AVX2: # BB#0: 2142; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero 2143; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] 2144; AVX2-NEXT: retq 2145 %a = load <2 x i32>, <2 x i32>* %ptr 2146 %v = shufflevector <2 x i32> %a, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> 2147 %shuffle = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7> 2148 ret <4 x i32> %shuffle 2149} 2150; Keeps lanes 0-1 of %b and puts the i64 argument, bitcast to <2 x i32>, in lanes 2-3. 2151define <4 x i32> @insert_reg_hi_v4i32(i64 %a, <4 x i32> %b) { 2152; SSE-LABEL: insert_reg_hi_v4i32: 2153; SSE: # BB#0: 2154; SSE-NEXT: movd %rdi, %xmm1 2155; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 2156; SSE-NEXT: retq 2157; 2158; AVX-LABEL: insert_reg_hi_v4i32: 2159; AVX: # BB#0: 2160; AVX-NEXT: vmovq %rdi, %xmm1 2161; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 2162; AVX-NEXT: retq 2163 %a.cast = bitcast i64 %a to <2 x i32> 2164 %v = shufflevector <2 x i32> %a.cast, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 
undef> 2165 %shuffle = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1> 2166 ret <4 x i32> %shuffle 2167} 2168; Keeps lanes 0-1 of %b and puts a <2 x i32> loaded from %ptr in lanes 2-3. 2169define <4 x i32> @insert_mem_hi_v4i32(<2 x i32>* %ptr, <4 x i32> %b) { 2170; SSE-LABEL: insert_mem_hi_v4i32: 2171; SSE: # BB#0: 2172; SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero 2173; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 2174; SSE-NEXT: retq 2175; 2176; AVX-LABEL: insert_mem_hi_v4i32: 2177; AVX: # BB#0: 2178; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero 2179; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 2180; AVX-NEXT: retq 2181 %a = load <2 x i32>, <2 x i32>* %ptr 2182 %v = shufflevector <2 x i32> %a, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> 2183 %shuffle = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1> 2184 ret <4 x i32> %shuffle 2185} 2186; Bitcasts the double argument to <2 x float> and puts it in lanes 0-1, keeping lanes 2-3 of %b. 2187define <4 x float> @insert_reg_lo_v4f32(double %a, <4 x float> %b) { 2188; SSE-LABEL: insert_reg_lo_v4f32: 2189; SSE: # BB#0: 2190; SSE-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1] 2191; SSE-NEXT: movapd %xmm1, %xmm0 2192; SSE-NEXT: retq 2193; 2194; AVX-LABEL: insert_reg_lo_v4f32: 2195; AVX: # BB#0: 2196; AVX-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1] 2197; AVX-NEXT: retq 2198 %a.cast = bitcast double %a to <2 x float> 2199 %v = shufflevector <2 x float> %a.cast, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> 2200 %shuffle = shufflevector <4 x float> %v, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7> 2201 ret <4 x float> %shuffle 2202} 2203; Loads a <2 x float> from %ptr into lanes 0-1, keeping lanes 2-3 of %b. 2204define <4 x float> @insert_mem_lo_v4f32(<2 x float>* %ptr, <4 x float> %b) { 2205; SSE-LABEL: insert_mem_lo_v4f32: 2206; SSE: # BB#0: 2207; SSE-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1] 2208; SSE-NEXT: retq 2209; 2210; AVX-LABEL: insert_mem_lo_v4f32: 2211; AVX: # BB#0: 2212; AVX-NEXT: vmovlpd {{.*#+}} xmm0 = mem[0],xmm0[1] 2213; AVX-NEXT: retq 2214 %a = load <2 x float>, <2 x float>* %ptr 2215 %v = shufflevector <2 x float> %a, <2 
x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> 2216 %shuffle = shufflevector <4 x float> %v, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7> 2217 ret <4 x float> %shuffle 2218} 2219; Keeps lanes 0-1 of %b and puts the double argument, bitcast to <2 x float>, in lanes 2-3. 2220define <4 x float> @insert_reg_hi_v4f32(double %a, <4 x float> %b) { 2221; SSE-LABEL: insert_reg_hi_v4f32: 2222; SSE: # BB#0: 2223; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0] 2224; SSE-NEXT: movapd %xmm1, %xmm0 2225; SSE-NEXT: retq 2226; 2227; AVX-LABEL: insert_reg_hi_v4f32: 2228; AVX: # BB#0: 2229; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] 2230; AVX-NEXT: retq 2231 %a.cast = bitcast double %a to <2 x float> 2232 %v = shufflevector <2 x float> %a.cast, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> 2233 %shuffle = shufflevector <4 x float> %v, <4 x float> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1> 2234 ret <4 x float> %shuffle 2235} 2236; Keeps lanes 0-1 of %b and puts a <2 x float> loaded from %ptr in lanes 2-3. 2237define <4 x float> @insert_mem_hi_v4f32(<2 x float>* %ptr, <4 x float> %b) { 2238; SSE-LABEL: insert_mem_hi_v4f32: 2239; SSE: # BB#0: 2240; SSE-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0] 2241; SSE-NEXT: retq 2242; 2243; AVX-LABEL: insert_mem_hi_v4f32: 2244; AVX: # BB#0: 2245; AVX-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0] 2246; AVX-NEXT: retq 2247 %a = load <2 x float>, <2 x float>* %ptr 2248 %v = shufflevector <2 x float> %a, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> 2249 %shuffle = shufflevector <4 x float> %v, <4 x float> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1> 2250 ret <4 x float> %shuffle 2251} 2252; Loads a <4 x float> from %ptr and reverses its lanes (mask <3,2,1,0>). 2253define <4 x float> @shuffle_mem_v4f32_3210(<4 x float>* %ptr) { 2254; SSE-LABEL: shuffle_mem_v4f32_3210: 2255; SSE: # BB#0: 2256; SSE-NEXT: movaps (%rdi), %xmm0 2257; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,2,1,0] 2258; SSE-NEXT: retq 2259; 2260; AVX-LABEL: shuffle_mem_v4f32_3210: 2261; AVX: # BB#0: 2262; AVX-NEXT: vpermilps {{.*#+}} xmm0 = mem[3,2,1,0] 2263; AVX-NEXT: retq 2264 %a = load <4 x float>, <4 x float>* %ptr 2265 %shuffle = shufflevector <4 x 
float> %a, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 2266 ret <4 x float> %shuffle 2267} 2268; Loads an i32 (align 4), inserts it into lane 0 of a zero vector and broadcasts lane 0 with an all-zero shuffle mask. 2269define <4 x i32> @insert_dup_mem_v4i32(i32* %ptr) { 2270; SSE-LABEL: insert_dup_mem_v4i32: 2271; SSE: # BB#0: 2272; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2273; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 2274; SSE-NEXT: retq 2275; 2276; AVX-LABEL: insert_dup_mem_v4i32: 2277; AVX: # BB#0: 2278; AVX-NEXT: vbroadcastss (%rdi), %xmm0 2279; AVX-NEXT: retq 2280 %tmp = load i32, i32* %ptr, align 4 2281 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0 2282 %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> zeroinitializer 2283 ret <4 x i32> %tmp2 2284} 2285 2286; 2287; Shuffle to logical bit shifts 2288; 2289; Mask <4,0,4,undef> against a zero vector gives <0,a0,0,undef>; the expected code is a 64-bit left shift by 32. 2290define <4 x i32> @shuffle_v4i32_z0zX(<4 x i32> %a) { 2291; SSE-LABEL: shuffle_v4i32_z0zX: 2292; SSE: # BB#0: 2293; SSE-NEXT: psllq $32, %xmm0 2294; SSE-NEXT: retq 2295; 2296; AVX-LABEL: shuffle_v4i32_z0zX: 2297; AVX: # BB#0: 2298; AVX-NEXT: vpsllq $32, %xmm0, %xmm0 2299; AVX-NEXT: retq 2300 %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 4, i32 0, i32 4, i32 undef> 2301 ret <4 x i32> %shuffle 2302} 2303; Mask <1,4,3,4> against a zero vector gives <a1,0,a3,0>; the expected code is a 64-bit right shift by 32. 2304define <4 x i32> @shuffle_v4i32_1z3z(<4 x i32> %a) { 2305; SSE-LABEL: shuffle_v4i32_1z3z: 2306; SSE: # BB#0: 2307; SSE-NEXT: psrlq $32, %xmm0 2308; SSE-NEXT: retq 2309; 2310; AVX-LABEL: shuffle_v4i32_1z3z: 2311; AVX: # BB#0: 2312; AVX-NEXT: vpsrlq $32, %xmm0, %xmm0 2313; AVX-NEXT: retq 2314 %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 4, i32 3, i32 4> 2315 ret <4 x i32> %shuffle 2316} 2317