; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl -mattr=+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512VL

; Codegen tests for two-element 128-bit shuffles (<2 x i64> and <2 x double>).
; In every shufflevector mask below, indices 0-1 pick lanes of the first
; operand and indices 2-3 pick lanes of the second operand; the digits at the
; end of each function name spell out that mask.

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"

; ---- <2 x i64> shuffles that read only one of the two operands ----

define <2 x i64> @shuffle_v2i64_00(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_00:
; SSE:       # BB#0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE-NEXT:    retq
;
; AVX1-LABEL: shuffle_v2i64_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v2i64_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastq %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v2i64_00:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpbroadcastq %xmm0, %xmm0
; AVX512VL-NEXT:    retq
  %res = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 0>
  ret <2 x i64> %res
}

define <2 x i64> @shuffle_v2i64_10(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_10:
; SSE:       # BB#0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_10:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX-NEXT:    retq
  %res = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 0>
  ret <2 x i64> %res
}

define <2 x i64> @shuffle_v2i64_11(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_11:
; SSE:       # BB#0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_11:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX-NEXT:    retq
  %res = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 1>
  ret <2 x i64> %res
}

define <2 x i64> @shuffle_v2i64_22(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_22:
; SSE:       # BB#0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
; SSE-NEXT:    retq
;
; AVX1-LABEL: shuffle_v2i64_22:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v2i64_22:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastq %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v2i64_22:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpbroadcastq %xmm1, %xmm0
; AVX512VL-NEXT:    retq
  %res = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 2>
  ret <2 x i64> %res
}

define <2 x i64> @shuffle_v2i64_32(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_32:
; SSE:       # BB#0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_32:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; AVX-NEXT:    retq
  %res = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 2>
  ret <2 x i64> %res
}

define <2 x i64> @shuffle_v2i64_33(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_33:
; SSE:       # BB#0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_33:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; AVX-NEXT:    retq
  %res = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 3>
  ret <2 x i64> %res
}

; ---- <2 x double> shuffles that read only one of the two operands ----

define <2 x double> @shuffle_v2f64_00(<2 x double> %a, <2 x double> %b) {
; SSE2-LABEL: shuffle_v2f64_00:
; SSE2:       # BB#0:
; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2f64_00:
; SSE3:       # BB#0:
; SSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2f64_00:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2f64_00:
; SSE41:       # BB#0:
; SSE41-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v2f64_00:
; AVX:       # BB#0:
; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX-NEXT:    retq
  %res = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 0>
  ret <2 x double> %res
}

define <2 x double> @shuffle_v2f64_10(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: shuffle_v2f64_10:
; SSE:       # BB#0:
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2f64_10:
; AVX:       # BB#0:
; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT:    retq
  %res = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 0>
  ret <2 x double> %res
}

define <2 x double> @shuffle_v2f64_11(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: shuffle_v2f64_11:
; SSE:       # BB#0:
; SSE-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2f64_11:
; AVX:       # BB#0:
; AVX-NEXT:    vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
; AVX-NEXT:    retq
  %res = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 1>
  ret <2 x double> %res
}

define <2 x double> @shuffle_v2f64_22(<2 x double> %a, <2 x double> %b) {
; SSE2-LABEL: shuffle_v2f64_22:
; SSE2:       # BB#0:
; SSE2-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0,0]
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2f64_22:
; SSE3:       # BB#0:
; SSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm1[0,0]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2f64_22:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm1[0,0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2f64_22:
; SSE41:       # BB#0:
; SSE41-NEXT:    movddup {{.*#+}} xmm0 = xmm1[0,0]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v2f64_22:
; AVX:       # BB#0:
; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm1[0,0]
; AVX-NEXT:    retq
  %res = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 2>
  ret <2 x double> %res
}

define <2 x double> @shuffle_v2f64_32(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: shuffle_v2f64_32:
; SSE:       # BB#0:
; SSE-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1,0]
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2f64_32:
; AVX:       # BB#0:
; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm1[1,0]
; AVX-NEXT:    retq
  %res = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 2>
  ret <2 x double> %res
}

define <2 x double> @shuffle_v2f64_33(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: shuffle_v2f64_33:
; SSE:       # BB#0:
; SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2f64_33:
; AVX:       # BB#0:
; AVX-NEXT:    vmovhlps {{.*#+}} xmm0 = xmm1[1,1]
; AVX-NEXT:    retq
  %res = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 3>
  ret <2 x double> %res
}

; ---- <2 x double> blends: one lane from each operand, lane positions kept ----

define <2 x double> @shuffle_v2f64_03(<2 x double> %a, <2 x double> %b) {
; SSE2-LABEL: shuffle_v2f64_03:
; SSE2:       # BB#0:
; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE2-NEXT:    movapd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2f64_03:
; SSE3:       # BB#0:
; SSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE3-NEXT:    movapd %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2f64_03:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSSE3-NEXT:    movapd %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2f64_03:
; SSE41:       # BB#0:
; SSE41-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v2f64_03:
; AVX:       # BB#0:
; AVX-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; AVX-NEXT:    retq
  %res = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %res
}

define <2 x double> @shuffle_v2f64_21(<2 x double> %a, <2 x double> %b) {
; SSE2-LABEL: shuffle_v2f64_21:
; SSE2:       # BB#0:
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2f64_21:
; SSE3:       # BB#0:
; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2f64_21:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2f64_21:
; SSE41:       # BB#0:
; SSE41-NEXT:    blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v2f64_21:
; AVX:       # BB#0:
; AVX-NEXT:    vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; AVX-NEXT:    retq
  %res = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 1>
  ret <2 x double> %res
}

; ---- <2 x i64> shuffles that read both operands ----
; Each "_copy" variant takes an extra, unused leading %nonce argument; per the
; expected assembly, its shuffle inputs then arrive in xmm1/xmm2 and the result
; still has to end up in xmm0.

define <2 x i64> @shuffle_v2i64_02(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_02:
; SSE:       # BB#0:
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_02:
; AVX:       # BB#0:
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT:    retq
  %res = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
  ret <2 x i64> %res
}

define <2 x i64> @shuffle_v2i64_02_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_02_copy:
; SSE:       # BB#0:
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_02_copy:
; AVX:       # BB#0:
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm2[0]
; AVX-NEXT:    retq
  %res = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
  ret <2 x i64> %res
}

define <2 x i64> @shuffle_v2i64_03(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_03:
; SSE2:       # BB#0:
; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE2-NEXT:    movapd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2i64_03:
; SSE3:       # BB#0:
; SSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE3-NEXT:    movapd %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2i64_03:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSSE3-NEXT:    movapd %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2i64_03:
; SSE41:       # BB#0:
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: shuffle_v2i64_03:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v2i64_03:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v2i64_03:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX512VL-NEXT:    retq
  %res = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x i64> %res
}

define <2 x i64> @shuffle_v2i64_03_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_03_copy:
; SSE2:       # BB#0:
; SSE2-NEXT:    movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
; SSE2-NEXT:    movapd %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2i64_03_copy:
; SSE3:       # BB#0:
; SSE3-NEXT:    movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
; SSE3-NEXT:    movapd %xmm2, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2i64_03_copy:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
; SSSE3-NEXT:    movapd %xmm2, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2i64_03_copy:
; SSE41:       # BB#0:
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: shuffle_v2i64_03_copy:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v2i64_03_copy:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm2[2,3]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v2i64_03_copy:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm2[2,3]
; AVX512VL-NEXT:    retq
  %res = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x i64> %res
}

define <2 x i64> @shuffle_v2i64_12(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_12:
; SSE2:       # BB#0:
; SSE2-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2i64_12:
; SSE3:       # BB#0:
; SSE3-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2i64_12:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2i64_12:
; SSE41:       # BB#0:
; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_12:
; AVX:       # BB#0:
; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; AVX-NEXT:    retq
  %res = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
  ret <2 x i64> %res
}

define <2 x i64> @shuffle_v2i64_12_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_12_copy:
; SSE2:       # BB#0:
; SSE2-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0]
; SSE2-NEXT:    movapd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2i64_12_copy:
; SSE3:       # BB#0:
; SSE3-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0]
; SSE3-NEXT:    movapd %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2i64_12_copy:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    palignr {{.*#+}} xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
; SSSE3-NEXT:    movdqa %xmm2, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2i64_12_copy:
; SSE41:       # BB#0:
; SSE41-NEXT:    palignr {{.*#+}} xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
; SSE41-NEXT:    movdqa %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_12_copy:
; AVX:       # BB#0:
; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
; AVX-NEXT:    retq
  %res = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
  ret <2 x i64> %res
}

define <2 x i64> @shuffle_v2i64_13(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_13:
; SSE:       # BB#0:
; SSE-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_13:
; AVX:       # BB#0:
; AVX-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; AVX-NEXT:    retq
  %res = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
  ret <2 x i64> %res
}

define <2 x i64> @shuffle_v2i64_13_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_13_copy:
; SSE:       # BB#0:
; SSE-NEXT:    punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_13_copy:
; AVX:       # BB#0:
; AVX-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm2[1]
; AVX-NEXT:    retq
  %res = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
  ret <2 x i64> %res
}

define <2 x i64> @shuffle_v2i64_20(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_20:
; SSE:       # BB#0:
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_20:
; AVX:       # BB#0:
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-NEXT:    retq
  %res = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
  ret <2 x i64> %res
}

define <2 x i64> @shuffle_v2i64_20_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_20_copy:
; SSE:       # BB#0:
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; SSE-NEXT:    movdqa %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_20_copy:
; AVX:       # BB#0:
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm1[0]
; AVX-NEXT:    retq
  %res = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
  ret <2 x i64> %res
}

define <2 x i64> @shuffle_v2i64_21(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_21:
; SSE2:       # BB#0:
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2i64_21:
; SSE3:       # BB#0:
; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2i64_21:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2i64_21:
; SSE41:       # BB#0:
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: shuffle_v2i64_21:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v2i64_21:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v2i64_21:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX512VL-NEXT:    retq
  %res = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
  ret <2 x i64> %res
}

define <2 x i64> @shuffle_v2i64_21_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_21_copy:
; SSE2:       # BB#0:
; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSE2-NEXT:    movapd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2i64_21_copy:
; SSE3:       # BB#0:
; SSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSE3-NEXT:    movapd %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2i64_21_copy:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSSE3-NEXT:    movapd %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2i64_21_copy:
; SSE41:       # BB#0:
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: shuffle_v2i64_21_copy:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v2i64_21_copy:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm1[2,3]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v2i64_21_copy:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm1[2,3]
; AVX512VL-NEXT:    retq
  %res = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
  ret <2 x i64> %res
}

define <2 x i64> @shuffle_v2i64_30(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_30:
; SSE2:       # BB#0:
; SSE2-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
; SSE2-NEXT:    movapd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2i64_30:
; SSE3:       # BB#0:
; SSE3-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
; SSE3-NEXT:    movapd %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2i64_30:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2i64_30:
; SSE41:       # BB#0:
; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_30:
; AVX:       # BB#0:
; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
; AVX-NEXT:    retq
  %res = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
  ret <2 x i64> %res
}

define <2 x i64> @shuffle_v2i64_30_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_30_copy:
; SSE2:       # BB#0:
; SSE2-NEXT:    shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0]
; SSE2-NEXT:    movapd %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2i64_30_copy:
; SSE3:       # BB#0:
; SSE3-NEXT:    shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0]
; SSE3-NEXT:    movapd %xmm2, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2i64_30_copy:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2i64_30_copy:
; SSE41:       # BB#0:
; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_30_copy:
; AVX:       # BB#0:
; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; AVX-NEXT:    retq
  %res = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
  ret <2 x i64> %res
}

define <2 x i64> @shuffle_v2i64_31(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_31:
; SSE:       # BB#0:
; SSE-NEXT:    punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_31:
; AVX:       # BB#0:
; AVX-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; AVX-NEXT:    retq
  %res = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
  ret <2 x i64> %res
}

define <2 x i64> @shuffle_v2i64_31_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_31_copy:
; SSE:       # BB#0:
; SSE-NEXT:    punpckhqdq {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE-NEXT:    movdqa %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_31_copy:
; AVX:       # BB#0:
; AVX-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm2[1],xmm1[1]
; AVX-NEXT:    retq
  %res = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
  ret <2 x i64> %res
}

; ---- Shuffles against a zeroinitializer second operand ----
; In the function names, "z" marks a result lane taken from the zero vector.

define <2 x i64> @shuffle_v2i64_0z(<2 x i64> %a) {
; SSE-LABEL: shuffle_v2i64_0z:
; SSE:       # BB#0:
; SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_0z:
; AVX:       # BB#0:
; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX-NEXT:    retq
  %res = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
  ret <2 x i64> %res
}

define <2 x i64> @shuffle_v2i64_1z(<2 x i64> %a) {
; SSE-LABEL: shuffle_v2i64_1z:
; SSE:       # BB#0:
; SSE-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_1z:
; AVX:       # BB#0:
; AVX-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
; AVX-NEXT:    retq
  %res = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 1, i32 3>
  ret <2 x i64> %res
}

define <2 x i64> @shuffle_v2i64_z0(<2 x i64> %a) {
; SSE-LABEL: shuffle_v2i64_z0:
; SSE:       # BB#0:
; SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_z0:
; AVX:       # BB#0:
; AVX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
; AVX-NEXT:    retq
  %res = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 0>
  ret <2 x i64> %res
}

define <2 x i64> @shuffle_v2i64_z1(<2 x i64> %a) {
; SSE2-LABEL: shuffle_v2i64_z1:
; SSE2:       # BB#0:
; SSE2-NEXT:    xorpd %xmm1, %xmm1
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2i64_z1:
; SSE3:       # BB#0:
; SSE3-NEXT:    xorpd %xmm1, %xmm1
; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2i64_z1:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    xorpd %xmm1, %xmm1
; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2i64_z1:
; SSE41:       # BB#0:
; SSE41-NEXT:    pxor %xmm1, %xmm1
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: shuffle_v2i64_z1:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v2i64_z1:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v2i64_z1:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX512VL-NEXT:    retq
  %res = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 1>
  ret <2 x i64> %res
}

define <2 x double> @shuffle_v2f64_0z(<2 x double> %a) {
; SSE-LABEL: shuffle_v2f64_0z:
; SSE:       # BB#0:
; SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2f64_0z:
; AVX:       # BB#0:
; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX-NEXT:    retq
  %res = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %res
}

define <2 x double> @shuffle_v2f64_1z(<2 x double> %a) {
; SSE-LABEL: shuffle_v2f64_1z:
; SSE:       # BB#0:
; SSE-NEXT:    xorpd %xmm1, %xmm1
; SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT:    retq
;
; AVX1-LABEL: shuffle_v2f64_1z:
; AVX1:       # BB#0:
; AVX1-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v2f64_1z:
; AVX2:       # BB#0:
; AVX2-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v2f64_1z:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; AVX512VL-NEXT:    retq
  %res = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 1, i32 3>
  ret <2 x double> %res
}

define <2 x double> @shuffle_v2f64_z0(<2 x double> %a) {
; SSE-LABEL: shuffle_v2f64_z0:
; SSE:       # BB#0:
; SSE-NEXT:    xorpd %xmm1, %xmm1
; SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: shuffle_v2f64_z0:
; AVX1:       # BB#0:
; AVX1-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v2f64_z0:
; AVX2:       # BB#0:
; AVX2-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v2f64_z0:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512VL-NEXT:    retq
  %res = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 0>
  ret <2 x double> %res
}

define <2 x double> @shuffle_v2f64_z1(<2 x double> %a) {
; SSE2-LABEL: shuffle_v2f64_z1:
; SSE2:       # BB#0:
; SSE2-NEXT:    xorpd %xmm1, %xmm1
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2f64_z1:
; SSE3:       # BB#0:
; SSE3-NEXT:    xorpd %xmm1, %xmm1
; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2f64_z1:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    xorpd %xmm1, %xmm1
; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2f64_z1:
; SSE41:       # BB#0:
; SSE41-NEXT:    xorpd %xmm1, %xmm1
; SSE41-NEXT:    blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v2f64_z1:
; AVX:       # BB#0:
; AVX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; AVX-NEXT:    retq
  %res = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
  ret <2 x double> %res
}

; ---- Shuffles routed through a bitcast to <4 x float> ----

; A zero-blend (mask 2,1) followed by a 64-bit half swap done on the
; <4 x float> view (mask 2,3,0,1).
define <2 x double> @shuffle_v2f64_bitcast_1z(<2 x double> %a) {
; SSE-LABEL: shuffle_v2f64_bitcast_1z:
; SSE:       # BB#0:
; SSE-NEXT:    xorpd %xmm1, %xmm1
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; SSE-NEXT:    retq
;
; AVX1-LABEL: shuffle_v2f64_bitcast_1z:
; AVX1:       # BB#0:
; AVX1-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v2f64_bitcast_1z:
; AVX2:       # BB#0:
; AVX2-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v2f64_bitcast_1z:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; AVX512VL-NEXT:    retq
  %shuf64 = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
  %as_f32 = bitcast <2 x double> %shuf64 to <4 x float>
  %shuf32 = shufflevector <4 x float> %as_f32, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
  %res = bitcast <4 x float> %shuf32 to <2 x double>
  ret <2 x double> %res
}

; Float lane 0 is replaced with the constant 1.0 and then cleared again by the
; AND mask (i64 -4294967296 has its low 32 bits zero).
define <2 x i64> @shuffle_v2i64_bitcast_z123(<2 x i64> %x) {
; SSE2-LABEL: shuffle_v2i64_bitcast_z123:
; SSE2:       # BB#0:
; SSE2-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2i64_bitcast_z123:
; SSE3:       # BB#0:
; SSE3-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE3-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE3-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2i64_bitcast_z123:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSSE3-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSSE3-NEXT:    andps {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2i64_bitcast_z123:
; SSE41:       # BB#0:
; SSE41-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE41-NEXT:    xorps %xmm1, %xmm1
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: shuffle_v2i64_bitcast_z123:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX1-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v2i64_bitcast_z123:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX2-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX2-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v2i64_bitcast_z123:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vmovss {{.*}}(%rip), %xmm1
; AVX512VL-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX512VL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX512VL-NEXT:    retq
  %as_f32 = bitcast <2 x i64> %x to <4 x float>
  %shuf32 = shufflevector <4 x float> %as_f32, <4 x float> <float 1.000000e+00, float undef, float undef, float undef>, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  %as_i64 = bitcast <4 x float> %shuf32 to <2 x i64>
  %masked = and <2 x i64> %as_i64, <i64 -4294967296, i64 -1>
  ret <2 x i64> %masked
}

; ---- Scalar inserted into lane 0 (from a register or from memory), with the
; ---- other lane taken from zero or from an existing vector ----

define <2 x i64> @insert_reg_and_zero_v2i64(i64 %a) {
; SSE-LABEL: insert_reg_and_zero_v2i64:
; SSE:       # BB#0:
; SSE-NEXT:    movd %rdi, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_reg_and_zero_v2i64:
; AVX:       # BB#0:
; AVX-NEXT:    vmovq %rdi, %xmm0
; AVX-NEXT:    retq
  %vec = insertelement <2 x i64> undef, i64 %a, i32 0
  %res = shufflevector <2 x i64> %vec, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
  ret <2 x i64> %res
}

define <2 x i64> @insert_mem_and_zero_v2i64(i64* %ptr) {
; SSE-LABEL: insert_mem_and_zero_v2i64:
; SSE:       # BB#0:
; SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    retq
;
; AVX1-LABEL: insert_mem_and_zero_v2i64:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_mem_and_zero_v2i64:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: insert_mem_and_zero_v2i64:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vmovq (%rdi), %xmm0
; AVX512VL-NEXT:    retq
  %val = load i64, i64* %ptr
  %vec = insertelement <2 x i64> undef, i64 %val, i32 0
  %res = shufflevector <2 x i64> %vec, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
  ret <2 x i64> %res
}

define <2 x double> @insert_reg_and_zero_v2f64(double %a) {
; SSE-LABEL: insert_reg_and_zero_v2f64:
; SSE:       # BB#0:
; SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_reg_and_zero_v2f64:
; AVX:       # BB#0:
; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX-NEXT:    retq
  %vec = insertelement <2 x double> undef, double %a, i32 0
  %res = shufflevector <2 x double> %vec, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %res
}

define <2 x double> @insert_mem_and_zero_v2f64(double* %ptr) {
; SSE-LABEL: insert_mem_and_zero_v2f64:
; SSE:       # BB#0:
; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    retq
;
; AVX1-LABEL: insert_mem_and_zero_v2f64:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_mem_and_zero_v2f64:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: insert_mem_and_zero_v2f64:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vmovsd (%rdi), %xmm0
; AVX512VL-NEXT:    retq
  %val = load double, double* %ptr
  %vec = insertelement <2 x double> undef, double %val, i32 0
  %res = shufflevector <2 x double> %vec, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %res
}

define <2 x i64> @insert_reg_lo_v2i64(i64 %a, <2 x i64> %b) {
; SSE2-LABEL: insert_reg_lo_v2i64:
; SSE2:       # BB#0:
; SSE2-NEXT:    movd %rdi, %xmm1
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_reg_lo_v2i64:
; SSE3:       # BB#0:
; SSE3-NEXT:    movd %rdi, %xmm1
; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_reg_lo_v2i64:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    movd %rdi, %xmm1
; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_reg_lo_v2i64:
; SSE41:       # BB#0:
; SSE41-NEXT:    movd %rdi, %xmm1
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: insert_reg_lo_v2i64:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovq %rdi, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_reg_lo_v2i64:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovq %rdi, %xmm1
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: insert_reg_lo_v2i64:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vmovq %rdi, %xmm1
; AVX512VL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX512VL-NEXT:    retq
  %vec = insertelement <2 x i64> undef, i64 %a, i32 0
  %res = shufflevector <2 x i64> %vec, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x i64> %res
}

define <2 x i64> @insert_mem_lo_v2i64(i64* %ptr, <2 x i64> %b) {
; SSE2-LABEL: insert_mem_lo_v2i64:
; SSE2:       # BB#0:
; SSE2-NEXT:    movlpd (%rdi), %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL:
insert_mem_lo_v2i64: 1104; SSE3: # BB#0: 1105; SSE3-NEXT: movlpd (%rdi), %xmm0 1106; SSE3-NEXT: retq 1107; 1108; SSSE3-LABEL: insert_mem_lo_v2i64: 1109; SSSE3: # BB#0: 1110; SSSE3-NEXT: movlpd (%rdi), %xmm0 1111; SSSE3-NEXT: retq 1112; 1113; SSE41-LABEL: insert_mem_lo_v2i64: 1114; SSE41: # BB#0: 1115; SSE41-NEXT: movq {{.*#+}} xmm1 = mem[0],zero 1116; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7] 1117; SSE41-NEXT: retq 1118; 1119; AVX1-LABEL: insert_mem_lo_v2i64: 1120; AVX1: # BB#0: 1121; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero 1122; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7] 1123; AVX1-NEXT: retq 1124; 1125; AVX2-LABEL: insert_mem_lo_v2i64: 1126; AVX2: # BB#0: 1127; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero 1128; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] 1129; AVX2-NEXT: retq 1130; 1131; AVX512VL-LABEL: insert_mem_lo_v2i64: 1132; AVX512VL: # BB#0: 1133; AVX512VL-NEXT: vmovq (%rdi), %xmm1 1134; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] 1135; AVX512VL-NEXT: retq 1136 %a = load i64, i64* %ptr 1137 %v = insertelement <2 x i64> undef, i64 %a, i32 0 1138 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3> 1139 ret <2 x i64> %shuffle 1140} 1141 1142define <2 x i64> @insert_reg_hi_v2i64(i64 %a, <2 x i64> %b) { 1143; SSE-LABEL: insert_reg_hi_v2i64: 1144; SSE: # BB#0: 1145; SSE-NEXT: movd %rdi, %xmm1 1146; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1147; SSE-NEXT: retq 1148; 1149; AVX-LABEL: insert_reg_hi_v2i64: 1150; AVX: # BB#0: 1151; AVX-NEXT: vmovq %rdi, %xmm1 1152; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1153; AVX-NEXT: retq 1154 %v = insertelement <2 x i64> undef, i64 %a, i32 0 1155 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0> 1156 ret <2 x i64> %shuffle 1157} 1158 1159define <2 x i64> @insert_mem_hi_v2i64(i64* %ptr, <2 x i64> %b) { 1160; SSE-LABEL: insert_mem_hi_v2i64: 1161; SSE: # BB#0: 1162; SSE-NEXT: movq 
{{.*#+}} xmm1 = mem[0],zero 1163; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1164; SSE-NEXT: retq 1165; 1166; AVX1-LABEL: insert_mem_hi_v2i64: 1167; AVX1: # BB#0: 1168; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero 1169; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1170; AVX1-NEXT: retq 1171; 1172; AVX2-LABEL: insert_mem_hi_v2i64: 1173; AVX2: # BB#0: 1174; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero 1175; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1176; AVX2-NEXT: retq 1177; 1178; AVX512VL-LABEL: insert_mem_hi_v2i64: 1179; AVX512VL: # BB#0: 1180; AVX512VL-NEXT: vmovq (%rdi), %xmm1 1181; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1182; AVX512VL-NEXT: retq 1183 %a = load i64, i64* %ptr 1184 %v = insertelement <2 x i64> undef, i64 %a, i32 0 1185 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0> 1186 ret <2 x i64> %shuffle 1187} 1188 1189define <2 x double> @insert_reg_lo_v2f64(double %a, <2 x double> %b) { 1190; SSE-LABEL: insert_reg_lo_v2f64: 1191; SSE: # BB#0: 1192; SSE-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1] 1193; SSE-NEXT: movapd %xmm1, %xmm0 1194; SSE-NEXT: retq 1195; 1196; AVX1-LABEL: insert_reg_lo_v2f64: 1197; AVX1: # BB#0: 1198; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1] 1199; AVX1-NEXT: retq 1200; 1201; AVX2-LABEL: insert_reg_lo_v2f64: 1202; AVX2: # BB#0: 1203; AVX2-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1] 1204; AVX2-NEXT: retq 1205; 1206; AVX512VL-LABEL: insert_reg_lo_v2f64: 1207; AVX512VL: # BB#0: 1208; AVX512VL-NEXT: vmovsd %xmm0, %xmm1, %xmm0 1209; AVX512VL-NEXT: retq 1210 %v = insertelement <2 x double> undef, double %a, i32 0 1211 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3> 1212 ret <2 x double> %shuffle 1213} 1214 1215define <2 x double> @insert_mem_lo_v2f64(double* %ptr, <2 x double> %b) { 1216; SSE-LABEL: insert_mem_lo_v2f64: 1217; SSE: # BB#0: 1218; SSE-NEXT: movlpd (%rdi), %xmm0 1219; SSE-NEXT: retq 1220; 1221; 
AVX-LABEL: insert_mem_lo_v2f64: 1222; AVX: # BB#0: 1223; AVX-NEXT: vmovlpd (%rdi), %xmm0, %xmm0 1224; AVX-NEXT: retq 1225 %a = load double, double* %ptr 1226 %v = insertelement <2 x double> undef, double %a, i32 0 1227 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3> 1228 ret <2 x double> %shuffle 1229} 1230 1231define <2 x double> @insert_reg_hi_v2f64(double %a, <2 x double> %b) { 1232; SSE-LABEL: insert_reg_hi_v2f64: 1233; SSE: # BB#0: 1234; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0] 1235; SSE-NEXT: movapd %xmm1, %xmm0 1236; SSE-NEXT: retq 1237; 1238; AVX-LABEL: insert_reg_hi_v2f64: 1239; AVX: # BB#0: 1240; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] 1241; AVX-NEXT: retq 1242 %v = insertelement <2 x double> undef, double %a, i32 0 1243 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0> 1244 ret <2 x double> %shuffle 1245} 1246 1247define <2 x double> @insert_mem_hi_v2f64(double* %ptr, <2 x double> %b) { 1248; SSE-LABEL: insert_mem_hi_v2f64: 1249; SSE: # BB#0: 1250; SSE-NEXT: movhpd (%rdi), %xmm0 1251; SSE-NEXT: retq 1252; 1253; AVX-LABEL: insert_mem_hi_v2f64: 1254; AVX: # BB#0: 1255; AVX-NEXT: vmovhpd (%rdi), %xmm0, %xmm0 1256; AVX-NEXT: retq 1257 %a = load double, double* %ptr 1258 %v = insertelement <2 x double> undef, double %a, i32 0 1259 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0> 1260 ret <2 x double> %shuffle 1261} 1262 1263define <2 x double> @insert_dup_reg_v2f64(double %a) { 1264; SSE2-LABEL: insert_dup_reg_v2f64: 1265; SSE2: # BB#0: 1266; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] 1267; SSE2-NEXT: retq 1268; 1269; SSE3-LABEL: insert_dup_reg_v2f64: 1270; SSE3: # BB#0: 1271; SSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0] 1272; SSE3-NEXT: retq 1273; 1274; SSSE3-LABEL: insert_dup_reg_v2f64: 1275; SSSE3: # BB#0: 1276; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0] 1277; SSSE3-NEXT: retq 1278; 1279; SSE41-LABEL: insert_dup_reg_v2f64: 
1280; SSE41: # BB#0: 1281; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0] 1282; SSE41-NEXT: retq 1283; 1284; AVX-LABEL: insert_dup_reg_v2f64: 1285; AVX: # BB#0: 1286; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] 1287; AVX-NEXT: retq 1288 %v = insertelement <2 x double> undef, double %a, i32 0 1289 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0> 1290 ret <2 x double> %shuffle 1291} 1292 1293define <2 x double> @insert_dup_mem_v2f64(double* %ptr) { 1294; SSE2-LABEL: insert_dup_mem_v2f64: 1295; SSE2: # BB#0: 1296; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1297; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] 1298; SSE2-NEXT: retq 1299; 1300; SSE3-LABEL: insert_dup_mem_v2f64: 1301; SSE3: # BB#0: 1302; SSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] 1303; SSE3-NEXT: retq 1304; 1305; SSSE3-LABEL: insert_dup_mem_v2f64: 1306; SSSE3: # BB#0: 1307; SSSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] 1308; SSSE3-NEXT: retq 1309; 1310; SSE41-LABEL: insert_dup_mem_v2f64: 1311; SSE41: # BB#0: 1312; SSE41-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] 1313; SSE41-NEXT: retq 1314; 1315; AVX-LABEL: insert_dup_mem_v2f64: 1316; AVX: # BB#0: 1317; AVX-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0] 1318; AVX-NEXT: retq 1319 %a = load double, double* %ptr 1320 %v = insertelement <2 x double> undef, double %a, i32 0 1321 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0> 1322 ret <2 x double> %shuffle 1323} 1324 1325define <2 x double> @insert_dup_mem128_v2f64(<2 x double>* %ptr) nounwind { 1326; SSE2-LABEL: insert_dup_mem128_v2f64: 1327; SSE2: # BB#0: 1328; SSE2-NEXT: movaps (%rdi), %xmm0 1329; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] 1330; SSE2-NEXT: retq 1331; 1332; SSE3-LABEL: insert_dup_mem128_v2f64: 1333; SSE3: # BB#0: 1334; SSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] 1335; SSE3-NEXT: retq 1336; 1337; SSSE3-LABEL: insert_dup_mem128_v2f64: 1338; SSSE3: # BB#0: 1339; SSSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] 1340; SSSE3-NEXT: 
retq 1341; 1342; SSE41-LABEL: insert_dup_mem128_v2f64: 1343; SSE41: # BB#0: 1344; SSE41-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] 1345; SSE41-NEXT: retq 1346; 1347; AVX-LABEL: insert_dup_mem128_v2f64: 1348; AVX: # BB#0: 1349; AVX-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0] 1350; AVX-NEXT: retq 1351 %v = load <2 x double>, <2 x double>* %ptr 1352 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0> 1353 ret <2 x double> %shuffle 1354} 1355 1356 1357define <2 x i64> @insert_dup_mem_v2i64(i64* %ptr) { 1358; SSE-LABEL: insert_dup_mem_v2i64: 1359; SSE: # BB#0: 1360; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 1361; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] 1362; SSE-NEXT: retq 1363; 1364; AVX1-LABEL: insert_dup_mem_v2i64: 1365; AVX1: # BB#0: 1366; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero 1367; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] 1368; AVX1-NEXT: retq 1369; 1370; AVX2-LABEL: insert_dup_mem_v2i64: 1371; AVX2: # BB#0: 1372; AVX2-NEXT: vpbroadcastq (%rdi), %xmm0 1373; AVX2-NEXT: retq 1374; 1375; AVX512VL-LABEL: insert_dup_mem_v2i64: 1376; AVX512VL: # BB#0: 1377; AVX512VL-NEXT: vpbroadcastq (%rdi), %xmm0 1378; AVX512VL-NEXT: retq 1379 %tmp = load i64, i64* %ptr, align 1 1380 %tmp1 = insertelement <2 x i64> undef, i64 %tmp, i32 0 1381 %tmp2 = shufflevector <2 x i64> %tmp1, <2 x i64> undef, <2 x i32> zeroinitializer 1382 ret <2 x i64> %tmp2 1383} 1384 1385define <2 x double> @shuffle_mem_v2f64_10(<2 x double>* %ptr) { 1386; SSE-LABEL: shuffle_mem_v2f64_10: 1387; SSE: # BB#0: 1388; SSE-NEXT: movapd (%rdi), %xmm0 1389; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0] 1390; SSE-NEXT: retq 1391; 1392; AVX-LABEL: shuffle_mem_v2f64_10: 1393; AVX: # BB#0: 1394; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = mem[1,0] 1395; AVX-NEXT: retq 1396 1397 %a = load <2 x double>, <2 x double>* %ptr 1398 %shuffle = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 1, i32 0> 1399 ret <2 x double> %shuffle 1400} 1401