; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl -mattr=+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512VL

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"

define <2 x i64> @shuffle_v2i64_00(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_00:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE-NEXT: retq
;
; AVX1-LABEL: shuffle_v2i64_00:
; AVX1: # BB#0:
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v2i64_00:
; AVX2: # BB#0:
; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v2i64_00:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpbroadcastq %xmm0, %xmm0
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 0>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_10(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_10:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_10:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 0>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_11(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_11:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_11:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 1>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_22(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_22:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
; SSE-NEXT: retq
;
; AVX1-LABEL: shuffle_v2i64_22:
; AVX1: # BB#0:
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v2i64_22:
; AVX2: # BB#0:
; AVX2-NEXT: vpbroadcastq %xmm1, %xmm0
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v2i64_22:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpbroadcastq %xmm1, %xmm0
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 2>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_32(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_32:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_32:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 2>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_33(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_33:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_33:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 3>
  ret <2 x i64> %shuffle
}

define <2 x double> @shuffle_v2f64_00(<2 x double> %a, <2 x double> %b) {
; SSE2-LABEL: shuffle_v2f64_00:
; SSE2: # BB#0:
; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2f64_00:
; SSE3: # BB#0:
; SSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2f64_00:
; SSSE3: # BB#0:
; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2f64_00:
; SSE41: # BB#0:
; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v2f64_00:
; AVX: # BB#0:
; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 0>
  ret <2 x double> %shuffle
}
define <2 x double> @shuffle_v2f64_10(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: shuffle_v2f64_10:
; SSE: # BB#0:
; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2f64_10:
; AVX: # BB#0:
; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT: retq

  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 0>
  ret <2 x double> %shuffle
}
define <2 x double> @shuffle_v2f64_11(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: shuffle_v2f64_11:
; SSE: # BB#0:
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2f64_11:
; AVX: # BB#0:
; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,1]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 1>
  ret <2 x double> %shuffle
}
define <2 x double> @shuffle_v2f64_22(<2 x double> %a, <2 x double> %b) {
; SSE2-LABEL: shuffle_v2f64_22:
; SSE2: # BB#0:
; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0,0]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2f64_22:
; SSE3: # BB#0:
; SSE3-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2f64_22:
; SSSE3: # BB#0:
; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2f64_22:
; SSE41: # BB#0:
; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v2f64_22:
; AVX: # BB#0:
; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm1[0,0]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 2>
  ret <2 x double> %shuffle
}
define <2 x double> @shuffle_v2f64_32(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: shuffle_v2f64_32:
; SSE: # BB#0:
; SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1,0]
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2f64_32:
; AVX: # BB#0:
; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm1[1,0]
; AVX-NEXT: retq

  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 2>
  ret <2 x double> %shuffle
}
define <2 x double> @shuffle_v2f64_33(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: shuffle_v2f64_33:
; SSE: # BB#0:
; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2f64_33:
; AVX: # BB#0:
; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm1[1,1]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 3>
  ret <2 x double> %shuffle
}
define <2 x double> @shuffle_v2f64_03(<2 x double> %a, <2 x double> %b) {
; SSE2-LABEL: shuffle_v2f64_03:
; SSE2: # BB#0:
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2f64_03:
; SSE3: # BB#0:
; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE3-NEXT: movapd %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2f64_03:
; SSSE3: # BB#0:
; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSSE3-NEXT: movapd %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2f64_03:
; SSE41: # BB#0:
; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v2f64_03:
; AVX: # BB#0:
; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuffle
}
define <2 x double> @shuffle_v2f64_21(<2 x double> %a, <2 x double> %b) {
; SSE2-LABEL: shuffle_v2f64_21:
; SSE2: # BB#0:
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2f64_21:
; SSE3: # BB#0:
; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2f64_21:
; SSSE3: # BB#0:
; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2f64_21:
; SSE41: # BB#0:
; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v2f64_21:
; AVX: # BB#0:
; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 1>
  ret <2 x double> %shuffle
}


define <2 x i64> @shuffle_v2i64_02(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_02:
; SSE: # BB#0:
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_02:
; AVX: # BB#0:
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_02_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_02_copy:
; SSE: # BB#0:
; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_02_copy:
; AVX: # BB#0:
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm2[0]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_03(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_03:
; SSE2: # BB#0:
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2i64_03:
; SSE3: # BB#0:
; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE3-NEXT: movapd %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2i64_03:
; SSSE3: # BB#0:
; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSSE3-NEXT: movapd %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2i64_03:
; SSE41: # BB#0:
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuffle_v2i64_03:
; AVX1: # BB#0:
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v2i64_03:
; AVX2: # BB#0:
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v2i64_03:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_03_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_03_copy:
; SSE2: # BB#0:
; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
; SSE2-NEXT: movapd %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2i64_03_copy:
; SSE3: # BB#0:
; SSE3-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
; SSE3-NEXT: movapd %xmm2, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2i64_03_copy:
; SSSE3: # BB#0:
; SSSE3-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
; SSSE3-NEXT: movapd %xmm2, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2i64_03_copy:
; SSE41: # BB#0:
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuffle_v2i64_03_copy:
; AVX1: # BB#0:
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v2i64_03_copy:
; AVX2: # BB#0:
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm2[2,3]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v2i64_03_copy:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm2[2,3]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_12(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_12:
; SSE2: # BB#0:
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2i64_12:
; SSE3: # BB#0:
; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2i64_12:
; SSSE3: # BB#0:
; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2i64_12:
; SSE41: # BB#0:
; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_12:
; AVX: # BB#0:
; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_12_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_12_copy:
; SSE2: # BB#0:
; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0]
; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2i64_12_copy:
; SSE3: # BB#0:
; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0]
; SSE3-NEXT: movapd %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2i64_12_copy:
; SSSE3: # BB#0:
; SSSE3-NEXT: palignr {{.*#+}} xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
; SSSE3-NEXT: movdqa %xmm2, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2i64_12_copy:
; SSE41: # BB#0:
; SSE41-NEXT: palignr {{.*#+}} xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_12_copy:
; AVX: # BB#0:
; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_13(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_13:
; SSE: # BB#0:
; SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_13:
; AVX: # BB#0:
; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_13_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_13_copy:
; SSE: # BB#0:
; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_13_copy:
; AVX: # BB#0:
; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm2[1]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_20(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_20:
; SSE: # BB#0:
; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_20:
; AVX: # BB#0:
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_20_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_20_copy:
; SSE: # BB#0:
; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; SSE-NEXT: movdqa %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_20_copy:
; AVX: # BB#0:
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm1[0]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_21(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_21:
; SSE2: # BB#0:
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2i64_21:
; SSE3: # BB#0:
; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2i64_21:
; SSSE3: # BB#0:
; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2i64_21:
; SSE41: # BB#0:
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuffle_v2i64_21:
; AVX1: # BB#0:
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v2i64_21:
; AVX2: # BB#0:
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v2i64_21:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_21_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_21_copy:
; SSE2: # BB#0:
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2i64_21_copy:
; SSE3: # BB#0:
; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSE3-NEXT: movapd %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2i64_21_copy:
; SSSE3: # BB#0:
; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSSE3-NEXT: movapd %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2i64_21_copy:
; SSE41: # BB#0:
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuffle_v2i64_21_copy:
; AVX1: # BB#0:
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v2i64_21_copy:
; AVX2: # BB#0:
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm1[2,3]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v2i64_21_copy:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm1[2,3]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_30(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_30:
; SSE2: # BB#0:
; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2i64_30:
; SSE3: # BB#0:
; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
; SSE3-NEXT: movapd %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2i64_30:
; SSSE3: # BB#0:
; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2i64_30:
; SSE41: # BB#0:
; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_30:
; AVX: # BB#0:
; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_30_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_30_copy:
; SSE2: # BB#0:
; SSE2-NEXT: shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0]
; SSE2-NEXT: movapd %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2i64_30_copy:
; SSE3: # BB#0:
; SSE3-NEXT: shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0]
; SSE3-NEXT: movapd %xmm2, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2i64_30_copy:
; SSSE3: # BB#0:
; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2i64_30_copy:
; SSE41: # BB#0:
; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_30_copy:
; AVX: # BB#0:
; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_31(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_31:
; SSE: # BB#0:
; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_31:
; AVX: # BB#0:
; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_31_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_31_copy:
; SSE: # BB#0:
; SSE-NEXT: punpckhqdq {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE-NEXT: movdqa %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_31_copy:
; AVX: # BB#0:
; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm2[1],xmm1[1]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
  ret <2 x i64> %shuffle
}

define <2 x i64> @shuffle_v2i64_0z(<2 x i64> %a) {
; SSE-LABEL: shuffle_v2i64_0z:
; SSE: # BB#0:
; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_0z:
; AVX: # BB#0:
; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
  ret <2 x i64> %shuffle
}

define <2 x i64> @shuffle_v2i64_1z(<2 x i64> %a) {
; SSE-LABEL: shuffle_v2i64_1z:
; SSE: # BB#0:
; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_1z:
; AVX: # BB#0:
; AVX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 1, i32 3>
  ret <2 x i64> %shuffle
}

define <2 x i64> @shuffle_v2i64_z0(<2 x i64> %a) {
; SSE-LABEL: shuffle_v2i64_z0:
; SSE: # BB#0:
; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_z0:
; AVX: # BB#0:
; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 0>
  ret <2 x i64> %shuffle
}

define <2 x i64> @shuffle_v2i64_z1(<2 x i64> %a) {
; SSE2-LABEL: shuffle_v2i64_z1:
; SSE2: # BB#0:
; SSE2-NEXT: xorpd %xmm1, %xmm1
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2i64_z1:
; SSE3: # BB#0:
; SSE3-NEXT: xorpd %xmm1, %xmm1
; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2i64_z1:
; SSSE3: # BB#0:
; SSSE3-NEXT: xorpd %xmm1, %xmm1
; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2i64_z1:
; SSE41: # BB#0:
; SSE41-NEXT: pxor %xmm1, %xmm1
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuffle_v2i64_z1:
; AVX1: # BB#0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v2i64_z1:
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v2i64_z1:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 1>
  ret <2 x i64> %shuffle
}

define <2 x double> @shuffle_v2f64_0z(<2 x double> %a) {
; SSE-LABEL: shuffle_v2f64_0z:
; SSE: # BB#0:
; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2f64_0z:
; AVX: # BB#0:
; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuffle
}

define <2 x double> @shuffle_v2f64_1z(<2 x double> %a) {
; SSE-LABEL: shuffle_v2f64_1z:
; SSE: # BB#0:
; SSE-NEXT: xorpd %xmm1, %xmm1
; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT: retq
;
; AVX1-LABEL: shuffle_v2f64_1z:
; AVX1: # BB#0:
; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v2f64_1z:
; AVX2: # BB#0:
; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v2f64_1z:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 1, i32 3>
  ret <2 x double> %shuffle
}

define <2 x double> @shuffle_v2f64_z0(<2 x double> %a) {
; SSE-LABEL: shuffle_v2f64_z0:
; SSE: # BB#0:
; SSE-NEXT: xorpd %xmm1, %xmm1
; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: shuffle_v2f64_z0:
; AVX1: # BB#0:
; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v2f64_z0:
; AVX2: # BB#0:
; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v2f64_z0:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 0>
  ret <2 x double> %shuffle
}

define <2 x double> @shuffle_v2f64_z1(<2 x double> %a) {
; SSE2-LABEL: shuffle_v2f64_z1:
; SSE2: # BB#0:
; SSE2-NEXT: xorpd %xmm1, %xmm1
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2f64_z1:
; SSE3: # BB#0:
; SSE3-NEXT: xorpd %xmm1, %xmm1
; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2f64_z1:
; SSSE3: # BB#0:
; SSSE3-NEXT: xorpd %xmm1, %xmm1
; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2f64_z1:
; SSE41: # BB#0:
; SSE41-NEXT: xorpd %xmm1, %xmm1
; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuffle_v2f64_z1:
; AVX1: # BB#0:
; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v2f64_z1:
; AVX2: # BB#0:
; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v2f64_z1:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
  ret <2 x double> %shuffle
}

define <2 x double> @shuffle_v2f64_bitcast_1z(<2 x double> %a) {
; SSE-LABEL: shuffle_v2f64_bitcast_1z:
; SSE: # BB#0:
; SSE-NEXT: xorpd %xmm1, %xmm1
; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; SSE-NEXT: retq
;
; AVX1-LABEL: shuffle_v2f64_bitcast_1z:
; AVX1: # BB#0:
; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v2f64_bitcast_1z:
; AVX2: # BB#0:
; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v2f64_bitcast_1z:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; AVX512VL-NEXT: retq
  %shuffle64 = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
  %bitcast32 = bitcast <2 x double> %shuffle64 to <4 x float>
  %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
  %bitcast64 = bitcast <4 x float> %shuffle32 to <2 x double>
  ret <2 x double> %bitcast64
}

define <2 x i64> @shuffle_v2i64_bitcast_z123(<2 x i64> %x) {
; SSE2-LABEL: shuffle_v2i64_bitcast_z123:
; SSE2: # BB#0:
; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2i64_bitcast_z123:
; SSE3: # BB#0:
; SSE3-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE3-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE3-NEXT: andps {{.*}}(%rip), %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2i64_bitcast_z123:
; SSSE3: # BB#0:
; SSSE3-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSSE3-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2i64_bitcast_z123:
; SSE41: # BB#0:
; SSE41-NEXT: pxor %xmm1, %xmm1
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuffle_v2i64_bitcast_z123:
; AVX1: # BB#0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v2i64_bitcast_z123:
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v2i64_bitcast_z123:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX512VL-NEXT: retq
  %bitcast32 = bitcast <2 x i64> %x to <4 x float>
  %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> <float 1.000000e+00, float undef, float undef, float undef>, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  %bitcast64 = bitcast <4 x float> %shuffle32 to <2 x i64>
  %and = and <2 x i64> %bitcast64, <i64 -4294967296, i64 -1>
  ret <2 x i64> %and
}

define <2 x i64> @insert_reg_and_zero_v2i64(i64 %a) {
; SSE-LABEL: insert_reg_and_zero_v2i64:
; SSE: # BB#0:
; SSE-NEXT: movd %rdi, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_reg_and_zero_v2i64:
; AVX: # BB#0:
; AVX-NEXT: vmovq %rdi, %xmm0
; AVX-NEXT: retq
  %v = insertelement <2 x i64> undef, i64 %a, i32 0
  %shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
  ret <2 x i64> %shuffle
}

define <2 x i64> @insert_mem_and_zero_v2i64(i64* %ptr) {
; SSE-LABEL: insert_mem_and_zero_v2i64:
; SSE: # BB#0:
; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: retq
;
; AVX-LABEL: insert_mem_and_zero_v2i64:
; AVX: # BB#0:
; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: retq
  %a = load i64, i64* %ptr
  %v = insertelement <2 x i64> undef, i64 %a, i32 0
  %shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
  ret <2 x i64> %shuffle
}

define <2 x double> @insert_reg_and_zero_v2f64(double %a) {
; SSE-LABEL: insert_reg_and_zero_v2f64:
; SSE: # BB#0:
; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-NEXT: retq
;
; AVX-LABEL: insert_reg_and_zero_v2f64:
; AVX: # BB#0:
; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX-NEXT: retq
  %v = insertelement <2 x double> undef, double %a, i32 0
  %shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuffle
}

define <2 x double> @insert_mem_and_zero_v2f64(double* %ptr) {
; SSE-LABEL: insert_mem_and_zero_v2f64:
; SSE: # BB#0:
; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: retq
;
; AVX-LABEL: insert_mem_and_zero_v2f64:
; AVX: # BB#0:
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: retq
  %a = load double, double* %ptr
  %v = insertelement <2 x double> undef, double %a, i32 0
  %shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuffle
}

define <2 x i64> @insert_reg_lo_v2i64(i64 %a, <2 x i64> %b) {
; SSE2-LABEL: insert_reg_lo_v2i64:
; SSE2: # BB#0:
; SSE2-NEXT: movd %rdi, %xmm1
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT: retq
;
; SSE3-LABEL: insert_reg_lo_v2i64:
; SSE3: # BB#0:
; SSE3-NEXT: movd %rdi, %xmm1
; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: insert_reg_lo_v2i64:
; SSSE3: # BB#0:
; SSSE3-NEXT: movd %rdi, %xmm1
; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: insert_reg_lo_v2i64:
; SSE41: # BB#0:
; SSE41-NEXT: movd %rdi, %xmm1
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: insert_reg_lo_v2i64:
; AVX1: # BB#0:
; AVX1-NEXT: vmovq %rdi, %xmm1
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_reg_lo_v2i64:
; AVX2: # BB#0:
; AVX2-NEXT: vmovq %rdi, %xmm1
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: insert_reg_lo_v2i64:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vmovq %rdi, %xmm1
; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX512VL-NEXT: retq
  %v = insertelement <2 x i64> undef, i64 %a, i32 0
  %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x i64> %shuffle
}

define <2 x i64> @insert_mem_lo_v2i64(i64* %ptr, <2 x i64> %b) {
; SSE2-LABEL: insert_mem_lo_v2i64:
; SSE2: # BB#0:
; SSE2-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
; SSE2-NEXT: retq
;
; SSE3-LABEL: insert_mem_lo_v2i64:
; SSE3: # BB#0:
; SSE3-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: insert_mem_lo_v2i64:
; SSSE3: # BB#0:
; SSSE3-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: insert_mem_lo_v2i64:
; SSE41: # BB#0:
; SSE41-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: insert_mem_lo_v2i64:
; AVX1: # BB#0:
; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_mem_lo_v2i64:
; AVX2: # BB#0:
; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: insert_mem_lo_v2i64:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX512VL-NEXT: retq
  %a = load i64, i64* %ptr
  %v = insertelement <2 x i64> undef, i64 %a, i32 0
  %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x i64> %shuffle
}

define <2 x i64> @insert_reg_hi_v2i64(i64 %a, <2 x i64> %b) {
; SSE-LABEL: insert_reg_hi_v2i64:
; SSE: # BB#0:
; SSE-NEXT: movd %rdi, %xmm1
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
; AVX-LABEL: insert_reg_hi_v2i64:
; AVX: # BB#0:
; AVX-NEXT: vmovq %rdi, %xmm1
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
  %v = insertelement <2 x i64> undef, i64 %a, i32 0
  %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
  ret <2 x i64> %shuffle
}

define <2 x i64> @insert_mem_hi_v2i64(i64* %ptr, <2 x i64> %b) {
; SSE-LABEL: insert_mem_hi_v2i64:
; SSE: # BB#0:
; SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
; AVX-LABEL: insert_mem_hi_v2i64:
; AVX: # BB#0:
; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
  %a = load i64, i64* %ptr
  %v = insertelement <2 x i64> undef, i64 %a, i32 0
  %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
  ret <2 x i64> %shuffle
}

define <2 x double> @insert_reg_lo_v2f64(double %a, <2 x double> %b) {
; SSE-LABEL: insert_reg_lo_v2f64:
; SSE: # BB#0:
; SSE-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_reg_lo_v2f64:
; AVX: # BB#0:
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; AVX-NEXT: retq
  %v = insertelement <2 x double> undef, double %a, i32 0
  %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuffle
}

define <2 x double> @insert_mem_lo_v2f64(double* %ptr, <2 x double> %b) {
; SSE-LABEL: insert_mem_lo_v2f64:
; SSE: # BB#0:
; SSE-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
; SSE-NEXT: retq
;
; AVX-LABEL: insert_mem_lo_v2f64:
; AVX: # BB#0:
; AVX-NEXT: vmovlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
; AVX-NEXT: retq
  %a = load double, double* %ptr
  %v = insertelement <2 x double> undef, double %a, i32 0
  %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuffle
}

define <2 x double> @insert_reg_hi_v2f64(double %a, <2 x double> %b) {
; SSE-LABEL: insert_reg_hi_v2f64:
; SSE: # BB#0:
; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_reg_hi_v2f64:
; AVX: # BB#0:
; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-NEXT: retq
  %v = insertelement <2 x double> undef, double %a, i32 0
  %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
  ret <2 x double> %shuffle
}

define <2 x double> @insert_mem_hi_v2f64(double* %ptr, <2 x double> %b) {
; SSE-LABEL: insert_mem_hi_v2f64:
; SSE: # BB#0:
; SSE-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; SSE-NEXT: retq
;
; AVX-LABEL: insert_mem_hi_v2f64:
; AVX: # BB#0:
; AVX-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; AVX-NEXT: retq
  %a = load double, double* %ptr
  %v = insertelement <2 x double> undef, double %a, i32 0
  %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
  ret <2 x double> %shuffle
}

define <2 x double> @insert_dup_reg_v2f64(double %a) {
; SSE2-LABEL: insert_dup_reg_v2f64:
; SSE2: # BB#0:
; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
; SSE2-NEXT: retq
;
; SSE3-LABEL: insert_dup_reg_v2f64:
; SSE3: # BB#0:
; SSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: insert_dup_reg_v2f64:
; SSSE3: # BB#0:
; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: insert_dup_reg_v2f64:
; SSE41: # BB#0:
; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
; SSE41-NEXT: retq
;
; AVX-LABEL: insert_dup_reg_v2f64:
; AVX: # BB#0:
; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX-NEXT: retq
  %v = insertelement <2 x double> undef, double %a, i32 0
  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
  ret <2 x double> %shuffle
}

define <2 x double> @insert_dup_mem_v2f64(double* %ptr) {
; SSE2-LABEL: insert_dup_mem_v2f64:
; SSE2: # BB#0:
; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
; SSE2-NEXT: retq
;
; SSE3-LABEL: insert_dup_mem_v2f64:
; SSE3: # BB#0:
; SSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: insert_dup_mem_v2f64:
; SSSE3: # BB#0:
; SSSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: insert_dup_mem_v2f64:
; SSE41: # BB#0:
; SSE41-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
; SSE41-NEXT: retq
;
; AVX-LABEL: insert_dup_mem_v2f64:
; AVX: # BB#0:
; AVX-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
; AVX-NEXT: retq
  %a = load double, double* %ptr
  %v = insertelement <2 x double> undef, double %a, i32 0
  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
  ret <2 x double> %shuffle
}

define <2 x double> @insert_dup_mem128_v2f64(<2 x double>* %ptr) nounwind {
; SSE2-LABEL: insert_dup_mem128_v2f64:
; SSE2: # BB#0:
; SSE2-NEXT: movaps (%rdi), %xmm0
; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
; SSE2-NEXT: retq
;
; SSE3-LABEL: insert_dup_mem128_v2f64:
; SSE3: # BB#0:
; SSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: insert_dup_mem128_v2f64:
; SSSE3: # BB#0:
; SSSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: insert_dup_mem128_v2f64:
; SSE41: # BB#0:
; SSE41-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
; SSE41-NEXT: retq
;
; AVX-LABEL: insert_dup_mem128_v2f64:
; AVX: # BB#0:
; AVX-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
; AVX-NEXT: retq
  %v = load <2 x double>, <2 x double>* %ptr
  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
  ret <2 x double> %shuffle
}


define <2 x i64> @insert_dup_mem_v2i64(i64* %ptr) {
; SSE-LABEL: insert_dup_mem_v2i64:
; SSE: # BB#0:
; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE-NEXT: retq
;
; AVX1-LABEL: insert_dup_mem_v2i64:
; AVX1: # BB#0:
; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_dup_mem_v2i64:
; AVX2: # BB#0:
; AVX2-NEXT: vpbroadcastq (%rdi), %xmm0
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: insert_dup_mem_v2i64:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpbroadcastq (%rdi), %xmm0
; AVX512VL-NEXT: retq
  %tmp = load i64, i64* %ptr, align 1
  %tmp1 = insertelement <2 x i64> undef, i64 %tmp, i32 0
  %tmp2 = shufflevector <2 x i64> %tmp1, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %tmp2
}

define <2 x double> @shuffle_mem_v2f64_10(<2 x double>* %ptr) {
; SSE-LABEL: shuffle_mem_v2f64_10:
; SSE: # BB#0:
; SSE-NEXT: movapd (%rdi), %xmm0
; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_mem_v2f64_10:
; AVX: # BB#0:
; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = mem[1,0]
; AVX-NEXT: retq

  %a = load <2 x double>, <2 x double>* %ptr
  %shuffle = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 1, i32 0>
  ret <2 x double> %shuffle
}