; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse3 | FileCheck %s --check-prefixes=SSE,SSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512VL

; Two-element shuffles of 128-bit vectors (<2 x i64> and <2 x double>).
; Each function name ends with its shuffle mask. Mask indices 0-1 pick lanes
; of the first shufflevector operand and 2-3 pick lanes of the second; 'u'
; marks an undef lane. The "_copy" variants take an extra, unused leading
; argument (%nonce) so the real sources arrive in the second and third
; vector arguments.

;
; <2 x i64>, masks that read a single source.
;

; Mask <0,0> - splat the low element of %a.
define <2 x i64> @shuffle_v2i64_00(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_00:
; SSE:       # %bb.0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE-NEXT:    retq
;
; AVX1-LABEL: shuffle_v2i64_00:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v2i64_00:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v2i64_00:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX512VL-NEXT:    retq
  %res = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 0>
  ret <2 x i64> %res
}

; Mask <1,0> - swap the two elements of %a.
define <2 x i64> @shuffle_v2i64_10(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_10:
; SSE:       # %bb.0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_10:
; AVX:       # %bb.0:
; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX-NEXT:    retq
  %res = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 0>
  ret <2 x i64> %res
}

; Mask <1,1> - splat the high element of %a.
define <2 x i64> @shuffle_v2i64_11(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_11:
; SSE:       # %bb.0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_11:
; AVX:       # %bb.0:
; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX-NEXT:    retq
  %res = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 1>
  ret <2 x i64> %res
}

; Mask <2,2> - splat the low element of %b.
define <2 x i64> @shuffle_v2i64_22(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_22:
; SSE:       # %bb.0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
; SSE-NEXT:    retq
;
; AVX1-LABEL: shuffle_v2i64_22:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm1[0,1,0,1]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v2i64_22:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovddup {{.*#+}} xmm0 = xmm1[0,0]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v2i64_22:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovddup {{.*#+}} xmm0 = xmm1[0,0]
; AVX512VL-NEXT:    retq
  %res = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 2>
  ret <2 x i64> %res
}

; Mask <3,2> - swap the two elements of %b.
define <2 x i64> @shuffle_v2i64_32(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_32:
; SSE:       # %bb.0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm1[2,3,0,1]
; AVX-NEXT:    retq
  %res = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 2>
  ret <2 x i64> %res
}

; Mask <3,3> - splat the high element of %b.
define <2 x i64> @shuffle_v2i64_33(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_33:
; SSE:       # %bb.0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_33:
; AVX:       # %bb.0:
; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm1[2,3,2,3]
; AVX-NEXT:    retq
  %res = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 3>
  ret <2 x i64> %res
}

;
; <2 x double>, masks that read a single source.
;

; Mask <0,0> - splat the low element of %a.
define <2 x double> @shuffle_v2f64_00(<2 x double> %a, <2 x double> %b) {
; SSE2-LABEL: shuffle_v2f64_00:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2f64_00:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2f64_00:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2f64_00:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v2f64_00:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX-NEXT:    retq
  %res = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 0>
  ret <2 x double> %res
}

; Mask <1,0> - swap the two elements of %a.
define <2 x double> @shuffle_v2f64_10(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: shuffle_v2f64_10:
; SSE:       # %bb.0:
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2f64_10:
; AVX:       # %bb.0:
; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT:    retq
  %res = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 0>
  ret <2 x double> %res
}

; Mask <1,1> - splat the high element of %a.
define <2 x double> @shuffle_v2f64_11(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: shuffle_v2f64_11:
; SSE:       # %bb.0:
; SSE-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2f64_11:
; AVX:       # %bb.0:
; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,1]
; AVX-NEXT:    retq
  %res = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 1>
  ret <2 x double> %res
}

; Mask <2,2> - splat the low element of %b.
define <2 x double> @shuffle_v2f64_22(<2 x double> %a, <2 x double> %b) {
; SSE2-LABEL: shuffle_v2f64_22:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2f64_22:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm1[0,0]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2f64_22:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm1[0,0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2f64_22:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movddup {{.*#+}} xmm0 = xmm1[0,0]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v2f64_22:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm1[0,0]
; AVX-NEXT:    retq
  %res = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 2>
  ret <2 x double> %res
}

; Mask <3,2> - swap the two elements of %b.
define <2 x double> @shuffle_v2f64_32(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: shuffle_v2f64_32:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,3],xmm1[0,1]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2f64_32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm1[1,0]
; AVX-NEXT:    retq
  %res = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 2>
  ret <2 x double> %res
}

; Mask <3,3> - splat the high element of %b.
define <2 x double> @shuffle_v2f64_33(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: shuffle_v2f64_33:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2f64_33:
; AVX:       # %bb.0:
; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm1[1,1]
; AVX-NEXT:    retq
  %res = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 3>
  ret <2 x double> %res
}

;
; <2 x double>, masks that mix both sources or contain an undef lane.
;

; Mask <0,3> - low element of %a, high element of %b.
define <2 x double> @shuffle_v2f64_03(<2 x double> %a, <2 x double> %b) {
; SSE2-LABEL: shuffle_v2f64_03:
; SSE2:       # %bb.0:
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2f64_03:
; SSE3:       # %bb.0:
; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2f64_03:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2f64_03:
; SSE41:       # %bb.0:
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v2f64_03:
; AVX:       # %bb.0:
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX-NEXT:    retq
  %res = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %res
}

; Mask <2,1> - low element of %b, high element of %a.
define <2 x double> @shuffle_v2f64_21(<2 x double> %a, <2 x double> %b) {
; SSE2-LABEL: shuffle_v2f64_21:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2f64_21:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2f64_21:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2f64_21:
; SSE41:       # %bb.0:
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v2f64_21:
; AVX:       # %bb.0:
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX-NEXT:    retq
  %res = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 1>
  ret <2 x double> %res
}

; Mask <undef,2> - lane 0 is undef, lane 1 is the low element of %b.
define <2 x double> @shuffle_v2f64_u2(<2 x double> %a, <2 x double> %b) {
; SSE2-LABEL: shuffle_v2f64_u2:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2f64_u2:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm1[0,0]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2f64_u2:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm1[0,0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2f64_u2:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movddup {{.*#+}} xmm0 = xmm1[0,0]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v2f64_u2:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm1[0,0]
; AVX-NEXT:    retq
  %res = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 undef, i32 2>
  ret <2 x double> %res
}

; Mask <3,undef> - lane 0 is the high element of %b, lane 1 is undef.
define <2 x double> @shuffle_v2f64_3u(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: shuffle_v2f64_3u:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2f64_3u:
; AVX:       # %bb.0:
; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm1[1,0]
; AVX-NEXT:    retq
  %res = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 undef>
  ret <2 x double> %res
}

;
; <2 x i64>, masks that mix both sources, each paired with a "_copy" variant.
;

; Mask <0,2> - low element of %a, low element of %b.
define <2 x i64> @shuffle_v2i64_02(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_02:
; SSE:       # %bb.0:
; SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_02:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT:    retq
  %res = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
  ret <2 x i64> %res
}

; Mask <0,2> with the unused %nonce occupying the first argument.
define <2 x i64> @shuffle_v2i64_02_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_02_copy:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_02_copy:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm2[0]
; AVX-NEXT:    retq
  %res = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
  ret <2 x i64> %res
}

; Mask <0,3> - low element of %a, high element of %b.
define <2 x i64> @shuffle_v2i64_03(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_03:
; SSE2:       # %bb.0:
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2i64_03:
; SSE3:       # %bb.0:
; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2i64_03:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2i64_03:
; SSE41:       # %bb.0:
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_03:
; AVX:       # %bb.0:
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX-NEXT:    retq
  %res = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x i64> %res
}

; Mask <0,3> with the unused %nonce occupying the first argument.
define <2 x i64> @shuffle_v2i64_03_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_03_copy:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2i64_03_copy:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movaps %xmm1, %xmm0
; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2i64_03_copy:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movaps %xmm1, %xmm0
; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2i64_03_copy:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movaps %xmm1, %xmm0
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_03_copy:
; AVX:       # %bb.0:
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm2[2,3]
; AVX-NEXT:    retq
  %res = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x i64> %res
}

; Mask <1,2> - high element of %a, low element of %b.
define <2 x i64> @shuffle_v2i64_12(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_12:
; SSE2:       # %bb.0:
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,3],xmm1[0,1]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2i64_12:
; SSE3:       # %bb.0:
; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,3],xmm1[0,1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2i64_12:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2i64_12:
; SSE41:       # %bb.0:
; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_12:
; AVX:       # %bb.0:
; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; AVX-NEXT:    retq
  %res = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
  ret <2 x i64> %res
}

; Mask <1,2> with the unused %nonce occupying the first argument.
define <2 x i64> @shuffle_v2i64_12_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_12_copy:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,3],xmm2[0,1]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2i64_12_copy:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movaps %xmm1, %xmm0
; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,3],xmm2[0,1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2i64_12_copy:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa %xmm2, %xmm0
; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2i64_12_copy:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa %xmm2, %xmm0
; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_12_copy:
; AVX:       # %bb.0:
; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
; AVX-NEXT:    retq
  %res = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
  ret <2 x i64> %res
}

; Mask <1,3> - high element of %a, high element of %b.
define <2 x i64> @shuffle_v2i64_13(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_13:
; SSE:       # %bb.0:
; SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_13:
; AVX:       # %bb.0:
; AVX-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; AVX-NEXT:    retq
  %res = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
  ret <2 x i64> %res
}

; Mask <1,3> with the unused %nonce occupying the first argument.
define <2 x i64> @shuffle_v2i64_13_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_13_copy:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm2[1]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_13_copy:
; AVX:       # %bb.0:
; AVX-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm2[1]
; AVX-NEXT:    retq
  %res = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
  ret <2 x i64> %res
}

; Mask <2,0> - low element of %b, low element of %a.
define <2 x i64> @shuffle_v2i64_20(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_20:
; SSE:       # %bb.0:
; SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_20:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-NEXT:    retq
  %res = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
  ret <2 x i64> %res
}

; Mask <2,0> with the unused %nonce occupying the first argument.
define <2 x i64> @shuffle_v2i64_20_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_20_copy:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps %xmm2, %xmm0
; SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_20_copy:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm2[0],xmm1[0]
; AVX-NEXT:    retq
  %res = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
  ret <2 x i64> %res
}

; Mask <2,1> - low element of %b, high element of %a.
define <2 x i64> @shuffle_v2i64_21(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_21:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2i64_21:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2i64_21:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2i64_21:
; SSE41:       # %bb.0:
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_21:
; AVX:       # %bb.0:
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX-NEXT:    retq
  %res = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
  ret <2 x i64> %res
}

; Mask <2,1> with the unused %nonce occupying the first argument.
define <2 x i64> @shuffle_v2i64_21_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_21_copy:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movapd %xmm1, %xmm0
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2i64_21_copy:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movapd %xmm1, %xmm0
; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2i64_21_copy:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movapd %xmm1, %xmm0
; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2i64_21_copy:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movaps %xmm1, %xmm0
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_21_copy:
; AVX:       # %bb.0:
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm2[0,1],xmm1[2,3]
; AVX-NEXT:    retq
  %res = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
  ret <2 x i64> %res
}

; Mask <3,0> - high element of %b, low element of %a.
define <2 x i64> @shuffle_v2i64_30(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_30:
; SSE2:       # %bb.0:
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,3],xmm0[0,1]
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2i64_30:
; SSE3:       # %bb.0:
; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,3],xmm0[0,1]
; SSE3-NEXT:    movaps %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2i64_30:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2i64_30:
; SSE41:       # %bb.0:
; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_30:
; AVX:       # %bb.0:
; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
; AVX-NEXT:    retq
  %res = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
  ret <2 x i64> %res
}

; Mask <3,0> with the unused %nonce occupying the first argument.
define <2 x i64> @shuffle_v2i64_30_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_30_copy:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movaps %xmm2, %xmm0
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,3],xmm1[0,1]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2i64_30_copy:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movaps %xmm2, %xmm0
; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,3],xmm1[0,1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2i64_30_copy:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm2[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2i64_30_copy:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm2[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_30_copy:
; AVX:       # %bb.0:
; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; AVX-NEXT:    retq
  %res = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
  ret <2 x i64> %res
}
; Mask <3,1> - high element of %b, high element of %a.
define <2 x i64> @shuffle_v2i64_31(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_31:
; SSE:       # %bb.0:
; SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_31:
; AVX:       # %bb.0:
; AVX-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; AVX-NEXT:    retq
  %res = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
  ret <2 x i64> %res
}

; Mask <3,1> with an unused %nonce occupying the first argument.
define <2 x i64> @shuffle_v2i64_31_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_31_copy:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps %xmm2, %xmm0
; SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_31_copy:
; AVX:       # %bb.0:
; AVX-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm2[1],xmm1[1]
; AVX-NEXT:    retq
  %res = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
  ret <2 x i64> %res
}

;
; Shuffles against a zeroinitializer second operand. In the names, 'z' marks
; a result lane taken from the zero vector.
;

; Mask <0,3> - keep the low element of %a, zero the high lane.
define <2 x i64> @shuffle_v2i64_0z(<2 x i64> %a) {
; SSE-LABEL: shuffle_v2i64_0z:
; SSE:       # %bb.0:
; SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_0z:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX-NEXT:    retq
  %res = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
  ret <2 x i64> %res
}

; Mask <1,3> - high element of %a moves to lane 0, lane 1 is zero.
define <2 x i64> @shuffle_v2i64_1z(<2 x i64> %a) {
; SSE-LABEL: shuffle_v2i64_1z:
; SSE:       # %bb.0:
; SSE-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_1z:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
; AVX-NEXT:    retq
  %res = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 1, i32 3>
  ret <2 x i64> %res
}

; Mask <2,0> - lane 0 is zero, low element of %a moves to lane 1.
define <2 x i64> @shuffle_v2i64_z0(<2 x i64> %a) {
; SSE-LABEL: shuffle_v2i64_z0:
; SSE:       # %bb.0:
; SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_z0:
; AVX:       # %bb.0:
; AVX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
; AVX-NEXT:    retq
  %res = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 0>
  ret <2 x i64> %res
}

; Mask <2,1> - lane 0 is zero, high element of %a stays in place.
define <2 x i64> @shuffle_v2i64_z1(<2 x i64> %a) {
; SSE2-LABEL: shuffle_v2i64_z1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    xorps %xmm1, %xmm1
; SSE2-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2i64_z1:
; SSE3:       # %bb.0:
; SSE3-NEXT:    xorps %xmm1, %xmm1
; SSE3-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE3-NEXT:    movaps %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2i64_z1:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    xorps %xmm1, %xmm1
; SSSE3-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSSE3-NEXT:    movaps %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2i64_z1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    xorps %xmm1, %xmm1
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_z1:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX-NEXT:    retq
  %res = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 1>
  ret <2 x i64> %res
}

; Mask <0,3> - keep the low element of %a, zero the high lane.
define <2 x double> @shuffle_v2f64_0z(<2 x double> %a) {
; SSE-LABEL: shuffle_v2f64_0z:
; SSE:       # %bb.0:
; SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2f64_0z:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX-NEXT:    retq
  %res = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %res
}

; Mask <1,3> - high element of %a moves to lane 0, lane 1 is zero.
define <2 x double> @shuffle_v2f64_1z(<2 x double> %a) {
; SSE-LABEL: shuffle_v2f64_1z:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm1, %xmm1
; SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2f64_1z:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; AVX-NEXT:    retq
  %res = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 1, i32 3>
  ret <2 x double> %res
}

; Mask <2,0> - lane 0 is zero, low element of %a moves to lane 1.
define <2 x double> @shuffle_v2f64_z0(<2 x double> %a) {
; SSE-LABEL: shuffle_v2f64_z0:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm1, %xmm1
; SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2f64_z0:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-NEXT:    retq
  %res = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 0>
  ret <2 x double> %res
}

; Mask <2,1> - lane 0 is zero, high element of %a stays in place.
define <2 x double> @shuffle_v2f64_z1(<2 x double> %a) {
; SSE2-LABEL: shuffle_v2f64_z1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    xorpd %xmm1, %xmm1
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2f64_z1:
; SSE3:       # %bb.0:
; SSE3-NEXT:    xorpd %xmm1, %xmm1
; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2f64_z1:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    xorpd %xmm1, %xmm1
; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2f64_z1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    xorps %xmm1, %xmm1
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v2f64_z1:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX-NEXT:    retq
  %res = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
  ret <2 x double> %res
}

;
; Shuffles that pass through a bitcast to <4 x float>.
;

; Builds <zero, a[1]> as v2f64, then swaps the 64-bit halves with a
; <4 x float> shuffle (mask <2,3,0,1>), giving <a[1], zero>.
define <2 x double> @shuffle_v2f64_bitcast_1z(<2 x double> %a) {
; SSE-LABEL: shuffle_v2f64_bitcast_1z:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm1, %xmm1
; SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2f64_bitcast_1z:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; AVX-NEXT:    retq
  %blend64 = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
  %as_f32 = bitcast <2 x double> %blend64 to <4 x float>
  %swap32 = shufflevector <4 x float> %as_f32, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
  %as_f64 = bitcast <4 x float> %swap32 to <2 x double>
  ret <2 x double> %as_f64
}

; Replaces 32-bit lane 0 of %x with float 1.0 via a <4 x float> shuffle, then
; 'and's with <-4294967296, -1>, which clears the low 32 bits of the first
; i64 again.
define <2 x i64> @shuffle_v2i64_bitcast_z123(<2 x i64> %x) {
; SSE2-LABEL: shuffle_v2i64_bitcast_z123:
; SSE2:       # %bb.0:
; SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2i64_bitcast_z123:
; SSE3:       # %bb.0:
; SSE3-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2i64_bitcast_z123:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    andps {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2i64_bitcast_z123:
; SSE41:       # %bb.0:
; SSE41-NEXT:    xorps %xmm1, %xmm1
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_bitcast_z123:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX-NEXT:    retq
  %as_f32 = bitcast <2 x i64> %x to <4 x float>
  %ins32 = shufflevector <4 x float> %as_f32, <4 x float> <float 1.000000e+00, float undef, float undef, float undef>, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  %as_i64 = bitcast <4 x float> %ins32 to <2 x i64>
  %masked = and <2 x i64> %as_i64, <i64 -4294967296, i64 -1>
  ret <2 x i64> %masked
}

;
; Scalar insertion expressed as insertelement + shufflevector. The scalar
; comes either from an argument ("reg") or from a load ("mem").
;

; Scalar argument in lane 0, zero in lane 1.
define <2 x i64> @insert_reg_and_zero_v2i64(i64 %a) {
; SSE-LABEL: insert_reg_and_zero_v2i64:
; SSE:       # %bb.0:
; SSE-NEXT:    movq %rdi, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_reg_and_zero_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovq %rdi, %xmm0
; AVX-NEXT:    retq
  %ins = insertelement <2 x i64> undef, i64 %a, i32 0
  %res = shufflevector <2 x i64> %ins, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
  ret <2 x i64> %res
}

; Loaded scalar in lane 0, zero in lane 1.
define <2 x i64> @insert_mem_and_zero_v2i64(i64* %ptr) {
; SSE-LABEL: insert_mem_and_zero_v2i64:
; SSE:       # %bb.0:
; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_mem_and_zero_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    retq
  %elt = load i64, i64* %ptr
  %ins = insertelement <2 x i64> undef, i64 %elt, i32 0
  %res = shufflevector <2 x i64> %ins, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
  ret <2 x i64> %res
}

; Scalar argument in lane 0, zero in lane 1.
define <2 x double> @insert_reg_and_zero_v2f64(double %a) {
; SSE-LABEL: insert_reg_and_zero_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_reg_and_zero_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX-NEXT:    retq
  %ins = insertelement <2 x double> undef, double %a, i32 0
  %res = shufflevector <2 x double> %ins, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %res
}

; Loaded scalar in lane 0, zero in lane 1.
define <2 x double> @insert_mem_and_zero_v2f64(double* %ptr) {
; SSE-LABEL: insert_mem_and_zero_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_mem_and_zero_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    retq
  %elt = load double, double* %ptr
  %ins = insertelement <2 x double> undef, double %elt, i32 0
  %res = shufflevector <2 x double> %ins, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %res
}

; Scalar argument replaces lane 0 of %b (mask <0,3>).
define <2 x i64> @insert_reg_lo_v2i64(i64 %a, <2 x i64> %b) {
; SSE2-LABEL: insert_reg_lo_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movq %rdi, %xmm1
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_reg_lo_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movq %rdi, %xmm1
; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_reg_lo_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movq %rdi, %xmm1
; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_reg_lo_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pinsrq $0, %rdi, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_reg_lo_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpinsrq $0, %rdi, %xmm0, %xmm0
; AVX-NEXT:    retq
  %ins = insertelement <2 x i64> undef, i64 %a, i32 0
  %res = shufflevector <2 x i64> %ins, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x i64> %res
}

; Loaded scalar replaces lane 0 of %b (mask <0,3>).
define <2 x i64> @insert_mem_lo_v2i64(i64* %ptr, <2 x i64> %b) {
; SSE2-LABEL: insert_mem_lo_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_mem_lo_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_mem_lo_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_mem_lo_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pinsrq $0, (%rdi), %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_mem_lo_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpinsrq $0, (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %elt = load i64, i64* %ptr
  %ins = insertelement <2 x i64> undef, i64 %elt, i32 0
  %res = shufflevector <2 x i64> %ins, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x i64> %res
}

; Scalar argument replaces lane 1 of %b (mask <2,0>).
define <2 x i64> @insert_reg_hi_v2i64(i64 %a, <2 x i64> %b) {
; SSE2-LABEL: insert_reg_hi_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movq %rdi, %xmm1
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_reg_hi_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movq %rdi, %xmm1
; SSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_reg_hi_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movq %rdi, %xmm1
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_reg_hi_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pinsrq $1, %rdi, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_reg_hi_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpinsrq $1, %rdi, %xmm0, %xmm0
; AVX-NEXT:    retq
  %ins = insertelement <2 x i64> undef, i64 %a, i32 0
  %res = shufflevector <2 x i64> %ins, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
  ret <2 x i64> %res
}

; Loaded scalar replaces lane 1 of %b (mask <2,0>).
define <2 x i64> @insert_mem_hi_v2i64(i64* %ptr, <2 x i64> %b) {
; SSE2-LABEL: insert_mem_hi_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_mem_hi_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; SSE3-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_mem_hi_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; SSSE3-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_mem_hi_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pinsrq $1, (%rdi), %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_mem_hi_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpinsrq $1, (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %elt = load i64, i64* %ptr
  %ins = insertelement <2 x i64> undef, i64 %elt, i32 0
  %res = shufflevector <2 x i64> %ins, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
  ret <2 x i64> %res
}

; Scalar argument replaces lane 0 of %b (mask <0,3>).
define <2 x double> @insert_reg_lo_v2f64(double %a, <2 x double> %b) {
; SSE2-LABEL: insert_reg_lo_v2f64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_reg_lo_v2f64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_reg_lo_v2f64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_reg_lo_v2f64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_reg_lo_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX-NEXT:    retq
  %ins = insertelement <2 x double> undef, double %a, i32 0
  %res = shufflevector <2 x double> %ins, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %res
}

define <2 x double> @insert_mem_lo_v2f64(double* %ptr, <2 x double> %b) {
; SSE-LABEL: insert_mem_lo_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_mem_lo_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovlps {{.*#+}} xmm0 = 
mem[0,1],xmm0[2,3] 1121; AVX-NEXT: retq 1122 %a = load double, double* %ptr 1123 %v = insertelement <2 x double> undef, double %a, i32 0 1124 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3> 1125 ret <2 x double> %shuffle 1126} 1127 1128define <2 x double> @insert_reg_hi_v2f64(double %a, <2 x double> %b) { 1129; SSE-LABEL: insert_reg_hi_v2f64: 1130; SSE: # %bb.0: 1131; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] 1132; SSE-NEXT: movaps %xmm1, %xmm0 1133; SSE-NEXT: retq 1134; 1135; AVX-LABEL: insert_reg_hi_v2f64: 1136; AVX: # %bb.0: 1137; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] 1138; AVX-NEXT: retq 1139 %v = insertelement <2 x double> undef, double %a, i32 0 1140 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0> 1141 ret <2 x double> %shuffle 1142} 1143 1144define <2 x double> @insert_mem_hi_v2f64(double* %ptr, <2 x double> %b) { 1145; SSE-LABEL: insert_mem_hi_v2f64: 1146; SSE: # %bb.0: 1147; SSE-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] 1148; SSE-NEXT: retq 1149; 1150; AVX-LABEL: insert_mem_hi_v2f64: 1151; AVX: # %bb.0: 1152; AVX-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] 1153; AVX-NEXT: retq 1154 %a = load double, double* %ptr 1155 %v = insertelement <2 x double> undef, double %a, i32 0 1156 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0> 1157 ret <2 x double> %shuffle 1158} 1159 1160define <2 x double> @insert_dup_reg_v2f64(double %a) { 1161; SSE2-LABEL: insert_dup_reg_v2f64: 1162; SSE2: # %bb.0: 1163; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] 1164; SSE2-NEXT: retq 1165; 1166; SSE3-LABEL: insert_dup_reg_v2f64: 1167; SSE3: # %bb.0: 1168; SSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0] 1169; SSE3-NEXT: retq 1170; 1171; SSSE3-LABEL: insert_dup_reg_v2f64: 1172; SSSE3: # %bb.0: 1173; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0] 1174; SSSE3-NEXT: retq 1175; 1176; SSE41-LABEL: insert_dup_reg_v2f64: 1177; SSE41: # %bb.0: 1178; SSE41-NEXT: 
movddup {{.*#+}} xmm0 = xmm0[0,0] 1179; SSE41-NEXT: retq 1180; 1181; AVX-LABEL: insert_dup_reg_v2f64: 1182; AVX: # %bb.0: 1183; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] 1184; AVX-NEXT: retq 1185 %v = insertelement <2 x double> undef, double %a, i32 0 1186 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0> 1187 ret <2 x double> %shuffle 1188} 1189 1190define <2 x double> @insert_dup_mem_v2f64(double* %ptr) { 1191; SSE2-LABEL: insert_dup_mem_v2f64: 1192; SSE2: # %bb.0: 1193; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1194; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] 1195; SSE2-NEXT: retq 1196; 1197; SSE3-LABEL: insert_dup_mem_v2f64: 1198; SSE3: # %bb.0: 1199; SSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] 1200; SSE3-NEXT: retq 1201; 1202; SSSE3-LABEL: insert_dup_mem_v2f64: 1203; SSSE3: # %bb.0: 1204; SSSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] 1205; SSSE3-NEXT: retq 1206; 1207; SSE41-LABEL: insert_dup_mem_v2f64: 1208; SSE41: # %bb.0: 1209; SSE41-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] 1210; SSE41-NEXT: retq 1211; 1212; AVX-LABEL: insert_dup_mem_v2f64: 1213; AVX: # %bb.0: 1214; AVX-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0] 1215; AVX-NEXT: retq 1216 %a = load double, double* %ptr 1217 %v = insertelement <2 x double> undef, double %a, i32 0 1218 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0> 1219 ret <2 x double> %shuffle 1220} 1221 1222define <2 x double> @insert_dup_mem128_v2f64(<2 x double>* %ptr) nounwind { 1223; SSE2-LABEL: insert_dup_mem128_v2f64: 1224; SSE2: # %bb.0: 1225; SSE2-NEXT: movaps (%rdi), %xmm0 1226; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] 1227; SSE2-NEXT: retq 1228; 1229; SSE3-LABEL: insert_dup_mem128_v2f64: 1230; SSE3: # %bb.0: 1231; SSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] 1232; SSE3-NEXT: retq 1233; 1234; SSSE3-LABEL: insert_dup_mem128_v2f64: 1235; SSSE3: # %bb.0: 1236; SSSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] 1237; SSSE3-NEXT: retq 1238; 1239; SSE41-LABEL: 
insert_dup_mem128_v2f64: 1240; SSE41: # %bb.0: 1241; SSE41-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] 1242; SSE41-NEXT: retq 1243; 1244; AVX-LABEL: insert_dup_mem128_v2f64: 1245; AVX: # %bb.0: 1246; AVX-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0] 1247; AVX-NEXT: retq 1248 %v = load <2 x double>, <2 x double>* %ptr 1249 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0> 1250 ret <2 x double> %shuffle 1251} 1252 1253 1254define <2 x i64> @insert_dup_mem_v2i64(i64* %ptr) { 1255; SSE-LABEL: insert_dup_mem_v2i64: 1256; SSE: # %bb.0: 1257; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 1258; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] 1259; SSE-NEXT: retq 1260; 1261; AVX-LABEL: insert_dup_mem_v2i64: 1262; AVX: # %bb.0: 1263; AVX-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0] 1264; AVX-NEXT: retq 1265 %tmp = load i64, i64* %ptr, align 1 1266 %tmp1 = insertelement <2 x i64> undef, i64 %tmp, i32 0 1267 %tmp2 = shufflevector <2 x i64> %tmp1, <2 x i64> undef, <2 x i32> zeroinitializer 1268 ret <2 x i64> %tmp2 1269} 1270 1271define <2 x double> @shuffle_mem_v2f64_10(<2 x double>* %ptr) { 1272; SSE-LABEL: shuffle_mem_v2f64_10: 1273; SSE: # %bb.0: 1274; SSE-NEXT: movaps (%rdi), %xmm0 1275; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3,0,1] 1276; SSE-NEXT: retq 1277; 1278; AVX-LABEL: shuffle_mem_v2f64_10: 1279; AVX: # %bb.0: 1280; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = mem[1,0] 1281; AVX-NEXT: retq 1282 1283 %a = load <2 x double>, <2 x double>* %ptr 1284 %shuffle = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 1, i32 0> 1285 ret <2 x double> %shuffle 1286} 1287 1288define <2 x double> @shuffle_mem_v2f64_31(<2 x double> %a, <2 x double>* %b) { 1289; SSE-LABEL: shuffle_mem_v2f64_31: 1290; SSE: # %bb.0: 1291; SSE-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] 1292; SSE-NEXT: retq 1293; 1294; AVX-LABEL: shuffle_mem_v2f64_31: 1295; AVX: # %bb.0: 1296; AVX-NEXT: vmovlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] 1297; AVX-NEXT: retq 1298 %c = load <2 x 
double>, <2 x double>* %b 1299 %f = shufflevector <2 x double> %a, <2 x double> %c, <2 x i32> <i32 3, i32 1> 1300 ret <2 x double> %f 1301} 1302 1303define <2 x double> @shuffle_mem_v2f64_02(<2 x double> %a, <2 x double>* %pb) { 1304; SSE-LABEL: shuffle_mem_v2f64_02: 1305; SSE: # %bb.0: 1306; SSE-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] 1307; SSE-NEXT: retq 1308; 1309; AVX-LABEL: shuffle_mem_v2f64_02: 1310; AVX: # %bb.0: 1311; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] 1312; AVX-NEXT: retq 1313 %b = load <2 x double>, <2 x double>* %pb, align 1 1314 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2> 1315 ret <2 x double> %shuffle 1316} 1317 1318define <2 x double> @shuffle_mem_v2f64_21(<2 x double> %a, <2 x double>* %pb) { 1319; SSE2-LABEL: shuffle_mem_v2f64_21: 1320; SSE2: # %bb.0: 1321; SSE2-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] 1322; SSE2-NEXT: retq 1323; 1324; SSE3-LABEL: shuffle_mem_v2f64_21: 1325; SSE3: # %bb.0: 1326; SSE3-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] 1327; SSE3-NEXT: retq 1328; 1329; SSSE3-LABEL: shuffle_mem_v2f64_21: 1330; SSSE3: # %bb.0: 1331; SSSE3-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] 1332; SSSE3-NEXT: retq 1333; 1334; SSE41-LABEL: shuffle_mem_v2f64_21: 1335; SSE41: # %bb.0: 1336; SSE41-NEXT: movups (%rdi), %xmm1 1337; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] 1338; SSE41-NEXT: retq 1339; 1340; AVX-LABEL: shuffle_mem_v2f64_21: 1341; AVX: # %bb.0: 1342; AVX-NEXT: vblendps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] 1343; AVX-NEXT: retq 1344 %b = load <2 x double>, <2 x double>* %pb, align 1 1345 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 1> 1346 ret <2 x double> %shuffle 1347} 1348