; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX1OR2 --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX1OR2 --check-prefix=AVX2 --check-prefix=AVX2-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefix=ALL --check-prefix=AVX1OR2 --check-prefix=AVX2 --check-prefix=AVX2-FAST
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX512VL --check-prefix=AVX512VL-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+fast-variable-shuffle | FileCheck %s --check-prefix=ALL --check-prefix=AVX512VL --check-prefix=AVX512VL-FAST

; This file tests llc's lowering of 4-element 256-bit shufflevector patterns
; (<4 x double> and <4 x i64>) under AVX, AVX2, and AVX512VL, with and without
; the +fast-variable-shuffle tuning flag. Function names encode the shuffle
; mask: digits 0-3 select lanes of %a, 4-7 select lanes of %b, 'u' is undef,
; 'z' is a zero element. The CHECK lines are maintained by
; utils/update_llc_test_checks.py -- do not edit them by hand; regenerate.

define <4 x double> @shuffle_v4f64_0000(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0000:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4f64_0000:
; AVX2: # %bb.0:
; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4f64_0000:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vbroadcastsd %xmm0, %ymm0
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0001(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0001:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = xmm0[0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4f64_0001:
; AVX2: # %bb.0:
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4f64_0001:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0020(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0020:
; AVX1: # %bb.0:
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm1
; AVX1-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4f64_0020:
; AVX2: # %bb.0:
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,0]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4f64_0020:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,0]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0300(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0300:
; AVX1: # %bb.0:
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[2]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4f64_0300:
; AVX2: # %bb.0:
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,3,0,0]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4f64_0300:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,3,0,0]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1000(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_1000:
; AVX1: # %bb.0:
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,2]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4f64_1000:
; AVX2: # %bb.0:
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,0,0,0]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4f64_1000:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,0,0,0]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_2200(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_2200:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4f64_2200:
; AVX2: # %bb.0:
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,0,0]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4f64_2200:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,0,0]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_2222(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_2222:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4f64_2222:
; AVX2: # %bb.0:
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,2,2]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4f64_2222:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,2,2]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
  ret <4 x double> %shuffle
}

; Same mask as shuffle_v4f64_2222 but the operands arrive as <4 x i64> and are
; bitcast to <4 x double> first; lowering should be unaffected by the bitcasts.
define <4 x double> @shuffle_v4f64_2222_bc(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4f64_2222_bc:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4f64_2222_bc:
; AVX2: # %bb.0:
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,2,2]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4f64_2222_bc:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,2,2]
; AVX512VL-NEXT: retq
  %tmp0 = bitcast <4 x i64> %a to <4 x double>
  %tmp1 = bitcast <4 x i64> %b to <4 x double>
  %shuffle = shufflevector <4 x double> %tmp0, <4 x double> %tmp1, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_2233(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_2233:
; AVX1: # %bb.0:
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,3,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4f64_2233:
; AVX2: # %bb.0:
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,3,3]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4f64_2233:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,3,3]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_3330(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_3330:
; AVX1: # %bb.0:
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
; AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[2]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4f64_3330:
; AVX2: # %bb.0:
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,0]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4f64_3330:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,0]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_3210(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_3210:
; AVX1: # %bb.0:
; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4f64_3210:
; AVX2: # %bb.0:
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,1,0]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4f64_3210:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,1,0]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x double> %shuffle
}

; In-lane-only masks below lower identically on all subtargets (ALL prefix).
define <4 x double> @shuffle_v4f64_0023(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0023:
; ALL: # %bb.0:
; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,3]
; ALL-NEXT: retq

  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0022(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0022:
; ALL: # %bb.0:
; ALL-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  ret <4 x double> %shuffle
}

; As shuffle_v4f64_0022 but %a is loaded from memory: the movddup should fold
; the load (mem operand in the CHECK line).
define <4 x double> @shuffle_v4f64mem_0022(<4 x double>* %ptr, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64mem_0022:
; ALL: # %bb.0:
; ALL-NEXT: vmovddup {{.*#+}} ymm0 = mem[0,0,2,2]
; ALL-NEXT: retq
  %a = load <4 x double>, <4 x double>* %ptr
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1032(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1032:
; ALL: # %bb.0:
; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1133(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1133:
; ALL: # %bb.0:
; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,3]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1023(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1023:
; ALL: # %bb.0:
; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 2, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1022(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1022:
; ALL: # %bb.0:
; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,2]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 2, i32 2>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0213(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0213:
; AVX1: # %bb.0:
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,2,3]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[3],ymm1[3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4f64_0213:
; AVX2: # %bb.0:
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4f64_0213:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  ret <4 x double> %shuffle
}

; Two-input shuffles: masks using elements 4-7 pull from %b. With
; +fast-variable-shuffle, AVX512VL prefers a single vpermt2pd/vpermi2pd.
define <4 x double> @shuffle_v4f64_0423(<4 x double> %a, <4 x double> %b) {
; AVX1OR2-LABEL: shuffle_v4f64_0423:
; AVX1OR2: # %bb.0:
; AVX1OR2-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0]
; AVX1OR2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
; AVX1OR2-NEXT: retq
;
; AVX512VL-SLOW-LABEL: shuffle_v4f64_0423:
; AVX512VL-SLOW: # %bb.0:
; AVX512VL-SLOW-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0]
; AVX512VL-SLOW-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
; AVX512VL-SLOW-NEXT: retq
;
; AVX512VL-FAST-LABEL: shuffle_v4f64_0423:
; AVX512VL-FAST: # %bb.0:
; AVX512VL-FAST-NEXT: vmovapd {{.*#+}} ymm2 = [0,4,2,3]
; AVX512VL-FAST-NEXT: vpermt2pd %ymm1, %ymm2, %ymm0
; AVX512VL-FAST-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0462(<4 x double> %a, <4 x double> %b) {
; AVX1OR2-LABEL: shuffle_v4f64_0462:
; AVX1OR2: # %bb.0:
; AVX1OR2-NEXT: vblendps {{.*#+}} ymm2 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX1OR2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX1OR2-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm2[0],ymm0[2],ymm2[2]
; AVX1OR2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4f64_0462:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovapd {{.*#+}} ymm2 = [0,4,6,2]
; AVX512VL-NEXT: vpermt2pd %ymm1, %ymm2, %ymm0
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 6, i32 2>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0426(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0426:
; ALL: # %bb.0:
; ALL-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1537(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1537:
; ALL: # %bb.0:
; ALL-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_4062(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_4062:
; ALL: # %bb.0:
; ALL-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 0, i32 6, i32 2>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_5173(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_5173:
; ALL: # %bb.0:
; ALL-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[3],ymm0[3]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 5, i32 1, i32 7, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_5163(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_5163:
; ALL: # %bb.0:
; ALL-NEXT: vshufpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[2],ymm0[3]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 5, i32 1, i32 6, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0527(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0527:
; ALL: # %bb.0:
; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_4163(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_4163:
; ALL: # %bb.0:
; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0145(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0145:
; ALL: # %bb.0:
; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_4501(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_4501:
; ALL: # %bb.0:
; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0167(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0167:
; ALL: # %bb.0:
; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1054(<4 x double> %a, <4 x double> %b) {
; AVX1OR2-LABEL: shuffle_v4f64_1054:
; AVX1OR2: # %bb.0:
; AVX1OR2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1OR2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1OR2-NEXT: retq
;
; AVX512VL-SLOW-LABEL: shuffle_v4f64_1054:
; AVX512VL-SLOW: # %bb.0:
; AVX512VL-SLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-SLOW-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX512VL-SLOW-NEXT: retq
;
; AVX512VL-FAST-LABEL: shuffle_v4f64_1054:
; AVX512VL-FAST: # %bb.0:
; AVX512VL-FAST-NEXT: vmovapd {{.*#+}} ymm2 = [1,0,5,4]
; AVX512VL-FAST-NEXT: vpermt2pd %ymm1, %ymm2, %ymm0
; AVX512VL-FAST-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 5, i32 4>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_3254(<4 x double> %a, <4 x double> %b) {
; AVX1OR2-LABEL: shuffle_v4f64_3254:
; AVX1OR2: # %bb.0:
; AVX1OR2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; AVX1OR2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1OR2-NEXT: retq
;
; AVX512VL-SLOW-LABEL: shuffle_v4f64_3254:
; AVX512VL-SLOW: # %bb.0:
; AVX512VL-SLOW-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; AVX512VL-SLOW-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX512VL-SLOW-NEXT: retq
;
; AVX512VL-FAST-LABEL: shuffle_v4f64_3254:
; AVX512VL-FAST: # %bb.0:
; AVX512VL-FAST-NEXT: vmovapd {{.*#+}} ymm2 = [3,2,5,4]
; AVX512VL-FAST-NEXT: vpermt2pd %ymm1, %ymm2, %ymm0
; AVX512VL-FAST-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 2, i32 5, i32 4>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_3276(<4 x double> %a, <4 x double> %b) {
; AVX1OR2-LABEL: shuffle_v4f64_3276:
; AVX1OR2: # %bb.0:
; AVX1OR2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; AVX1OR2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1OR2-NEXT: retq
;
; AVX512VL-SLOW-LABEL: shuffle_v4f64_3276:
; AVX512VL-SLOW: # %bb.0:
; AVX512VL-SLOW-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; AVX512VL-SLOW-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX512VL-SLOW-NEXT: retq
;
; AVX512VL-FAST-LABEL: shuffle_v4f64_3276:
; AVX512VL-FAST: # %bb.0:
; AVX512VL-FAST-NEXT: vmovapd {{.*#+}} ymm2 = [3,2,7,6]
; AVX512VL-FAST-NEXT: vpermt2pd %ymm1, %ymm2, %ymm0
; AVX512VL-FAST-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 2, i32 7, i32 6>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1076(<4 x double> %a, <4 x double> %b) {
; AVX1OR2-LABEL: shuffle_v4f64_1076:
; AVX1OR2: # %bb.0:
; AVX1OR2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; AVX1OR2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1OR2-NEXT: retq
;
; AVX512VL-SLOW-LABEL: shuffle_v4f64_1076:
; AVX512VL-SLOW: # %bb.0:
; AVX512VL-SLOW-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; AVX512VL-SLOW-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX512VL-SLOW-NEXT: retq
;
; AVX512VL-FAST-LABEL: shuffle_v4f64_1076:
; AVX512VL-FAST: # %bb.0:
; AVX512VL-FAST-NEXT: vmovapd {{.*#+}} ymm2 = [1,0,7,6]
; AVX512VL-FAST-NEXT: vpermt2pd %ymm1, %ymm2, %ymm0
; AVX512VL-FAST-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 7, i32 6>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0415(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0415:
; AVX1: # %bb.0:
; AVX1-NEXT: vunpckhpd {{.*#+}} xmm2 = xmm0[1],xmm1[1]
; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4f64_0415:
; AVX2: # %bb.0:
; AVX2-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,0,2,1]
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4f64_0415:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovapd {{.*#+}} ymm2 = [0,4,1,5]
; AVX512VL-NEXT: vpermt2pd %ymm1, %ymm2, %ymm0
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  ret <4 x double> %shuffle
}

; Masks containing undef ('u') elements: lowering may pick any value there.
define <4 x double> @shuffle_v4f64_u062(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_u062:
; ALL: # %bb.0:
; ALL-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 undef, i32 0, i32 6, i32 2>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_15uu(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_15uu:
; ALL: # %bb.0:
; ALL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 5, i32 undef, i32 undef>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_11uu(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_11uu:
; ALL: # %bb.0:
; ALL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,1]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 1, i32 undef, i32 undef>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_22uu(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_22uu:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4f64_22uu:
; AVX2: # %bb.0:
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,2,2]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4f64_22uu:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,2,2]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 2, i32 2, i32 undef, i32 undef>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_3333(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_3333:
; AVX1: # %bb.0:
; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,3]
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4f64_3333:
; AVX2: # %bb.0:
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,3]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4f64_3333:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,3]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0456(<4 x double> %a, <4 x double> %b) {
; AVX1OR2-LABEL: shuffle_v4f64_0456:
; AVX1OR2: # %bb.0:
; AVX1OR2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1OR2-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[3],ymm1[2]
; AVX1OR2-NEXT: retq
;
; AVX512VL-SLOW-LABEL: shuffle_v4f64_0456:
; AVX512VL-SLOW: # %bb.0:
; AVX512VL-SLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-SLOW-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[3],ymm1[2]
; AVX512VL-SLOW-NEXT: retq
;
; AVX512VL-FAST-LABEL: shuffle_v4f64_0456:
; AVX512VL-FAST: # %bb.0:
; AVX512VL-FAST-NEXT: vmovapd {{.*#+}} ymm2 = [4,0,1,2]
; AVX512VL-FAST-NEXT: vpermi2pd %ymm0, %ymm1, %ymm2
; AVX512VL-FAST-NEXT: vmovapd %ymm2, %ymm0
; AVX512VL-FAST-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 5, i32 6>
  ret <4 x double> %shuffle
}

; 'z' masks: the second shuffle operand is a constant containing 0.0, so the
; zero lanes should come from a materialized zero register (vxorpd).
define <4 x double> @shuffle_v4f64_0z3z(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0z3z:
; ALL: # %bb.0:
; ALL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; ALL-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[3],ymm1[2]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> <double 0.000000e+00, double undef, double undef, double undef>, <4 x i32> <i32 0, i32 4, i32 3, i32 4>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1z2z(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1z2z:
; ALL: # %bb.0:
; ALL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; ALL-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[2]
; ALL-NEXT: retq
  %1 = shufflevector <4 x double> %a, <4 x double> <double 0.000000e+00, double undef, double undef, double undef>, <4 x i32> <i32 1, i32 4, i32 2, i32 4>
  ret <4 x double> %1
}

define <4 x double> @shuffle_v4f64_0044(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0044:
; AVX1: # %bb.0:
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4f64_0044:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,1]
; AVX2-NEXT: retq
;
; AVX512VL-SLOW-LABEL: shuffle_v4f64_0044:
; AVX512VL-SLOW: # %bb.0:
; AVX512VL-SLOW-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,1]
; AVX512VL-SLOW-NEXT: retq
;
; AVX512VL-FAST-LABEL: shuffle_v4f64_0044:
; AVX512VL-FAST: # %bb.0:
; AVX512VL-FAST-NEXT: vmovapd {{.*#+}} ymm2 = [0,0,4,4]
; AVX512VL-FAST-NEXT: vpermt2pd %ymm1, %ymm2, %ymm0
; AVX512VL-FAST-NEXT: retq
  %1 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 4, i32 4>
  ret <4 x double> %1
}

; The _v2f64 variants build the <4 x double> from two 128-bit inputs via
; chained shuffles (widening through a concat shuffle).
define <4 x double> @shuffle_v4f64_0044_v2f64(<2 x double> %a, <2 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0044_v2f64:
; ALL: # %bb.0:
; ALL-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; ALL-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0]
; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; ALL-NEXT: retq
  %1 = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 0, i32 0>
  %2 = shufflevector <2 x double> %b, <2 x double> undef, <2 x i32> <i32 0, i32 0>
  %3 = shufflevector <2 x double> %1, <2 x double> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x double> %3
}

define <4 x double> @shuffle_v4f64_1032_v2f64(<2 x double> %a, <2 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1032_v2f64:
; ALL: # %bb.0:
; ALL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; ALL-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; ALL-NEXT: retq
  %1 = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 1, i32 0>
  %2 = shufflevector <2 x double> %b, <2 x double> undef, <2 x i32> <i32 1, i32 0>
  %3 = shufflevector <2 x double> %1, <2 x double> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x double> %3
}

;PR34359
define <4 x double> @shuffle_v4f64_2345_0567_select(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3) {
; ALL-LABEL: shuffle_v4f64_2345_0567_select:
; ALL: # %bb.0:
; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm2[0,1],ymm0[2,3,4,5,6,7]
; ALL-NEXT: retq
  %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
  %res = select <4 x i1> <i1 0, i1 1, i1 1, i1 1>, <4 x double> %shuf, <4 x double> %vec3
  ret <4 x double> %res
}

; Integer (<4 x i64>) versions of the masks above; these should largely lower
; to the same permutes as the floating-point variants.
define <4 x i64> @shuffle_v4i64_0000(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0000:
; AVX1: # %bb.0:
; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_0000:
; AVX2: # %bb.0:
; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_0000:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vbroadcastsd %xmm0, %ymm0
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0001(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0001:
; AVX1: # %bb.0:
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_0001:
; AVX2: # %bb.0:
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_0001:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0020(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0020:
; AVX1: # %bb.0:
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm1
; AVX1-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_0020:
; AVX2: # %bb.0:
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,0]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_0020:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,0]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0112(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0112:
; AVX1: # %bb.0:
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm1
; AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[3],ymm0[2]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_0112:
; AVX2: # %bb.0:
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,2]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_0112:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,2]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 1, i32 2>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0300(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0300:
; AVX1: # %bb.0:
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[2]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_0300:
; AVX2: # %bb.0:
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,3,0,0]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_0300:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,3,0,0]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_1000(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_1000:
; AVX1: # %bb.0:
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,2]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_1000:
; AVX2: # %bb.0:
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,0,0,0]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_1000:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,0,0,0]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_2200(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_2200:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_2200:
; AVX2: # %bb.0:
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,0,0]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_2200:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,0,0]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_3330(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_3330:
; AVX1: # %bb.0:
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
; AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[2]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_3330:
; AVX2: # %bb.0:
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,0]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_3330:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,0]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_3210(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_3210:
; AVX1: # %bb.0:
; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_3210:
; AVX2: # %bb.0:
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,1,0]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_3210:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,1,0]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0213(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0213:
; AVX1: # %bb.0:
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,2,3]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[3],ymm1[3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_0213:
; AVX2: # %bb.0:
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_0213:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX512VL-NEXT: retq
934 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 2, i32 1, i32 3> 935 ret <4 x i64> %shuffle 936} 937 938define <4 x i64> @shuffle_v4i64_0124(<4 x i64> %a, <4 x i64> %b) { 939; AVX1-LABEL: shuffle_v4i64_0124: 940; AVX1: # %bb.0: 941; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 942; AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[2] 943; AVX1-NEXT: retq 944; 945; AVX2-LABEL: shuffle_v4i64_0124: 946; AVX2: # %bb.0: 947; AVX2-NEXT: vbroadcastsd %xmm1, %ymm1 948; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7] 949; AVX2-NEXT: retq 950; 951; AVX512VL-SLOW-LABEL: shuffle_v4i64_0124: 952; AVX512VL-SLOW: # %bb.0: 953; AVX512VL-SLOW-NEXT: vbroadcastsd %xmm1, %ymm1 954; AVX512VL-SLOW-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7] 955; AVX512VL-SLOW-NEXT: retq 956; 957; AVX512VL-FAST-LABEL: shuffle_v4i64_0124: 958; AVX512VL-FAST: # %bb.0: 959; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,2,4] 960; AVX512VL-FAST-NEXT: vpermt2q %ymm1, %ymm2, %ymm0 961; AVX512VL-FAST-NEXT: retq 962 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 4> 963 ret <4 x i64> %shuffle 964} 965 966define <4 x i64> @shuffle_v4i64_0142(<4 x i64> %a, <4 x i64> %b) { 967; AVX1-LABEL: shuffle_v4i64_0142: 968; AVX1: # %bb.0: 969; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 970; AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[2] 971; AVX1-NEXT: retq 972; 973; AVX2-LABEL: shuffle_v4i64_0142: 974; AVX2: # %bb.0: 975; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 976; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,2,2] 977; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7] 978; AVX2-NEXT: retq 979; 980; AVX512VL-LABEL: shuffle_v4i64_0142: 981; AVX512VL: # %bb.0: 982; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,4,2] 983; AVX512VL-NEXT: vpermt2q %ymm1, %ymm2, %ymm0 984; AVX512VL-NEXT: retq 985 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 
0, i32 1, i32 4, i32 2> 986 ret <4 x i64> %shuffle 987} 988 989define <4 x i64> @shuffle_v4i64_0412(<4 x i64> %a, <4 x i64> %b) { 990; AVX1-LABEL: shuffle_v4i64_0412: 991; AVX1: # %bb.0: 992; AVX1-NEXT: vblendpd {{.*#+}} ymm1 = ymm1[0,1],ymm0[2],ymm1[3] 993; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 994; AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[3],ymm1[2] 995; AVX1-NEXT: retq 996; 997; AVX2-LABEL: shuffle_v4i64_0412: 998; AVX2: # %bb.0: 999; AVX2-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0] 1000; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,2] 1001; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7] 1002; AVX2-NEXT: retq 1003; 1004; AVX512VL-LABEL: shuffle_v4i64_0412: 1005; AVX512VL: # %bb.0: 1006; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [0,4,1,2] 1007; AVX512VL-NEXT: vpermt2q %ymm1, %ymm2, %ymm0 1008; AVX512VL-NEXT: retq 1009 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 2> 1010 ret <4 x i64> %shuffle 1011} 1012 1013define <4 x i64> @shuffle_v4i64_4012(<4 x i64> %a, <4 x i64> %b) { 1014; AVX1-LABEL: shuffle_v4i64_4012: 1015; AVX1: # %bb.0: 1016; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1 1017; AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[3],ymm0[2] 1018; AVX1-NEXT: retq 1019; 1020; AVX2-LABEL: shuffle_v4i64_4012: 1021; AVX2: # %bb.0: 1022; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,2] 1023; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7] 1024; AVX2-NEXT: retq 1025; 1026; AVX512VL-SLOW-LABEL: shuffle_v4i64_4012: 1027; AVX512VL-SLOW: # %bb.0: 1028; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,2] 1029; AVX512VL-SLOW-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7] 1030; AVX512VL-SLOW-NEXT: retq 1031; 1032; AVX512VL-FAST-LABEL: shuffle_v4i64_4012: 1033; AVX512VL-FAST: # %bb.0: 1034; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = [4,0,1,2] 1035; AVX512VL-FAST-NEXT: vpermt2q %ymm1, %ymm2, %ymm0 1036; AVX512VL-FAST-NEXT: retq 1037 %shuffle = 
shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 2> 1038 ret <4 x i64> %shuffle 1039} 1040 1041define <4 x i64> @shuffle_v4i64_0145(<4 x i64> %a, <4 x i64> %b) { 1042; ALL-LABEL: shuffle_v4i64_0145: 1043; ALL: # %bb.0: 1044; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1045; ALL-NEXT: retq 1046 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5> 1047 ret <4 x i64> %shuffle 1048} 1049 1050define <4 x i64> @shuffle_v4i64_0451(<4 x i64> %a, <4 x i64> %b) { 1051; AVX1-LABEL: shuffle_v4i64_0451: 1052; AVX1: # %bb.0: 1053; AVX1-NEXT: vunpckhpd {{.*#+}} xmm2 = xmm1[1],xmm0[1] 1054; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1055; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1056; AVX1-NEXT: retq 1057; 1058; AVX2-LABEL: shuffle_v4i64_0451: 1059; AVX2: # %bb.0: 1060; AVX2-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,0,1,3] 1061; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,2,1] 1062; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5],ymm0[6,7] 1063; AVX2-NEXT: retq 1064; 1065; AVX512VL-LABEL: shuffle_v4i64_0451: 1066; AVX512VL: # %bb.0: 1067; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [0,4,5,1] 1068; AVX512VL-NEXT: vpermt2q %ymm1, %ymm2, %ymm0 1069; AVX512VL-NEXT: retq 1070 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 5, i32 1> 1071 ret <4 x i64> %shuffle 1072} 1073 1074define <4 x i64> @shuffle_v4i64_4501(<4 x i64> %a, <4 x i64> %b) { 1075; ALL-LABEL: shuffle_v4i64_4501: 1076; ALL: # %bb.0: 1077; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1078; ALL-NEXT: retq 1079 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1> 1080 ret <4 x i64> %shuffle 1081} 1082 1083define <4 x i64> @shuffle_v4i64_4015(<4 x i64> %a, <4 x i64> %b) { 1084; AVX1-LABEL: shuffle_v4i64_4015: 1085; AVX1: # %bb.0: 1086; AVX1-NEXT: vunpckhpd {{.*#+}} xmm2 = xmm0[1],xmm1[1] 1087; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] 1088; AVX1-NEXT: 
vinsertf128 $1, %xmm2, %ymm0, %ymm0 1089; AVX1-NEXT: retq 1090; 1091; AVX2-LABEL: shuffle_v4i64_4015: 1092; AVX2: # %bb.0: 1093; AVX2-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,1,2,1] 1094; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,3] 1095; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5],ymm1[6,7] 1096; AVX2-NEXT: retq 1097; 1098; AVX512VL-LABEL: shuffle_v4i64_4015: 1099; AVX512VL: # %bb.0: 1100; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [4,0,1,5] 1101; AVX512VL-NEXT: vpermt2q %ymm1, %ymm2, %ymm0 1102; AVX512VL-NEXT: retq 1103 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 5> 1104 ret <4 x i64> %shuffle 1105} 1106 1107define <4 x i64> @shuffle_v4i64_2u35(<4 x i64> %a, <4 x i64> %b) { 1108; AVX1-LABEL: shuffle_v4i64_2u35: 1109; AVX1: # %bb.0: 1110; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3] 1111; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 1112; AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[3],ymm1[3] 1113; AVX1-NEXT: retq 1114; 1115; AVX2-LABEL: shuffle_v4i64_2u35: 1116; AVX2: # %bb.0: 1117; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7] 1118; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,1,3,1] 1119; AVX2-NEXT: retq 1120; 1121; AVX512VL-SLOW-LABEL: shuffle_v4i64_2u35: 1122; AVX512VL-SLOW: # %bb.0: 1123; AVX512VL-SLOW-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7] 1124; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,1,3,1] 1125; AVX512VL-SLOW-NEXT: retq 1126; 1127; AVX512VL-FAST-LABEL: shuffle_v4i64_2u35: 1128; AVX512VL-FAST: # %bb.0: 1129; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = [2,5,3,5] 1130; AVX512VL-FAST-NEXT: vpermt2q %ymm1, %ymm2, %ymm0 1131; AVX512VL-FAST-NEXT: retq 1132 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 2, i32 undef, i32 3, i32 5> 1133 ret <4 x i64> %shuffle 1134} 1135 1136define <4 x i64> @shuffle_v4i64_1251(<4 x i64> %a, <4 x i64> %b) { 1137; AVX1-LABEL: shuffle_v4i64_1251: 1138; AVX1: # %bb.0: 1139; 
AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1] 1140; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1141; AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm2[0],ymm0[3],ymm2[3] 1142; AVX1-NEXT: retq 1143; 1144; AVX2-LABEL: shuffle_v4i64_1251: 1145; AVX2: # %bb.0: 1146; AVX2-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[1,1,1,1] 1147; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,2,2,1] 1148; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7] 1149; AVX2-NEXT: retq 1150; 1151; AVX512VL-LABEL: shuffle_v4i64_1251: 1152; AVX512VL: # %bb.0: 1153; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [1,2,5,1] 1154; AVX512VL-NEXT: vpermt2q %ymm1, %ymm2, %ymm0 1155; AVX512VL-NEXT: retq 1156 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 2, i32 5, i32 1> 1157 ret <4 x i64> %shuffle 1158} 1159 1160define <4 x i64> @shuffle_v4i64_1054(<4 x i64> %a, <4 x i64> %b) { 1161; AVX1-LABEL: shuffle_v4i64_1054: 1162; AVX1: # %bb.0: 1163; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1164; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2] 1165; AVX1-NEXT: retq 1166; 1167; AVX2-LABEL: shuffle_v4i64_1054: 1168; AVX2: # %bb.0: 1169; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1170; AVX2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5] 1171; AVX2-NEXT: retq 1172; 1173; AVX512VL-SLOW-LABEL: shuffle_v4i64_1054: 1174; AVX512VL-SLOW: # %bb.0: 1175; AVX512VL-SLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1176; AVX512VL-SLOW-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5] 1177; AVX512VL-SLOW-NEXT: retq 1178; 1179; AVX512VL-FAST-LABEL: shuffle_v4i64_1054: 1180; AVX512VL-FAST: # %bb.0: 1181; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = [1,0,5,4] 1182; AVX512VL-FAST-NEXT: vpermt2q %ymm1, %ymm2, %ymm0 1183; AVX512VL-FAST-NEXT: retq 1184 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 0, i32 5, i32 4> 1185 ret <4 x i64> %shuffle 1186} 1187 1188define <4 x i64> @shuffle_v4i64_3254(<4 x i64> %a, <4 x i64> %b) { 1189; AVX1-LABEL: 
shuffle_v4i64_3254: 1190; AVX1: # %bb.0: 1191; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] 1192; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2] 1193; AVX1-NEXT: retq 1194; 1195; AVX2-LABEL: shuffle_v4i64_3254: 1196; AVX2: # %bb.0: 1197; AVX2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] 1198; AVX2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5] 1199; AVX2-NEXT: retq 1200; 1201; AVX512VL-SLOW-LABEL: shuffle_v4i64_3254: 1202; AVX512VL-SLOW: # %bb.0: 1203; AVX512VL-SLOW-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] 1204; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5] 1205; AVX512VL-SLOW-NEXT: retq 1206; 1207; AVX512VL-FAST-LABEL: shuffle_v4i64_3254: 1208; AVX512VL-FAST: # %bb.0: 1209; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = [3,2,5,4] 1210; AVX512VL-FAST-NEXT: vpermt2q %ymm1, %ymm2, %ymm0 1211; AVX512VL-FAST-NEXT: retq 1212 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 2, i32 5, i32 4> 1213 ret <4 x i64> %shuffle 1214} 1215 1216define <4 x i64> @shuffle_v4i64_3276(<4 x i64> %a, <4 x i64> %b) { 1217; AVX1-LABEL: shuffle_v4i64_3276: 1218; AVX1: # %bb.0: 1219; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] 1220; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2] 1221; AVX1-NEXT: retq 1222; 1223; AVX2-LABEL: shuffle_v4i64_3276: 1224; AVX2: # %bb.0: 1225; AVX2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] 1226; AVX2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5] 1227; AVX2-NEXT: retq 1228; 1229; AVX512VL-SLOW-LABEL: shuffle_v4i64_3276: 1230; AVX512VL-SLOW: # %bb.0: 1231; AVX512VL-SLOW-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] 1232; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5] 1233; AVX512VL-SLOW-NEXT: retq 1234; 1235; AVX512VL-FAST-LABEL: shuffle_v4i64_3276: 1236; AVX512VL-FAST: # %bb.0: 1237; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = [3,2,7,6] 1238; AVX512VL-FAST-NEXT: vpermt2q %ymm1, %ymm2, %ymm0 1239; AVX512VL-FAST-NEXT: 
retq 1240 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 2, i32 7, i32 6> 1241 ret <4 x i64> %shuffle 1242} 1243 1244define <4 x i64> @shuffle_v4i64_1076(<4 x i64> %a, <4 x i64> %b) { 1245; AVX1-LABEL: shuffle_v4i64_1076: 1246; AVX1: # %bb.0: 1247; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3] 1248; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2] 1249; AVX1-NEXT: retq 1250; 1251; AVX2-LABEL: shuffle_v4i64_1076: 1252; AVX2: # %bb.0: 1253; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] 1254; AVX2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5] 1255; AVX2-NEXT: retq 1256; 1257; AVX512VL-SLOW-LABEL: shuffle_v4i64_1076: 1258; AVX512VL-SLOW: # %bb.0: 1259; AVX512VL-SLOW-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] 1260; AVX512VL-SLOW-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5] 1261; AVX512VL-SLOW-NEXT: retq 1262; 1263; AVX512VL-FAST-LABEL: shuffle_v4i64_1076: 1264; AVX512VL-FAST: # %bb.0: 1265; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = [1,0,7,6] 1266; AVX512VL-FAST-NEXT: vpermt2q %ymm1, %ymm2, %ymm0 1267; AVX512VL-FAST-NEXT: retq 1268 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 0, i32 7, i32 6> 1269 ret <4 x i64> %shuffle 1270} 1271 1272define <4 x i64> @shuffle_v4i64_0415(<4 x i64> %a, <4 x i64> %b) { 1273; AVX1-LABEL: shuffle_v4i64_0415: 1274; AVX1: # %bb.0: 1275; AVX1-NEXT: vunpckhpd {{.*#+}} xmm2 = xmm0[1],xmm1[1] 1276; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1277; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1278; AVX1-NEXT: retq 1279; 1280; AVX2-LABEL: shuffle_v4i64_0415: 1281; AVX2: # %bb.0: 1282; AVX2-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,0,2,1] 1283; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3] 1284; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7] 1285; AVX2-NEXT: retq 1286; 1287; AVX512VL-LABEL: shuffle_v4i64_0415: 1288; AVX512VL: # %bb.0: 1289; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [0,4,1,5] 
1290; AVX512VL-NEXT: vpermt2q %ymm1, %ymm2, %ymm0 1291; AVX512VL-NEXT: retq 1292 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 1293 ret <4 x i64> %shuffle 1294} 1295 1296define <4 x i64> @shuffle_v4i64_z4z6(<4 x i64> %a) { 1297; AVX1-LABEL: shuffle_v4i64_z4z6: 1298; AVX1: # %bb.0: 1299; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 1300; AVX1-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2] 1301; AVX1-NEXT: retq 1302; 1303; AVX2-LABEL: shuffle_v4i64_z4z6: 1304; AVX2: # %bb.0: 1305; AVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23] 1306; AVX2-NEXT: retq 1307; 1308; AVX512VL-LABEL: shuffle_v4i64_z4z6: 1309; AVX512VL: # %bb.0: 1310; AVX512VL-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23] 1311; AVX512VL-NEXT: retq 1312 %shuffle = shufflevector <4 x i64> zeroinitializer, <4 x i64> %a, <4 x i32> <i32 0, i32 4, i32 0, i32 6> 1313 ret <4 x i64> %shuffle 1314} 1315 1316define <4 x i64> @shuffle_v4i64_5zuz(<4 x i64> %a) { 1317; AVX1-LABEL: shuffle_v4i64_5zuz: 1318; AVX1: # %bb.0: 1319; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 1320; AVX1-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] 1321; AVX1-NEXT: retq 1322; 1323; AVX2-LABEL: shuffle_v4i64_5zuz: 1324; AVX2: # %bb.0: 1325; AVX2-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero 1326; AVX2-NEXT: retq 1327; 1328; AVX512VL-LABEL: shuffle_v4i64_5zuz: 1329; AVX512VL: # %bb.0: 1330; AVX512VL-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero 1331; AVX512VL-NEXT: retq 1332 %shuffle = shufflevector <4 x i64> 
zeroinitializer, <4 x i64> %a, <4 x i32> <i32 5, i32 0, i32 undef, i32 0> 1333 ret <4 x i64> %shuffle 1334} 1335 1336define <4 x i64> @shuffle_v4i64_40u2(<4 x i64> %a, <4 x i64> %b) { 1337; ALL-LABEL: shuffle_v4i64_40u2: 1338; ALL: # %bb.0: 1339; ALL-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2] 1340; ALL-NEXT: retq 1341 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 0, i32 undef, i32 2> 1342 ret <4 x i64> %shuffle 1343} 1344 1345define <4 x i64> @shuffle_v4i64_15uu(<4 x i64> %a, <4 x i64> %b) { 1346; ALL-LABEL: shuffle_v4i64_15uu: 1347; ALL: # %bb.0: 1348; ALL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] 1349; ALL-NEXT: retq 1350 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 5, i32 undef, i32 undef> 1351 ret <4 x i64> %shuffle 1352} 1353 1354define <4 x i64> @shuffle_v4i64_11uu(<4 x i64> %a, <4 x i64> %b) { 1355; ALL-LABEL: shuffle_v4i64_11uu: 1356; ALL: # %bb.0: 1357; ALL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,2,3] 1358; ALL-NEXT: retq 1359 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 1, i32 undef, i32 undef> 1360 ret <4 x i64> %shuffle 1361} 1362 1363define <4 x i64> @shuffle_v4i64_22uu(<4 x i64> %a, <4 x i64> %b) { 1364; AVX1-LABEL: shuffle_v4i64_22uu: 1365; AVX1: # %bb.0: 1366; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1367; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1] 1368; AVX1-NEXT: retq 1369; 1370; AVX2-LABEL: shuffle_v4i64_22uu: 1371; AVX2: # %bb.0: 1372; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,2,2] 1373; AVX2-NEXT: retq 1374; 1375; AVX512VL-LABEL: shuffle_v4i64_22uu: 1376; AVX512VL: # %bb.0: 1377; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,2,2] 1378; AVX512VL-NEXT: retq 1379 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 2, i32 2, i32 undef, i32 undef> 1380 ret <4 x i64> %shuffle 1381} 1382 1383define <4 x i64> @shuffle_v4i64_3333(<4 x i64> %a, <4 x i64> %b) { 1384; AVX1-LABEL: shuffle_v4i64_3333: 1385; AVX1: # 
%bb.0: 1386; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,3] 1387; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3] 1388; AVX1-NEXT: retq 1389; 1390; AVX2-LABEL: shuffle_v4i64_3333: 1391; AVX2: # %bb.0: 1392; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,3] 1393; AVX2-NEXT: retq 1394; 1395; AVX512VL-LABEL: shuffle_v4i64_3333: 1396; AVX512VL: # %bb.0: 1397; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,3] 1398; AVX512VL-NEXT: retq 1399 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1400 ret <4 x i64> %shuffle 1401} 1402 1403define <4 x i64> @shuffle_v4i64_1z3z(<4 x i64> %a, <4 x i64> %b) { 1404; AVX1-LABEL: shuffle_v4i64_1z3z: 1405; AVX1: # %bb.0: 1406; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 1407; AVX1-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] 1408; AVX1-NEXT: retq 1409; 1410; AVX2-LABEL: shuffle_v4i64_1z3z: 1411; AVX2: # %bb.0: 1412; AVX2-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero 1413; AVX2-NEXT: retq 1414; 1415; AVX512VL-LABEL: shuffle_v4i64_1z3z: 1416; AVX512VL: # %bb.0: 1417; AVX512VL-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero 1418; AVX512VL-NEXT: retq 1419 %shuffle = shufflevector <4 x i64> %a, <4 x i64> <i64 0, i64 undef, i64 undef, i64 undef>, <4 x i32> <i32 1, i32 4, i32 3, i32 4> 1420 ret <4 x i64> %shuffle 1421} 1422 1423define <4 x i64> @shuffle_v4i64_1032_v2i64(<2 x i64> %a, <2 x i64> %b) { 1424; ALL-LABEL: shuffle_v4i64_1032_v2i64: 1425; ALL: # %bb.0: 1426; ALL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 1427; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1428; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5] 1429; ALL-NEXT: retq 1430 %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 1, i32 0> 1431 %2 = 
shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> <i32 1, i32 0> 1432 %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1433 ret <4 x i64> %3 1434} 1435 1436define <4 x i64> @stress_test1(<4 x i64> %a, <4 x i64> %b) { 1437; ALL-LABEL: stress_test1: 1438; ALL: retq 1439 %c = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> <i32 3, i32 1, i32 1, i32 0> 1440 %d = shufflevector <4 x i64> %c, <4 x i64> undef, <4 x i32> <i32 3, i32 undef, i32 2, i32 undef> 1441 %e = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 undef> 1442 %f = shufflevector <4 x i64> %d, <4 x i64> %e, <4 x i32> <i32 5, i32 1, i32 1, i32 0> 1443 1444 ret <4 x i64> %f 1445} 1446 1447define <4 x i64> @insert_reg_and_zero_v4i64(i64 %a) { 1448; ALL-LABEL: insert_reg_and_zero_v4i64: 1449; ALL: # %bb.0: 1450; ALL-NEXT: vmovq %rdi, %xmm0 1451; ALL-NEXT: retq 1452 %v = insertelement <4 x i64> undef, i64 %a, i64 0 1453 %shuffle = shufflevector <4 x i64> %v, <4 x i64> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7> 1454 ret <4 x i64> %shuffle 1455} 1456 1457define <4 x i64> @insert_mem_and_zero_v4i64(i64* %ptr) { 1458; ALL-LABEL: insert_mem_and_zero_v4i64: 1459; ALL: # %bb.0: 1460; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1461; ALL-NEXT: retq 1462 %a = load i64, i64* %ptr 1463 %v = insertelement <4 x i64> undef, i64 %a, i64 0 1464 %shuffle = shufflevector <4 x i64> %v, <4 x i64> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7> 1465 ret <4 x i64> %shuffle 1466} 1467 1468define <4 x double> @insert_reg_and_zero_v4f64(double %a) { 1469; ALL-LABEL: insert_reg_and_zero_v4f64: 1470; ALL: # %bb.0: 1471; ALL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero 1472; ALL-NEXT: retq 1473 %v = insertelement <4 x double> undef, double %a, i32 0 1474 %shuffle = shufflevector <4 x double> %v, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7> 1475 ret <4 x double> %shuffle 1476} 1477 1478define <4 x double> 
@insert_mem_and_zero_v4f64(double* %ptr) { 1479; ALL-LABEL: insert_mem_and_zero_v4f64: 1480; ALL: # %bb.0: 1481; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1482; ALL-NEXT: retq 1483 %a = load double, double* %ptr 1484 %v = insertelement <4 x double> undef, double %a, i32 0 1485 %shuffle = shufflevector <4 x double> %v, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7> 1486 ret <4 x double> %shuffle 1487} 1488 1489define <4 x double> @splat_mem_v4f64(double* %ptr) { 1490; ALL-LABEL: splat_mem_v4f64: 1491; ALL: # %bb.0: 1492; ALL-NEXT: vbroadcastsd (%rdi), %ymm0 1493; ALL-NEXT: retq 1494 %a = load double, double* %ptr 1495 %v = insertelement <4 x double> undef, double %a, i32 0 1496 %shuffle = shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 1497 ret <4 x double> %shuffle 1498} 1499 1500define <4 x i64> @splat_mem_v4i64(i64* %ptr) { 1501; ALL-LABEL: splat_mem_v4i64: 1502; ALL: # %bb.0: 1503; ALL-NEXT: vbroadcastsd (%rdi), %ymm0 1504; ALL-NEXT: retq 1505 %a = load i64, i64* %ptr 1506 %v = insertelement <4 x i64> undef, i64 %a, i64 0 1507 %shuffle = shufflevector <4 x i64> %v, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 1508 ret <4 x i64> %shuffle 1509} 1510 1511define <4 x double> @splat_mem_v4f64_2(double* %p) { 1512; ALL-LABEL: splat_mem_v4f64_2: 1513; ALL: # %bb.0: 1514; ALL-NEXT: vbroadcastsd (%rdi), %ymm0 1515; ALL-NEXT: retq 1516 %1 = load double, double* %p 1517 %2 = insertelement <2 x double> undef, double %1, i32 0 1518 %3 = shufflevector <2 x double> %2, <2 x double> undef, <4 x i32> zeroinitializer 1519 ret <4 x double> %3 1520} 1521 1522define <4 x double> @splat_v4f64(<2 x double> %r) { 1523; AVX1-LABEL: splat_v4f64: 1524; AVX1: # %bb.0: 1525; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] 1526; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 1527; AVX1-NEXT: retq 1528; 1529; AVX2-LABEL: splat_v4f64: 1530; AVX2: # %bb.0: 1531; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0 1532; AVX2-NEXT: retq 
1533; 1534; AVX512VL-LABEL: splat_v4f64: 1535; AVX512VL: # %bb.0: 1536; AVX512VL-NEXT: vbroadcastsd %xmm0, %ymm0 1537; AVX512VL-NEXT: retq 1538 %1 = shufflevector <2 x double> %r, <2 x double> undef, <4 x i32> zeroinitializer 1539 ret <4 x double> %1 1540} 1541 1542define <4 x i64> @splat_mem_v4i64_from_v2i64(<2 x i64>* %ptr) { 1543; ALL-LABEL: splat_mem_v4i64_from_v2i64: 1544; ALL: # %bb.0: 1545; ALL-NEXT: vbroadcastsd (%rdi), %ymm0 1546; ALL-NEXT: retq 1547 %v = load <2 x i64>, <2 x i64>* %ptr 1548 %shuffle = shufflevector <2 x i64> %v, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 1549 ret <4 x i64> %shuffle 1550} 1551 1552define <4 x double> @splat_mem_v4f64_from_v2f64(<2 x double>* %ptr) { 1553; ALL-LABEL: splat_mem_v4f64_from_v2f64: 1554; ALL: # %bb.0: 1555; ALL-NEXT: vbroadcastsd (%rdi), %ymm0 1556; ALL-NEXT: retq 1557 %v = load <2 x double>, <2 x double>* %ptr 1558 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 1559 ret <4 x double> %shuffle 1560} 1561 1562define <4 x i64> @splat128_mem_v4i64_from_v2i64(<2 x i64>* %ptr) { 1563; AVX1OR2-LABEL: splat128_mem_v4i64_from_v2i64: 1564; AVX1OR2: # %bb.0: 1565; AVX1OR2-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] 1566; AVX1OR2-NEXT: retq 1567; 1568; AVX512VL-LABEL: splat128_mem_v4i64_from_v2i64: 1569; AVX512VL: # %bb.0: 1570; AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1] 1571; AVX512VL-NEXT: retq 1572 %v = load <2 x i64>, <2 x i64>* %ptr 1573 %shuffle = shufflevector <2 x i64> %v, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> 1574 ret <4 x i64> %shuffle 1575} 1576 1577define <4 x double> @splat128_mem_v4f64_from_v2f64(<2 x double>* %ptr) { 1578; ALL-LABEL: splat128_mem_v4f64_from_v2f64: 1579; ALL: # %bb.0: 1580; ALL-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] 1581; ALL-NEXT: retq 1582 %v = load <2 x double>, <2 x double>* %ptr 1583 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <4 x i32> <i32 0, i32 1, 
i32 0, i32 1> 1584 ret <4 x double> %shuffle 1585} 1586 1587define <4 x double> @broadcast_v4f64_0000_from_v2i64(<2 x i64> %a0) { 1588; AVX1-LABEL: broadcast_v4f64_0000_from_v2i64: 1589; AVX1: # %bb.0: 1590; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1] 1591; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 1592; AVX1-NEXT: retq 1593; 1594; AVX2-LABEL: broadcast_v4f64_0000_from_v2i64: 1595; AVX2: # %bb.0: 1596; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0 1597; AVX2-NEXT: retq 1598; 1599; AVX512VL-LABEL: broadcast_v4f64_0000_from_v2i64: 1600; AVX512VL: # %bb.0: 1601; AVX512VL-NEXT: vbroadcastsd %xmm0, %ymm0 1602; AVX512VL-NEXT: retq 1603 %1 = shufflevector <2 x i64> %a0, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1604 %2 = bitcast <4 x i64> %1 to <4 x double> 1605 %3 = shufflevector <4 x double> %2, <4 x double> undef, <4 x i32> zeroinitializer 1606 ret <4 x double> %3 1607} 1608 1609define <4 x double> @bitcast_v4f64_0426(<4 x double> %a, <4 x double> %b) { 1610; ALL-LABEL: bitcast_v4f64_0426: 1611; ALL: # %bb.0: 1612; ALL-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] 1613; ALL-NEXT: retq 1614 %shuffle64 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 0, i32 6, i32 2> 1615 %bitcast32 = bitcast <4 x double> %shuffle64 to <8 x float> 1616 %shuffle32 = shufflevector <8 x float> %bitcast32, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> 1617 %bitcast16 = bitcast <8 x float> %shuffle32 to <16 x i16> 1618 %shuffle16 = shufflevector <16 x i16> %bitcast16, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5, i32 10, i32 11, i32 8, i32 9, i32 14, i32 15, i32 12, i32 13> 1619 %bitcast64 = bitcast <16 x i16> %shuffle16 to <4 x double> 1620 ret <4 x double> %bitcast64 1621} 1622 1623define <4 x i64> @concat_v4i64_0167(<4 x i64> %a0, <4 x i64> %a1) { 1624; ALL-LABEL: concat_v4i64_0167: 1625; ALL: # %bb.0: 1626; ALL-NEXT: vblendps {{.*#+}} ymm0 = 
ymm0[0,1,2,3],ymm1[4,5,6,7] 1627; ALL-NEXT: retq 1628 %a0lo = shufflevector <4 x i64> %a0, <4 x i64> %a1, <2 x i32> <i32 0, i32 1> 1629 %a1hi = shufflevector <4 x i64> %a0, <4 x i64> %a1, <2 x i32> <i32 6, i32 7> 1630 %shuffle64 = shufflevector <2 x i64> %a0lo, <2 x i64> %a1hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1631 ret <4 x i64> %shuffle64 1632} 1633 1634define <4 x i64> @concat_v4i64_0145_bc(<4 x i64> %a0, <4 x i64> %a1) { 1635; ALL-LABEL: concat_v4i64_0145_bc: 1636; ALL: # %bb.0: 1637; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1638; ALL-NEXT: retq 1639 %a0lo = shufflevector <4 x i64> %a0, <4 x i64> %a1, <2 x i32> <i32 0, i32 1> 1640 %a1lo = shufflevector <4 x i64> %a0, <4 x i64> %a1, <2 x i32> <i32 4, i32 5> 1641 %bc0lo = bitcast <2 x i64> %a0lo to <4 x i32> 1642 %bc1lo = bitcast <2 x i64> %a1lo to <4 x i32> 1643 %shuffle32 = shufflevector <4 x i32> %bc0lo, <4 x i32> %bc1lo, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1644 %shuffle64 = bitcast <8 x i32> %shuffle32 to <4 x i64> 1645 ret <4 x i64> %shuffle64 1646} 1647 1648define <4 x i64> @insert_dup_mem_v4i64(i64* %ptr) { 1649; ALL-LABEL: insert_dup_mem_v4i64: 1650; ALL: # %bb.0: 1651; ALL-NEXT: vbroadcastsd (%rdi), %ymm0 1652; ALL-NEXT: retq 1653 %tmp = load i64, i64* %ptr, align 1 1654 %tmp1 = insertelement <2 x i64> undef, i64 %tmp, i32 0 1655 %tmp2 = shufflevector <2 x i64> %tmp1, <2 x i64> undef, <4 x i32> zeroinitializer 1656 ret <4 x i64> %tmp2 1657} 1658 1659define <4 x i64> @shuffle_v4i64_1234(<4 x i64> %a, <4 x i64> %b) { 1660; AVX1-LABEL: shuffle_v4i64_1234: 1661; AVX1: # %bb.0: 1662; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] 1663; AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[3],ymm1[2] 1664; AVX1-NEXT: retq 1665; 1666; AVX2-LABEL: shuffle_v4i64_1234: 1667; AVX2: # %bb.0: 1668; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] 1669; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = 
ymm0[8,9,10,11,12,13,14,15],ymm1[0,1,2,3,4,5,6,7],ymm0[24,25,26,27,28,29,30,31],ymm1[16,17,18,19,20,21,22,23]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_1234:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    valignq {{.*#+}} ymm0 = ymm0[1,2,3],ymm1[0]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_1230(<4 x i64> %a) {
; AVX1-LABEL: shuffle_v4i64_1230:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX1-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[3],ymm1[2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_1230:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[1,2,3,0]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_1230:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[1,2,3,0]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
  ret <4 x i64> %shuffle
}

; Shuffles blending with a zero vector ('z' lanes come from the constant
; <i64 0, ...> second operand).
define <4 x i64> @shuffle_v4i64_z0z3(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_z0z3:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vshufpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[3]
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: shuffle_v4i64_z0z3:
; AVX2-SLOW:       # %bb.0:
; AVX2-SLOW-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,3]
; AVX2-SLOW-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX2-SLOW-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-LABEL: shuffle_v4i64_z0z3:
; AVX2-FAST:       # %bb.0:
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[24,25,26,27,28,29,30,31]
; AVX2-FAST-NEXT:    retq
;
; AVX512VL-SLOW-LABEL: shuffle_v4i64_z0z3:
; AVX512VL-SLOW:       # %bb.0:
; AVX512VL-SLOW-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,3]
; AVX512VL-SLOW-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX512VL-SLOW-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
; AVX512VL-SLOW-NEXT:    retq
;
; AVX512VL-FAST-LABEL: shuffle_v4i64_z0z3:
; AVX512VL-FAST:       # %bb.0:
; AVX512VL-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[24,25,26,27,28,29,30,31]
; AVX512VL-FAST-NEXT:    retq
  %1 = shufflevector <4 x i64> %a, <4 x i64> <i64 0, i64 undef, i64 undef, i64 undef>, <4 x i32> <i32 4, i32 0, i32 4, i32 3>
  ret <4 x i64> %1
}

define <4 x i64> @shuffle_v4i64_1z2z(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_1z2z:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[2]
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: shuffle_v4i64_1z2z:
; AVX2-SLOW:       # %bb.0:
; AVX2-SLOW-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX2-SLOW-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
; AVX2-SLOW-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[1,0,2,0]
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-LABEL: shuffle_v4i64_1z2z:
; AVX2-FAST:       # %bb.0:
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-FAST-NEXT:    retq
;
; AVX512VL-SLOW-LABEL: shuffle_v4i64_1z2z:
; AVX512VL-SLOW:       # %bb.0:
; AVX512VL-SLOW-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX512VL-SLOW-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
; AVX512VL-SLOW-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[1,0,2,0]
; AVX512VL-SLOW-NEXT:    retq
;
; AVX512VL-FAST-LABEL: shuffle_v4i64_1z2z:
; AVX512VL-FAST:       # %bb.0:
; AVX512VL-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23],zero,zero,zero,zero,zero,zero,zero,zero
; AVX512VL-FAST-NEXT:    retq
  %1 = shufflevector <4 x i64> %a, <4 x i64> <i64 0, i64 undef, i64 undef, i64 undef>, <4 x i32> <i32 1, i32 4, i32 2, i32 4>
  ret <4 x i64> %1
}

; Interleaved shuffle+add patterns that should lower to horizontal adds
; (vhaddpd) or unpack+add sequences.
define <4 x double> @add_v4f64_0246_1357(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: add_v4f64_0246_1357:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vhaddpd %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: add_v4f64_0246_1357:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vhaddpd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: add_v4f64_0246_1357:
; AVX512VL:       # %bb.0: # %entry
; AVX512VL-NEXT:    vhaddpd %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX512VL-NEXT:    retq
entry:
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %shuffle1 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %add = fadd <4 x double> %shuffle, %shuffle1
  ret <4 x double> %add
}

define <4 x double> @add_v4f64_4602_5713(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: add_v4f64_4602_5713:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm1[2,3],ymm0[2,3]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vhaddpd %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: add_v4f64_4602_5713:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vhaddpd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[1,3,0,2]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: add_v4f64_4602_5713:
; AVX512VL:       # %bb.0: # %entry
; AVX512VL-NEXT:    vhaddpd %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[1,3,0,2]
; AVX512VL-NEXT:    retq
entry:
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 6, i32 0, i32 2>
  %shuffle1 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 5, i32 7, i32 1, i32 3>
  %add = fadd <4 x double> %shuffle, %shuffle1
  ret <4 x double> %add
}

define <4 x i64> @add_v4i64_0246_1357(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: add_v4i64_0246_1357:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vunpcklpd {{.*#+}} ymm1 = ymm0[0],ymm2[0],ymm0[2],ymm2[2]
; AVX1-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm2[1],ymm0[3],ymm2[3]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT:    vpaddq %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: add_v4i64_0246_1357:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpunpcklqdq {{.*#+}} ymm2 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; AVX2-NEXT:    vpermq {{.*#+}} ymm2 = ymm2[0,2,1,3]
; AVX2-NEXT:    vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT:    vpaddq %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VL-SLOW-LABEL: add_v4i64_0246_1357:
; AVX512VL-SLOW:       # %bb.0: # %entry
; AVX512VL-SLOW-NEXT:    vpunpcklqdq {{.*#+}} ymm2 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; AVX512VL-SLOW-NEXT:    vpermq {{.*#+}} ymm2 = ymm2[0,2,1,3]
; AVX512VL-SLOW-NEXT:    vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; AVX512VL-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX512VL-SLOW-NEXT:    vpaddq %ymm0, %ymm2, %ymm0
; AVX512VL-SLOW-NEXT:    retq
;
; AVX512VL-FAST-LABEL: add_v4i64_0246_1357:
; AVX512VL-FAST:       # %bb.0: # %entry
; AVX512VL-FAST-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,2,4,6]
; AVX512VL-FAST-NEXT:    vpermi2q %ymm1, %ymm0, %ymm2
; AVX512VL-FAST-NEXT:    vmovdqa {{.*#+}} ymm3 = [1,3,5,7]
; AVX512VL-FAST-NEXT:    vpermi2q %ymm1, %ymm0, %ymm3
; AVX512VL-FAST-NEXT:    vpaddq %ymm3, %ymm2, %ymm0
; AVX512VL-FAST-NEXT:    retq
entry:
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %shuffle1 = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %add = add <4 x i64> %shuffle, %shuffle1
  ret <4 x i64> %add
}

define <4 x i64> @add_v4i64_4602_5713(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: add_v4i64_4602_5713:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm1[2,3],ymm0[2,3]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vunpcklpd {{.*#+}} ymm1 = ymm0[0],ymm2[0],ymm0[2],ymm2[2]
; AVX1-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm2[1],ymm0[3],ymm2[3]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT:    vpaddq %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: add_v4i64_4602_5713:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpunpcklqdq {{.*#+}} ymm2 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
; AVX2-NEXT:    vpermq {{.*#+}} ymm2 = ymm2[0,2,1,3]
; AVX2-NEXT:    vpunpckhqdq {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[3],ymm0[3]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT:    vpaddq %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VL-SLOW-LABEL: add_v4i64_4602_5713:
; AVX512VL-SLOW:       # %bb.0: # %entry
; AVX512VL-SLOW-NEXT:    vpunpcklqdq {{.*#+}} ymm2 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
; AVX512VL-SLOW-NEXT:    vpermq {{.*#+}} ymm2 = ymm2[0,2,1,3]
; AVX512VL-SLOW-NEXT:    vpunpckhqdq {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[3],ymm0[3]
; AVX512VL-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX512VL-SLOW-NEXT:    vpaddq %ymm0, %ymm2, %ymm0
; AVX512VL-SLOW-NEXT:    retq
;
; AVX512VL-FAST-LABEL: add_v4i64_4602_5713:
; AVX512VL-FAST:       # %bb.0: # %entry
; AVX512VL-FAST-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,2,4,6]
; AVX512VL-FAST-NEXT:    vpermi2q %ymm0, %ymm1, %ymm2
; AVX512VL-FAST-NEXT:    vmovdqa {{.*#+}} ymm3 = [1,3,5,7]
; AVX512VL-FAST-NEXT:    vpermi2q %ymm0, %ymm1, %ymm3
; AVX512VL-FAST-NEXT:    vpaddq %ymm3, %ymm2, %ymm0
; AVX512VL-FAST-NEXT:    retq
entry:
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 6, i32 0, i32 2>
  %shuffle1 = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 5, i32 7, i32 1, i32 3>
  %add = add <4 x i64> %shuffle, %shuffle1
  ret <4 x i64> %add
}

; Move-low-and-zero patterns under size optimization: optsize attribute and
; PGSO (profile-guided size opt, via the !prof !14 zero entry count).
define <4 x double> @shuffle_v4f64_0zzz_optsize(<4 x double> %a) optsize {
; ALL-LABEL: shuffle_v4f64_0zzz_optsize:
; ALL:       # %bb.0:
; ALL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; ALL-NEXT:    retq
  %b = shufflevector <4 x double> %a, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x double> %b
}

define <4 x i64> @shuffle_v4i64_0zzz_optsize(<4 x i64> %a) optsize {
; ALL-LABEL: shuffle_v4i64_0zzz_optsize:
; ALL:       # %bb.0:
; ALL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; ALL-NEXT:    retq
  %b = shufflevector <4 x i64> %a, <4 x i64> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x i64> %b
}

define <8 x float> @shuffle_v8f32_0zzzzzzz_optsize(<8 x float> %a) optsize {
; ALL-LABEL: shuffle_v8f32_0zzzzzzz_optsize:
; ALL:       # %bb.0:
; ALL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; ALL-NEXT:    vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; ALL-NEXT:    retq
  %b = shufflevector <8 x float> %a, <8 x float> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <8 x float> %b
}

define <8 x i32> @shuffle_v8i32_0zzzzzzz_optsize(<8 x i32> %a) optsize {
; ALL-LABEL: shuffle_v8i32_0zzzzzzz_optsize:
; ALL:       # %bb.0:
; ALL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; ALL-NEXT:    vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; ALL-NEXT:    retq
  %b = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i32> %b
}

define <4 x double> @shuffle_v4f64_0zzz_pgso(<4 x double> %a) !prof !14 {
; ALL-LABEL: shuffle_v4f64_0zzz_pgso:
; ALL:       # %bb.0:
; ALL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; ALL-NEXT:    retq
  %b = shufflevector <4 x double> %a, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x double> %b
}

define <4 x i64> @shuffle_v4i64_0zzz_pgso(<4 x i64> %a) !prof !14 {
; ALL-LABEL: shuffle_v4i64_0zzz_pgso:
; ALL:       # %bb.0:
; ALL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; ALL-NEXT:    retq
  %b = shufflevector <4 x i64> %a, <4 x i64> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x i64> %b
}

define <8 x float> @shuffle_v8f32_0zzzzzzz_pgso(<8 x float> %a) !prof !14 {
; AVX1OR2-LABEL: shuffle_v8f32_0zzzzzzz_pgso:
; AVX1OR2:       # %bb.0:
; AVX1OR2-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX1OR2-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX1OR2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v8f32_0zzzzzzz_pgso:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT:    vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX512VL-NEXT:    retq
  %b = shufflevector <8 x float> %a, <8 x float> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <8 x float> %b
}

define <8 x i32> @shuffle_v8i32_0zzzzzzz_pgso(<8 x i32> %a) !prof !14 {
; AVX1OR2-LABEL: shuffle_v8i32_0zzzzzzz_pgso:
; AVX1OR2:       # %bb.0:
; AVX1OR2-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX1OR2-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX1OR2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v8i32_0zzzzzzz_pgso:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT:    vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX512VL-NEXT:    retq
  %b = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i32> %b
}

define <4 x i64> @unpckh_v4i64(<4 x i64> %x, <4 x i64> %y) {
; ALL-LABEL: unpckh_v4i64:
; ALL:       # %bb.0:
; ALL-NEXT:    vextractf128 $1, %ymm1, %xmm1
; ALL-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; ALL-NEXT:    retq
  %unpckh = shufflevector <4 x i64> %x, <4 x i64> %y, <4 x i32> <i32 1, i32 7, i32 undef, i32 undef>
  ret <4 x i64> %unpckh
}

define <4 x double> @unpckh_v4f64(<4 x double> %x, <4 x double> %y) {
; ALL-LABEL: unpckh_v4f64:
; ALL:       # %bb.0:
; ALL-NEXT:    vextractf128 $1, %ymm1, %xmm1
; ALL-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; ALL-NEXT:    retq
  %unpckh = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 1, i32 7, i32 undef, i32 undef>
  ret <4 x double> %unpckh
}

; ProfileSummary module metadata; !14's zero entry count marks the *_pgso
; functions above as cold so PGSO optimizes them for size.
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
!14 = !{!"function_entry_count", i64 0}