; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX1
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX2
; RUN: llc < %s -mcpu=knl -mattr=+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX512VL

target triple = "x86_64-unknown-unknown"

define <4 x double> @shuffle_v4f64_0000(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0000:
; AVX1: # BB#0:
; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4f64_0000:
; AVX2: # BB#0:
; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4f64_0000:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vbroadcastsd %xmm0, %ymm0
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0001(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0001:
; AVX1: # BB#0:
; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = xmm0[0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4f64_0001:
; AVX2: # BB#0:
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4f64_0001:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0020(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0020:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4f64_0020:
; AVX2: # BB#0:
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,0]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4f64_0020:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,0]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0300(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0300:
; AVX1: # BB#0:
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX1-NEXT: vpermilpd {{.*#+}} ymm1 = ymm1[0,1,2,2]
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4f64_0300:
; AVX2: # BB#0:
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,3,0,0]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4f64_0300:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,3,0,0]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1000(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_1000:
; AVX1: # BB#0:
; AVX1-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4f64_1000:
; AVX2: # BB#0:
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,0,0,0]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4f64_1000:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,0,0,0]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_2200(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_2200:
; AVX1: # BB#0:
; AVX1-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4f64_2200:
; AVX2: # BB#0:
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,0,0]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4f64_2200:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,0,0]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_3330(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_3330:
; AVX1: # BB#0:
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3]
; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,2]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4f64_3330:
; AVX2: # BB#0:
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,0]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4f64_3330:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,0]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_3210(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_3210:
; AVX1: # BB#0:
; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4f64_3210:
; AVX2: # BB#0:
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,1,0]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4f64_3210:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,1,0]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0023(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0023:
; ALL: # BB#0:
; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,3]
; ALL-NEXT: retq

  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0022(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0022:
; ALL: # BB#0:
; ALL-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64mem_0022(<4 x double>* %ptr, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64mem_0022:
; ALL: # BB#0:
; ALL-NEXT: vmovddup {{.*#+}} ymm0 = mem[0,0,2,2]
; ALL-NEXT: retq
  %a = load <4 x double>, <4 x double>* %ptr
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1032(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1032:
; ALL: # BB#0:
; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1133(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1133:
; ALL: # BB#0:
; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,3]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1023(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1023:
; ALL: # BB#0:
; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 2, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1022(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1022:
; ALL: # BB#0:
; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,2]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 2, i32 2>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0423(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0423:
; ALL: # BB#0:
; ALL-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0]
; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0462(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0462:
; ALL: # BB#0:
; ALL-NEXT: vmovddup {{.*#+}} ymm1 = ymm1[0,0,2,2]
; ALL-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 6, i32 2>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0426(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0426:
; ALL: # BB#0:
; ALL-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1537(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1537:
; ALL: # BB#0:
; ALL-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_4062(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_4062:
; ALL: # BB#0:
; ALL-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 0, i32 6, i32 2>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_5173(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_5173:
; ALL: # BB#0:
; ALL-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[3],ymm0[3]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 5, i32 1, i32 7, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_5163(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_5163:
; ALL: # BB#0:
; ALL-NEXT: vshufpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[2],ymm0[3]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 5, i32 1, i32 6, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0527(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0527:
; ALL: # BB#0:
; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_4163(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_4163:
; ALL: # BB#0:
; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0145(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0145:
; AVX1: # BB#0:
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4f64_0145:
; AVX2: # BB#0:
; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4f64_0145:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_4501(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_4501:
; AVX1: # BB#0:
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4f64_4501:
; AVX2: # BB#0:
; AVX2-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4f64_4501:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vinsertf32x4 $1, %xmm0, %ymm1, %ymm0
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0167(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0167:
; ALL: # BB#0:
; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1054(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_1054:
; AVX1: # BB#0:
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4f64_1054:
; AVX2: # BB#0:
; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4f64_1054:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 5, i32 4>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_3254(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_3254:
; AVX1: # BB#0:
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4f64_3254:
; AVX2: # BB#0:
; AVX2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; AVX2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4f64_3254:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; AVX512VL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 2, i32 5, i32 4>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_3276(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_3276:
; AVX1: # BB#0:
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4f64_3276:
; AVX2: # BB#0:
; AVX2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; AVX2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4f64_3276:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; AVX512VL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 2, i32 7, i32 6>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1076(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1076:
; ALL: # BB#0:
; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 7, i32 6>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0415(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0415:
; AVX1: # BB#0:
; AVX1-NEXT: vunpckhpd {{.*#+}} xmm2 = xmm0[1],xmm1[1]
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4f64_0415:
; AVX2: # BB#0:
; AVX2-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,0,2,1]
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
; AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4f64_0415:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,0,2,1]
; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
; AVX512VL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_u062(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_u062:
; ALL: # BB#0:
; ALL-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 undef, i32 0, i32 6, i32 2>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_15uu(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_15uu:
; ALL: # BB#0:
; ALL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 5, i32 undef, i32 undef>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_11uu(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_11uu:
; ALL: # BB#0:
; ALL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,1]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 1, i32 undef, i32 undef>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_22uu(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_22uu:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4f64_22uu:
; AVX2: # BB#0:
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,2,3]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4f64_22uu:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,2,3]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 2, i32 2, i32 undef, i32 undef>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_3333(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_3333:
; AVX1: # BB#0:
; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,3]
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4f64_3333:
; AVX2: # BB#0:
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,3]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4f64_3333:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,3]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  ret <4 x double> %shuffle
}

define <4 x i64> @shuffle_v4i64_0000(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0000:
; AVX1: # BB#0:
; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_0000:
; AVX2: # BB#0:
; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_0000:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpbroadcastq %xmm0, %ymm0
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0001(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0001:
; AVX1: # BB#0:
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_0001:
; AVX2: # BB#0:
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_0001:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0020(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0020:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_0020:
; AVX2: # BB#0:
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,2,0]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_0020:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,2,0]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0112(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0112:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1],xmm1[0]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_0112:
; AVX2: # BB#0:
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,2]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_0112:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,2]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 1, i32 2>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0300(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0300:
; AVX1: # BB#0:
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX1-NEXT: vpermilpd {{.*#+}} ymm1 = ymm1[0,1,2,2]
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_0300:
; AVX2: # BB#0:
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,0]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_0300:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,0]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_1000(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_1000:
; AVX1: # BB#0:
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_1000:
; AVX2: # BB#0:
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,0,0,0]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_1000:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,0,0,0]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_2200(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_2200:
; AVX1: # BB#0:
; AVX1-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_2200:
; AVX2: # BB#0:
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,0,0]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_2200:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,0,0]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_3330(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_3330:
; AVX1: # BB#0:
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3]
; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,2]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_3330:
; AVX2: # BB#0:
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,3,3,0]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_3330:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,3,3,0]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_3210(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_3210:
; AVX1: # BB#0:
; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_3210:
; AVX2: # BB#0:
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,1,0]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_3210:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,1,0]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0124(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0124:
; AVX1: # BB#0:
; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_0124:
; AVX2: # BB#0:
; AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_0124:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpbroadcastq %xmm1, %ymm1
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0142(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0142:
; AVX1: # BB#0:
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,1,2,2]
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_0142:
; AVX2: # BB#0:
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,2,2]
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_0142:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm1
; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,2,2]
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 2>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0412(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0412:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1],xmm2[0]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0]
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_0412:
; AVX2: # BB#0:
; AVX2-NEXT: vpbroadcastq %xmm1, %xmm1
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,2]
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_0412:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpbroadcastq %xmm1, %xmm1
; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,2]
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 2>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_4012(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_4012:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1],xmm2[0]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_4012:
; AVX2: # BB#0:
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,2]
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_4012:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,2]
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 2>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0145(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0145:
; AVX1: # BB#0:
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_0145:
; AVX2: # BB#0:
; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_0145:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0451(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0451:
; AVX1: # BB#0:
; AVX1-NEXT: vunpckhpd {{.*#+}} xmm2 = xmm1[1],xmm0[1]
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_0451:
; AVX2: # BB#0:
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,0,1,3]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,2,1]
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5],ymm0[6,7]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_0451:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,0,1,3]
; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,2,1]
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5],ymm0[6,7]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 5, i32 1>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_4501(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_4501:
; AVX1: # BB#0:
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_4501:
; AVX2: # BB#0:
; AVX2-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_4501:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vinserti32x4 $1, %xmm0, %ymm1, %ymm0
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_4015(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_4015:
; AVX1: # BB#0:
; AVX1-NEXT: vunpckhpd {{.*#+}} xmm2 = xmm0[1],xmm1[1]
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_4015:
; AVX2: # BB#0:
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,1,2,1]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,3]
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5],ymm1[6,7]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_4015:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,1,2,1]
; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,3]
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5],ymm1[6,7]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 5>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_2u35(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_2u35:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_2u35:
; AVX2: # BB#0:
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,1,3,1]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_2u35:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,1,3,1]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 2, i32 undef, i32 3, i32 5>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_1251(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_1251:
; AVX1: # BB#0:
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
; AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm2[0],ymm0[2],ymm2[3]
; AVX1-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_1251:
; AVX2: # BB#0:
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,1,1,3]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,2,2,1]
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_1251:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,1,1,3]
; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,2,2,1]
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 2, i32 5, i32 1>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_1054(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_1054:
; AVX1: # BB#0:
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_1054:
; AVX2: # BB#0:
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_1054:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 0, i32 5, i32 4>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_3254(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_3254:
; AVX1: # BB#0:
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_3254:
; AVX2: # BB#0:
; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_3254:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; AVX512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 2, i32 5, i32 4>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_3276(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_3276:
; AVX1: # BB#0:
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_3276:
; AVX2: # BB#0:
; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_3276:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; AVX512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 2, i32 7, i32 6>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_1076(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_1076:
; AVX1: # BB#0:
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_1076:
; AVX2: # BB#0:
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_1076:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 0, i32 7, i32 6>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0415(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0415:
; AVX1: # BB#0:
; AVX1-NEXT: vunpckhpd {{.*#+}} xmm2 = xmm0[1],xmm1[1]
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_0415:
; AVX2: # BB#0:
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,0,2,1]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,3]
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_0415:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,0,2,1]
; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,3]
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_z4z6(<4 x i64> %a) {
; AVX1-LABEL: shuffle_v4i64_z4z6:
; AVX1: # BB#0:
; AVX1-NEXT: vxorpd %ymm1, %ymm1, %ymm1
; AVX1-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_z4z6:
; AVX2: # BB#0:
; AVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_z4z6:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x i64> zeroinitializer, <4 x i64> %a, <4 x i32> <i32 0, i32 4, i32 0, i32 6>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_5zuz(<4 x i64> %a) {
; AVX1-LABEL: shuffle_v4i64_5zuz:
; AVX1: # BB#0:
; AVX1-NEXT: vxorpd %ymm1, %ymm1, %ymm1
; AVX1-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_5zuz:
; AVX2: # BB#0:
; AVX2-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_5zuz:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x i64> zeroinitializer, <4 x i64> %a, <4 x i32> <i32 5, i32 0, i32 undef, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_40u2(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_40u2:
; AVX1: # BB#0:
; AVX1-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_40u2:
; AVX2: # BB#0:
; AVX2-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_40u2:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 0, i32 undef, i32 2>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_15uu(<4 x i64> %a, <4 x i64> %b) {
; ALL-LABEL: shuffle_v4i64_15uu:
; ALL: # BB#0:
; ALL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 5, i32 undef, i32 undef>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_11uu(<4 x i64> %a, <4 x i64> %b) {
; ALL-LABEL: shuffle_v4i64_11uu:
; ALL: # BB#0:
; ALL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 1, i32 undef, i32 undef>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_22uu(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_22uu:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_22uu:
; AVX2: # BB#0:
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,2,3]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_22uu:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,2,3]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 2, i32 2, i32 undef, i32 undef>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_3333(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_3333:
; AVX1: # BB#0:
; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,3]
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_3333:
; AVX2: # BB#0:
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,3,3,3]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_3333:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,3,3,3]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  ret <4 x i64> %shuffle
}

define <4 x i64> @stress_test1(<4 x i64> %a, <4 x i64> %b) {
; ALL-LABEL: stress_test1:
; ALL: retq
  %c = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> <i32 3, i32 1, i32 1, i32 0>
  %d = shufflevector <4 x i64> %c, <4 x i64> undef, <4 x i32> <i32 3, i32 undef, i32 2, i32 undef>
  %e = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 undef>
  %f = shufflevector <4 x i64> %d, <4 x i64> %e, <4 x i32> <i32 5, i32 1, i32 1, i32 0>

  ret <4 x i64> %f
}

define <4 x i64> @insert_reg_and_zero_v4i64(i64 %a) {
; ALL-LABEL: insert_reg_and_zero_v4i64:
; ALL: # BB#0:
; ALL-NEXT: vmovq %rdi, %xmm0
; ALL-NEXT: retq
  %v = insertelement <4 x i64> undef, i64 %a, i64 0
  %shuffle = shufflevector <4 x i64> %v, <4 x i64> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x i64> %shuffle
}

define <4 x i64> @insert_mem_and_zero_v4i64(i64* %ptr) {
; ALL-LABEL: insert_mem_and_zero_v4i64:
; ALL: # BB#0:
; ALL-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT: retq
  %a = load i64, i64* %ptr
  %v = insertelement <4 x i64> undef, i64 %a, i64 0
  %shuffle = shufflevector <4 x i64> %v, <4 x i64> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x i64> %shuffle
}

define <4 x double> @insert_reg_and_zero_v4f64(double %a) {
; AVX1-LABEL: insert_reg_and_zero_v4f64:
; AVX1: # BB#0:
; AVX1-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; AVX1-NEXT: vxorpd %ymm1, %ymm1, %ymm1
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_reg_and_zero_v4f64:
; AVX2: # BB#0:
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; AVX2-NEXT: vxorpd %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: insert_reg_and_zero_v4f64:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; AVX512VL-NEXT: retq
  %v = insertelement <4 x double> undef, double %a, i32 0
  %shuffle = shufflevector <4 x double> %v, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x double> %shuffle
}

define <4 x double> @insert_mem_and_zero_v4f64(double* %ptr) {
; ALL-LABEL: insert_mem_and_zero_v4f64:
; ALL: # BB#0:
; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT: retq
  %a = load double, double* %ptr
  %v = insertelement <4 x double> undef, double %a, i32 0
  %shuffle = shufflevector <4 x double> %v, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x double> %shuffle
}

define <4 x double> @splat_mem_v4f64(double* %ptr) {
; ALL-LABEL: splat_mem_v4f64:
; ALL: # BB#0:
; ALL-NEXT: vbroadcastsd (%rdi), %ymm0
; ALL-NEXT: retq
  %a = load double, double* %ptr
  %v = insertelement <4 x double> undef, double %a, i32 0
  %shuffle = shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x double> %shuffle
}

define <4 x i64> @splat_mem_v4i64(i64* %ptr) {
; AVX1-LABEL: splat_mem_v4i64:
; AVX1: # BB#0:
; AVX1-NEXT: vbroadcastsd (%rdi), %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splat_mem_v4i64:
; AVX2: # BB#0:
; AVX2-NEXT: vbroadcastsd (%rdi), %ymm0
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: splat_mem_v4i64:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpbroadcastq (%rdi), %ymm0
; AVX512VL-NEXT: retq
  %a = load i64, i64* %ptr
  %v = insertelement <4 x i64> undef, i64 %a, i64 0
  %shuffle = shufflevector <4 x i64> %v, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x double> @splat_mem_v4f64_2(double* %p) {
; ALL-LABEL: splat_mem_v4f64_2:
; ALL: # BB#0:
; ALL-NEXT: vbroadcastsd (%rdi), %ymm0
; ALL-NEXT: retq
  %1 = load double, double* %p
  %2 = insertelement <2 x double> undef, double %1, i32 0
  %3 = shufflevector <2 x double> %2, <2 x double> undef, <4 x i32> zeroinitializer
  ret <4 x double> %3
}

define <4 x double> @splat_v4f64(<2 x double> %r) {
; AVX1-LABEL: splat_v4f64:
; AVX1: # BB#0:
; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splat_v4f64:
; AVX2: # BB#0:
; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: splat_v4f64:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vbroadcastsd %xmm0, %ymm0
; AVX512VL-NEXT: retq
  %1 = shufflevector <2 x double> %r, <2 x double> undef, <4 x i32> zeroinitializer
  ret <4 x double> %1
}

define <4 x i64> @splat_mem_v4i64_from_v2i64(<2 x i64>* %ptr) {
; AVX1-LABEL: splat_mem_v4i64_from_v2i64:
; AVX1: # BB#0:
; AVX1-NEXT: vbroadcastsd (%rdi), %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splat_mem_v4i64_from_v2i64:
; AVX2: # BB#0:
; AVX2-NEXT: vbroadcastsd (%rdi), %ymm0
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: splat_mem_v4i64_from_v2i64:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpbroadcastq (%rdi), %ymm0
; AVX512VL-NEXT: retq
  %v = load <2 x i64>, <2 x i64>* %ptr
  %shuffle = shufflevector <2 x i64> %v, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x double> @splat_mem_v4f64_from_v2f64(<2 x double>* %ptr) {
; ALL-LABEL: splat_mem_v4f64_from_v2f64:
; ALL: # BB#0:
; ALL-NEXT: vbroadcastsd (%rdi), %ymm0
; ALL-NEXT: retq
  %v = load <2 x double>, <2 x double>* %ptr
  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x double> %shuffle
}

define <4 x i64> @splat128_mem_v4i64_from_v2i64(<2 x i64>* %ptr) {
; AVX1-LABEL: splat128_mem_v4i64_from_v2i64:
; AVX1: # BB#0:
; AVX1-NEXT: vmovaps (%rdi), %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splat128_mem_v4i64_from_v2i64:
; AVX2: # BB#0:
; AVX2-NEXT: vmovaps (%rdi), %xmm0
; AVX2-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: splat128_mem_v4i64_from_v2i64:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vmovdqa64 (%rdi), %xmm0
; AVX512VL-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
; AVX512VL-NEXT: retq
  %v = load <2 x i64>, <2 x i64>* %ptr
  %shuffle = shufflevector <2 x i64> %v, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  ret <4 x i64> %shuffle
}

define <4 x double> @splat128_mem_v4f64_from_v2f64(<2 x double>* %ptr) {
; AVX1-LABEL: splat128_mem_v4f64_from_v2f64:
; AVX1: # BB#0:
; AVX1-NEXT: vmovaps (%rdi), %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splat128_mem_v4f64_from_v2f64:
; AVX2: # BB#0:
; AVX2-NEXT: vmovaps (%rdi), %xmm0
; AVX2-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: splat128_mem_v4f64_from_v2f64:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vmovapd (%rdi), %xmm0
; AVX512VL-NEXT: vinsertf32x4 $1, %xmm0, %ymm0, %ymm0
; AVX512VL-NEXT: retq
  %v = load <2 x double>, <2 x double>* %ptr
  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  ret <4 x double> %shuffle
}

define <4 x double> @broadcast_v4f64_0000_from_v2i64(<2 x i64> %a0) {
; AVX1-LABEL: broadcast_v4f64_0000_from_v2i64:
; AVX1: # BB#0:
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: broadcast_v4f64_0000_from_v2i64:
; AVX2: # BB#0:
; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: broadcast_v4f64_0000_from_v2i64:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vbroadcastsd %xmm0, %ymm0
; AVX512VL-NEXT: retq
  %1 = shufflevector <2 x i64> %a0, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = bitcast <4 x i64> %1 to <4 x double>
  %3 = shufflevector <4 x double> %2, <4 x double> undef, <4 x i32> zeroinitializer
  ret <4 x double> %3
}

define <4 x double> @bitcast_v4f64_0426(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: bitcast_v4f64_0426:
; AVX1: # BB#0:
; AVX1-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; AVX1-NEXT: retq
;
; AVX2-LABEL: bitcast_v4f64_0426:
; AVX2: # BB#0:
; AVX2-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: bitcast_v4f64_0426:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; AVX512VL-NEXT: retq
  %shuffle64 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 0, i32 6, i32 2>
  %bitcast32 = bitcast <4 x double> %shuffle64 to <8 x float>
  %shuffle32 = shufflevector <8 x float> %bitcast32, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
  %bitcast16 = bitcast <8 x float> %shuffle32 to <16 x i16>
  %shuffle16 = shufflevector <16 x i16> %bitcast16, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5, i32 10, i32 11, i32 8, i32 9, i32 14, i32 15, i32 12, i32 13>
  %bitcast64 = bitcast <16 x i16> %shuffle16 to <4 x double>
  ret <4 x double> %bitcast64
}

define <4 x i64> @concat_v4i64_0167(<4 x i64> %a0, <4 x i64> %a1) {
; AVX1-LABEL: concat_v4i64_0167:
; AVX1: # BB#0:
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: concat_v4i64_0167:
; AVX2: # BB#0:
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: concat_v4i64_0167:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX512VL-NEXT: retq
  %a0lo = shufflevector <4 x i64> %a0, <4 x i64> %a1, <2 x i32> <i32 0, i32 1>
  %a1hi = shufflevector <4 x i64> %a0, <4 x i64> %a1, <2 x i32> <i32 6, i32 7>
  %shuffle64 = shufflevector <2 x i64> %a0lo, <2 x i64> %a1hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i64> %shuffle64
}

define <4 x i64> @concat_v4i64_0145_bc(<4 x i64> %a0, <4 x i64> %a1) {
; AVX1-LABEL: concat_v4i64_0145_bc:
; AVX1: # BB#0:
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: concat_v4i64_0145_bc:
; AVX2: # BB#0:
; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: concat_v4i64_0145_bc:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
  %a0lo = shufflevector <4 x i64> %a0, <4 x i64> %a1, <2 x i32> <i32 0, i32 1>
  %a1lo = shufflevector <4 x i64> %a0, <4 x i64> %a1, <2 x i32> <i32 4, i32 5>
  %bc0lo = bitcast <2 x i64> %a0lo to <4 x i32>
  %bc1lo = bitcast <2 x i64> %a1lo to <4 x i32>
  %shuffle32 = shufflevector <4 x i32> %bc0lo, <4 x i32> %bc1lo, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %shuffle64 = bitcast <8 x i32> %shuffle32 to <4 x i64>
  ret <4 x i64> %shuffle64
}

define <4 x i64> @insert_dup_mem_v4i64(i64* %ptr) {
; AVX1-LABEL: insert_dup_mem_v4i64:
; AVX1: # BB#0:
; AVX1-NEXT: vbroadcastsd (%rdi), %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_dup_mem_v4i64:
; AVX2: # BB#0:
; AVX2-NEXT: vbroadcastsd (%rdi), %ymm0
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: insert_dup_mem_v4i64:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpbroadcastq (%rdi), %ymm0
; AVX512VL-NEXT: retq
  %tmp = load i64, i64* %ptr, align 1
  %tmp1 = insertelement <2 x i64> undef, i64 %tmp, i32 0
  %tmp2 = shufflevector <2 x i64> %tmp1, <2 x i64> undef, <4 x i32> zeroinitializer
  ret <4 x i64> %tmp2
}