1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX1 3; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX2 4 5target triple = "x86_64-unknown-unknown" 6 7define <8 x float> @shuffle_v8f32_00000000(<8 x float> %a, <8 x float> %b) { 8; AVX1-LABEL: shuffle_v8f32_00000000: 9; AVX1: # BB#0: 10; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0] 11; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 12; AVX1-NEXT: retq 13; 14; AVX2-LABEL: shuffle_v8f32_00000000: 15; AVX2: # BB#0: 16; AVX2-NEXT: vbroadcastss %xmm0, %ymm0 17; AVX2-NEXT: retq 18 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 19 ret <8 x float> %shuffle 20} 21 22define <8 x float> @shuffle_v8f32_00000010(<8 x float> %a, <8 x float> %b) { 23; AVX1-LABEL: shuffle_v8f32_00000010: 24; AVX1: # BB#0: 25; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0] 26; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,0] 27; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 28; AVX1-NEXT: retq 29; 30; AVX2-LABEL: shuffle_v8f32_00000010: 31; AVX2: # BB#0: 32; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,0,0,1,0] 33; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 34; AVX2-NEXT: retq 35 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0> 36 ret <8 x float> %shuffle 37} 38 39define <8 x float> @shuffle_v8f32_00000200(<8 x float> %a, <8 x float> %b) { 40; AVX1-LABEL: shuffle_v8f32_00000200: 41; AVX1: # BB#0: 42; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0] 43; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,0] 44; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 45; AVX1-NEXT: retq 46; 47; AVX2-LABEL: shuffle_v8f32_00000200: 48; AVX2: # BB#0: 49; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,0,2,0,0] 50; AVX2-NEXT: vpermps %ymm0, %ymm1, 
%ymm0 51; AVX2-NEXT: retq 52 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0> 53 ret <8 x float> %shuffle 54} 55 56define <8 x float> @shuffle_v8f32_00003000(<8 x float> %a, <8 x float> %b) { 57; AVX1-LABEL: shuffle_v8f32_00003000: 58; AVX1: # BB#0: 59; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0] 60; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,0,0,0] 61; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 62; AVX1-NEXT: retq 63; 64; AVX2-LABEL: shuffle_v8f32_00003000: 65; AVX2: # BB#0: 66; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,3,0,0,0] 67; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 68; AVX2-NEXT: retq 69 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0> 70 ret <8 x float> %shuffle 71} 72 73define <8 x float> @shuffle_v8f32_00040000(<8 x float> %a, <8 x float> %b) { 74; AVX1-LABEL: shuffle_v8f32_00040000: 75; AVX1: # BB#0: 76; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1] 77; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,0,0,0,4,4,4,4] 78; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,3,4,4,4,7] 79; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3,4,5,6,7] 80; AVX1-NEXT: retq 81; 82; AVX2-LABEL: shuffle_v8f32_00040000: 83; AVX2: # BB#0: 84; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,4,0,0,0,0] 85; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 86; AVX2-NEXT: retq 87 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0> 88 ret <8 x float> %shuffle 89} 90 91define <8 x float> @shuffle_v8f32_00500000(<8 x float> %a, <8 x float> %b) { 92; AVX1-LABEL: shuffle_v8f32_00500000: 93; AVX1: # BB#0: 94; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1] 95; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7] 96; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,4,4] 97; AVX1-NEXT: retq 98; 99; AVX2-LABEL: shuffle_v8f32_00500000: 100; AVX2: 
# BB#0: 101; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,5,0,0,0,0,0] 102; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 103; AVX2-NEXT: retq 104 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0> 105 ret <8 x float> %shuffle 106} 107 108define <8 x float> @shuffle_v8f32_06000000(<8 x float> %a, <8 x float> %b) { 109; AVX1-LABEL: shuffle_v8f32_06000000: 110; AVX1: # BB#0: 111; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1] 112; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3] 113; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,0,0,4,4,4,4] 114; AVX1-NEXT: retq 115; 116; AVX2-LABEL: shuffle_v8f32_06000000: 117; AVX2: # BB#0: 118; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,6,0,0,0,0,0,0] 119; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 120; AVX2-NEXT: retq 121 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 122 ret <8 x float> %shuffle 123} 124 125define <8 x float> @shuffle_v8f32_70000000(<8 x float> %a, <8 x float> %b) { 126; AVX1-LABEL: shuffle_v8f32_70000000: 127; AVX1: # BB#0: 128; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1] 129; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3] 130; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,0,0,0,4,4,4,4] 131; AVX1-NEXT: retq 132; 133; AVX2-LABEL: shuffle_v8f32_70000000: 134; AVX2: # BB#0: 135; AVX2-NEXT: movl $7, %eax 136; AVX2-NEXT: vmovd %eax, %xmm1 137; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 138; AVX2-NEXT: retq 139 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 140 ret <8 x float> %shuffle 141} 142 143define <8 x float> @shuffle_v8f32_01014545(<8 x float> %a, <8 x float> %b) { 144; ALL-LABEL: shuffle_v8f32_01014545: 145; ALL: # BB#0: 146; ALL-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] 147; ALL-NEXT: retq 148 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 
0, i32 1, i32 4, i32 5, i32 4, i32 5> 149 ret <8 x float> %shuffle 150} 151 152define <8 x float> @shuffle_v8f32_00112233(<8 x float> %a, <8 x float> %b) { 153; AVX1-LABEL: shuffle_v8f32_00112233: 154; AVX1: # BB#0: 155; AVX1-NEXT: vunpcklps {{.*#+}} xmm1 = xmm0[0,0,1,1] 156; AVX1-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2,2,3,3] 157; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 158; AVX1-NEXT: retq 159; 160; AVX2-LABEL: shuffle_v8f32_00112233: 161; AVX2: # BB#0: 162; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,1,1,2,2,3,3] 163; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 164; AVX2-NEXT: retq 165 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3> 166 ret <8 x float> %shuffle 167} 168 169define <8 x float> @shuffle_v8f32_00001111(<8 x float> %a, <8 x float> %b) { 170; AVX1-LABEL: shuffle_v8f32_00001111: 171; AVX1: # BB#0: 172; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0] 173; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1] 174; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 175; AVX1-NEXT: retq 176; 177; AVX2-LABEL: shuffle_v8f32_00001111: 178; AVX2: # BB#0: 179; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,1,1,1,1] 180; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 181; AVX2-NEXT: retq 182 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1> 183 ret <8 x float> %shuffle 184} 185 186define <8 x float> @shuffle_v8f32_81a3c5e7(<8 x float> %a, <8 x float> %b) { 187; ALL-LABEL: shuffle_v8f32_81a3c5e7: 188; ALL: # BB#0: 189; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7] 190; ALL-NEXT: retq 191 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7> 192 ret <8 x float> %shuffle 193} 194 195define <8 x float> @shuffle_v8f32_08080808(<8 x float> %a, <8 x float> %b) { 196; AVX1-LABEL: shuffle_v8f32_08080808: 197; AVX1: # 
BB#0: 198; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,2,0] 199; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1 200; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] 201; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 202; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 203; AVX1-NEXT: retq 204; 205; AVX2-LABEL: shuffle_v8f32_08080808: 206; AVX2: # BB#0: 207; AVX2-NEXT: vbroadcastss %xmm1, %ymm1 208; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0 209; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 210; AVX2-NEXT: retq 211 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8> 212 ret <8 x float> %shuffle 213} 214 215define <8 x float> @shuffle_v8f32_08084c4c(<8 x float> %a, <8 x float> %b) { 216; ALL-LABEL: shuffle_v8f32_08084c4c: 217; ALL: # BB#0: 218; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] 219; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7] 220; ALL-NEXT: retq 221 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12> 222 ret <8 x float> %shuffle 223} 224 225define <8 x float> @shuffle_v8f32_8823cc67(<8 x float> %a, <8 x float> %b) { 226; ALL-LABEL: shuffle_v8f32_8823cc67: 227; ALL: # BB#0: 228; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,0],ymm0[2,3],ymm1[4,4],ymm0[6,7] 229; ALL-NEXT: retq 230 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7> 231 ret <8 x float> %shuffle 232} 233 234define <8 x float> @shuffle_v8f32_9832dc76(<8 x float> %a, <8 x float> %b) { 235; ALL-LABEL: shuffle_v8f32_9832dc76: 236; ALL: # BB#0: 237; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[3,2],ymm1[5,4],ymm0[7,6] 238; ALL-NEXT: retq 239 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, 
i32 12, i32 7, i32 6> 240 ret <8 x float> %shuffle 241} 242 243define <8 x float> @shuffle_v8f32_9810dc54(<8 x float> %a, <8 x float> %b) { 244; ALL-LABEL: shuffle_v8f32_9810dc54: 245; ALL: # BB#0: 246; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[1,0],ymm1[5,4],ymm0[5,4] 247; ALL-NEXT: retq 248 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4> 249 ret <8 x float> %shuffle 250} 251 252define <8 x float> @shuffle_v8f32_08194c5d(<8 x float> %a, <8 x float> %b) { 253; ALL-LABEL: shuffle_v8f32_08194c5d: 254; ALL: # BB#0: 255; ALL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] 256; ALL-NEXT: retq 257 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13> 258 ret <8 x float> %shuffle 259} 260 261define <8 x float> @shuffle_v8f32_2a3b6e7f(<8 x float> %a, <8 x float> %b) { 262; ALL-LABEL: shuffle_v8f32_2a3b6e7f: 263; ALL: # BB#0: 264; ALL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] 265; ALL-NEXT: retq 266 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> 267 ret <8 x float> %shuffle 268} 269 270define <8 x float> @shuffle_v8f32_08192a3b(<8 x float> %a, <8 x float> %b) { 271; AVX1-LABEL: shuffle_v8f32_08192a3b: 272; AVX1: # BB#0: 273; AVX1-NEXT: vunpckhps {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 274; AVX1-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 275; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 276; AVX1-NEXT: retq 277; 278; AVX2-LABEL: shuffle_v8f32_08192a3b: 279; AVX2: # BB#0: 280; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <u,0,u,1,u,2,u,3> 281; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1 282; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <0,u,1,u,2,u,3,u> 283; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0 284; AVX2-NEXT: vblendps {{.*#+}} ymm0 = 
ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 285; AVX2-NEXT: retq 286 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> 287 ret <8 x float> %shuffle 288} 289 290define <8 x float> @shuffle_v8f32_08991abb(<8 x float> %a, <8 x float> %b) { 291; AVX1-LABEL: shuffle_v8f32_08991abb: 292; AVX1: # BB#0: 293; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm0[0,0],xmm1[0,0] 294; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[1,1] 295; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] 296; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,2,3,3] 297; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 298; AVX1-NEXT: retq 299; 300; AVX2-LABEL: shuffle_v8f32_08991abb: 301; AVX2: # BB#0: 302; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u> 303; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0 304; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <u,0,1,1,u,2,3,3> 305; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1 306; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7] 307; AVX2-NEXT: retq 308 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11> 309 ret <8 x float> %shuffle 310} 311 312define <8 x float> @shuffle_v8f32_091b2d3f(<8 x float> %a, <8 x float> %b) { 313; AVX1-LABEL: shuffle_v8f32_091b2d3f: 314; AVX1: # BB#0: 315; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3] 316; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,1,3,3] 317; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 318; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 319; AVX1-NEXT: retq 320; 321; AVX2-LABEL: shuffle_v8f32_091b2d3f: 322; AVX2: # BB#0: 323; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <0,u,1,u,2,u,3,u> 324; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0 325; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 326; AVX2-NEXT: retq 327 %shuffle = shufflevector <8 x 
float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15> 328 ret <8 x float> %shuffle 329} 330 331define <8 x float> @shuffle_v8f32_09ab1def(<8 x float> %a, <8 x float> %b) { 332; AVX1-LABEL: shuffle_v8f32_09ab1def: 333; AVX1: # BB#0: 334; AVX1-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] 335; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 336; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7] 337; AVX1-NEXT: retq 338; 339; AVX2-LABEL: shuffle_v8f32_09ab1def: 340; AVX2: # BB#0: 341; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u> 342; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0 343; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7] 344; AVX2-NEXT: retq 345 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15> 346 ret <8 x float> %shuffle 347} 348 349define <8 x float> @shuffle_v8f32_00014445(<8 x float> %a, <8 x float> %b) { 350; ALL-LABEL: shuffle_v8f32_00014445: 351; ALL: # BB#0: 352; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5] 353; ALL-NEXT: retq 354 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5> 355 ret <8 x float> %shuffle 356} 357 358define <8 x float> @shuffle_v8f32_00204464(<8 x float> %a, <8 x float> %b) { 359; ALL-LABEL: shuffle_v8f32_00204464: 360; ALL: # BB#0: 361; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4] 362; ALL-NEXT: retq 363 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4> 364 ret <8 x float> %shuffle 365} 366 367define <8 x float> @shuffle_v8f32_03004744(<8 x float> %a, <8 x float> %b) { 368; ALL-LABEL: shuffle_v8f32_03004744: 369; ALL: # BB#0: 370; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4] 371; ALL-NEXT: retq 372 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, 
i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4> 373 ret <8 x float> %shuffle 374} 375 376define <8 x float> @shuffle_v8f32_10005444(<8 x float> %a, <8 x float> %b) { 377; ALL-LABEL: shuffle_v8f32_10005444: 378; ALL: # BB#0: 379; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4] 380; ALL-NEXT: retq 381 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4> 382 ret <8 x float> %shuffle 383} 384 385define <8 x float> @shuffle_v8f32_22006644(<8 x float> %a, <8 x float> %b) { 386; ALL-LABEL: shuffle_v8f32_22006644: 387; ALL: # BB#0: 388; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4] 389; ALL-NEXT: retq 390 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4> 391 ret <8 x float> %shuffle 392} 393 394define <8 x float> @shuffle_v8f32_33307774(<8 x float> %a, <8 x float> %b) { 395; ALL-LABEL: shuffle_v8f32_33307774: 396; ALL: # BB#0: 397; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4] 398; ALL-NEXT: retq 399 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4> 400 ret <8 x float> %shuffle 401} 402 403define <8 x float> @shuffle_v8f32_32107654(<8 x float> %a, <8 x float> %b) { 404; ALL-LABEL: shuffle_v8f32_32107654: 405; ALL: # BB#0: 406; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 407; ALL-NEXT: retq 408 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> 409 ret <8 x float> %shuffle 410} 411 412define <8 x float> @shuffle_v8f32_00234467(<8 x float> %a, <8 x float> %b) { 413; ALL-LABEL: shuffle_v8f32_00234467: 414; ALL: # BB#0: 415; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7] 416; ALL-NEXT: retq 417 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7> 418 ret <8 x 
float> %shuffle 419} 420 421define <8 x float> @shuffle_v8f32_00224466(<8 x float> %a, <8 x float> %b) { 422; ALL-LABEL: shuffle_v8f32_00224466: 423; ALL: # BB#0: 424; ALL-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] 425; ALL-NEXT: retq 426 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6> 427 ret <8 x float> %shuffle 428} 429 430define <8 x float> @shuffle_v8f32_10325476(<8 x float> %a, <8 x float> %b) { 431; ALL-LABEL: shuffle_v8f32_10325476: 432; ALL: # BB#0: 433; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6] 434; ALL-NEXT: retq 435 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6> 436 ret <8 x float> %shuffle 437} 438 439define <8 x float> @shuffle_v8f32_11335577(<8 x float> %a, <8 x float> %b) { 440; ALL-LABEL: shuffle_v8f32_11335577: 441; ALL: # BB#0: 442; ALL-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] 443; ALL-NEXT: retq 444 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7> 445 ret <8 x float> %shuffle 446} 447 448define <8 x float> @shuffle_v8f32_10235467(<8 x float> %a, <8 x float> %b) { 449; ALL-LABEL: shuffle_v8f32_10235467: 450; ALL: # BB#0: 451; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7] 452; ALL-NEXT: retq 453 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7> 454 ret <8 x float> %shuffle 455} 456 457define <8 x float> @shuffle_v8f32_10225466(<8 x float> %a, <8 x float> %b) { 458; ALL-LABEL: shuffle_v8f32_10225466: 459; ALL: # BB#0: 460; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6] 461; ALL-NEXT: retq 462 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6> 463 ret <8 x float> %shuffle 464} 465 466define <8 x float> 
@shuffle_v8f32_00015444(<8 x float> %a, <8 x float> %b) { 467; ALL-LABEL: shuffle_v8f32_00015444: 468; ALL: # BB#0: 469; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,5,4,4,4] 470; ALL-NEXT: retq 471 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4> 472 ret <8 x float> %shuffle 473} 474 475define <8 x float> @shuffle_v8f32_00204644(<8 x float> %a, <8 x float> %b) { 476; ALL-LABEL: shuffle_v8f32_00204644: 477; ALL: # BB#0: 478; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,6,4,4] 479; ALL-NEXT: retq 480 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4> 481 ret <8 x float> %shuffle 482} 483 484define <8 x float> @shuffle_v8f32_03004474(<8 x float> %a, <8 x float> %b) { 485; ALL-LABEL: shuffle_v8f32_03004474: 486; ALL: # BB#0: 487; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,4,7,4] 488; ALL-NEXT: retq 489 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4> 490 ret <8 x float> %shuffle 491} 492 493define <8 x float> @shuffle_v8f32_10004444(<8 x float> %a, <8 x float> %b) { 494; ALL-LABEL: shuffle_v8f32_10004444: 495; ALL: # BB#0: 496; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,4,4,4,4] 497; ALL-NEXT: retq 498 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4> 499 ret <8 x float> %shuffle 500} 501 502define <8 x float> @shuffle_v8f32_22006446(<8 x float> %a, <8 x float> %b) { 503; ALL-LABEL: shuffle_v8f32_22006446: 504; ALL: # BB#0: 505; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,4,4,6] 506; ALL-NEXT: retq 507 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6> 508 ret <8 x float> %shuffle 509} 510 511define <8 x float> @shuffle_v8f32_33307474(<8 x float> %a, <8 x float> %b) { 512; 
ALL-LABEL: shuffle_v8f32_33307474: 513; ALL: # BB#0: 514; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,4,7,4] 515; ALL-NEXT: retq 516 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4> 517 ret <8 x float> %shuffle 518} 519 520define <8 x float> @shuffle_v8f32_32104567(<8 x float> %a, <8 x float> %b) { 521; ALL-LABEL: shuffle_v8f32_32104567: 522; ALL: # BB#0: 523; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7] 524; ALL-NEXT: retq 525 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7> 526 ret <8 x float> %shuffle 527} 528 529define <8 x float> @shuffle_v8f32_00236744(<8 x float> %a, <8 x float> %b) { 530; ALL-LABEL: shuffle_v8f32_00236744: 531; ALL: # BB#0: 532; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,6,7,4,4] 533; ALL-NEXT: retq 534 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4> 535 ret <8 x float> %shuffle 536} 537 538define <8 x float> @shuffle_v8f32_00226644(<8 x float> %a, <8 x float> %b) { 539; ALL-LABEL: shuffle_v8f32_00226644: 540; ALL: # BB#0: 541; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,2,6,6,4,4] 542; ALL-NEXT: retq 543 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4> 544 ret <8 x float> %shuffle 545} 546 547define <8 x float> @shuffle_v8f32_10324567(<8 x float> %a, <8 x float> %b) { 548; ALL-LABEL: shuffle_v8f32_10324567: 549; ALL: # BB#0: 550; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7] 551; ALL-NEXT: retq 552 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7> 553 ret <8 x float> %shuffle 554} 555 556define <8 x float> @shuffle_v8f32_11334567(<8 x float> %a, <8 x float> %b) { 557; ALL-LABEL: shuffle_v8f32_11334567: 558; ALL: # BB#0: 559; 
ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7] 560; ALL-NEXT: retq 561 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7> 562 ret <8 x float> %shuffle 563} 564 565define <8 x float> @shuffle_v8f32_01235467(<8 x float> %a, <8 x float> %b) { 566; ALL-LABEL: shuffle_v8f32_01235467: 567; ALL: # BB#0: 568; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,7] 569; ALL-NEXT: retq 570 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7> 571 ret <8 x float> %shuffle 572} 573 574define <8 x float> @shuffle_v8f32_01235466(<8 x float> %a, <8 x float> %b) { 575; ALL-LABEL: shuffle_v8f32_01235466: 576; ALL: # BB#0: 577; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,6] 578; ALL-NEXT: retq 579 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6> 580 ret <8 x float> %shuffle 581} 582 583define <8 x float> @shuffle_v8f32_002u6u44(<8 x float> %a, <8 x float> %b) { 584; ALL-LABEL: shuffle_v8f32_002u6u44: 585; ALL: # BB#0: 586; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,u,6,u,4,4] 587; ALL-NEXT: retq 588 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4> 589 ret <8 x float> %shuffle 590} 591 592define <8 x float> @shuffle_v8f32_00uu66uu(<8 x float> %a, <8 x float> %b) { 593; ALL-LABEL: shuffle_v8f32_00uu66uu: 594; ALL: # BB#0: 595; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,u,u,6,6,u,u] 596; ALL-NEXT: retq 597 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef> 598 ret <8 x float> %shuffle 599} 600 601define <8 x float> @shuffle_v8f32_103245uu(<8 x float> %a, <8 x float> %b) { 602; ALL-LABEL: shuffle_v8f32_103245uu: 603; ALL: # BB#0: 604; ALL-NEXT: vpermilps {{.*#+}} ymm0 = 
ymm0[1,0,3,2,4,5,u,u] 605; ALL-NEXT: retq 606 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef> 607 ret <8 x float> %shuffle 608} 609 610define <8 x float> @shuffle_v8f32_1133uu67(<8 x float> %a, <8 x float> %b) { 611; ALL-LABEL: shuffle_v8f32_1133uu67: 612; ALL: # BB#0: 613; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,u,u,6,7] 614; ALL-NEXT: retq 615 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7> 616 ret <8 x float> %shuffle 617} 618 619define <8 x float> @shuffle_v8f32_0uu354uu(<8 x float> %a, <8 x float> %b) { 620; ALL-LABEL: shuffle_v8f32_0uu354uu: 621; ALL: # BB#0: 622; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,u,u,3,5,4,u,u] 623; ALL-NEXT: retq 624 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef> 625 ret <8 x float> %shuffle 626} 627 628define <8 x float> @shuffle_v8f32_uuu3uu66(<8 x float> %a, <8 x float> %b) { 629; ALL-LABEL: shuffle_v8f32_uuu3uu66: 630; ALL: # BB#0: 631; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[u,u,u,3,u,u,6,6] 632; ALL-NEXT: retq 633 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6> 634 ret <8 x float> %shuffle 635} 636 637define <8 x float> @shuffle_v8f32_c348cda0(<8 x float> %a, <8 x float> %b) { 638; AVX1-LABEL: shuffle_v8f32_c348cda0: 639; AVX1: # BB#0: 640; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1] 641; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3],ymm2[0,0],ymm0[4,7],ymm2[4,4] 642; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,0,1] 643; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,1,2,0,4,5,6,4] 644; AVX1-NEXT: vblendpd {{.*#+}} ymm1 = ymm2[0],ymm1[1,2],ymm2[3] 645; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3,4,5,6],ymm0[7] 646; AVX1-NEXT: retq 647; 648; 
AVX2-LABEL: shuffle_v8f32_c348cda0: 649; AVX2: # BB#0: 650; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <u,3,4,u,u,u,u,0> 651; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0 652; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <4,u,u,0,4,5,2,u> 653; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1 654; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3,4,5,6],ymm0[7] 655; AVX2-NEXT: retq 656 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 12, i32 3, i32 4, i32 8, i32 12, i32 13, i32 10, i32 0> 657 ret <8 x float> %shuffle 658} 659 660define <8 x float> @shuffle_v8f32_f511235a(<8 x float> %a, <8 x float> %b) { 661; AVX1-LABEL: shuffle_v8f32_f511235a: 662; AVX1: # BB#0: 663; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1] 664; AVX1-NEXT: vpermilpd {{.*#+}} ymm2 = ymm2[0,0,3,2] 665; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,1,1,4,5,5,5] 666; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm2[0],ymm0[1],ymm2[2],ymm0[3] 667; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm1[2,3,0,1] 668; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[3,1,2,2,7,5,6,6] 669; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6],ymm1[7] 670; AVX1-NEXT: retq 671; 672; AVX2-LABEL: shuffle_v8f32_f511235a: 673; AVX2: # BB#0: 674; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <7,u,u,u,u,u,u,2> 675; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1 676; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <u,5,1,1,2,3,5,u> 677; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0 678; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6],ymm1[7] 679; AVX2-NEXT: retq 680 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 15, i32 5, i32 1, i32 1, i32 2, i32 3, i32 5, i32 10> 681 ret <8 x float> %shuffle 682} 683 684define <8 x float> @shuffle_v8f32_32103210(<8 x float> %a, <8 x float> %b) { 685; AVX1-LABEL: shuffle_v8f32_32103210: 686; AVX1: # BB#0: 687; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] 688; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 689; AVX1-NEXT: retq 690; 691; AVX2-LABEL: shuffle_v8f32_32103210: 692; AVX2: # 
BB#0: 693; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [3,2,1,0,3,2,1,0] 694; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 695; AVX2-NEXT: retq 696 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0> 697 ret <8 x float> %shuffle 698} 699 700define <8 x float> @shuffle_v8f32_76547654(<8 x float> %a, <8 x float> %b) { 701; AVX1-LABEL: shuffle_v8f32_76547654: 702; AVX1: # BB#0: 703; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 704; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] 705; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 706; AVX1-NEXT: retq 707; 708; AVX2-LABEL: shuffle_v8f32_76547654: 709; AVX2: # BB#0: 710; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4] 711; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 712; AVX2-NEXT: retq 713 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 7, i32 6, i32 5, i32 4> 714 ret <8 x float> %shuffle 715} 716 717define <8 x float> @shuffle_v8f32_76543210(<8 x float> %a, <8 x float> %b) { 718; AVX1-LABEL: shuffle_v8f32_76543210: 719; AVX1: # BB#0: 720; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1] 721; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 722; AVX1-NEXT: retq 723; 724; AVX2-LABEL: shuffle_v8f32_76543210: 725; AVX2: # BB#0: 726; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [7,6,5,4,3,2,1,0] 727; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 728; AVX2-NEXT: retq 729 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> 730 ret <8 x float> %shuffle 731} 732 733define <8 x float> @shuffle_v8f32_3210ba98(<8 x float> %a, <8 x float> %b) { 734; ALL-LABEL: shuffle_v8f32_3210ba98: 735; ALL: # BB#0: 736; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 737; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 738; ALL-NEXT: retq 739 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 11, i32 10, i32 9, i32 
8> 740 ret <8 x float> %shuffle 741} 742 743define <8 x float> @shuffle_v8f32_3210fedc(<8 x float> %a, <8 x float> %b) { 744; ALL-LABEL: shuffle_v8f32_3210fedc: 745; ALL: # BB#0: 746; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3] 747; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 748; ALL-NEXT: retq 749 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12> 750 ret <8 x float> %shuffle 751} 752 753define <8 x float> @shuffle_v8f32_7654fedc(<8 x float> %a, <8 x float> %b) { 754; ALL-LABEL: shuffle_v8f32_7654fedc: 755; ALL: # BB#0: 756; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] 757; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 758; ALL-NEXT: retq 759 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 15, i32 14, i32 13, i32 12> 760 ret <8 x float> %shuffle 761} 762 763define <8 x float> @shuffle_v8f32_fedc7654(<8 x float> %a, <8 x float> %b) { 764; ALL-LABEL: shuffle_v8f32_fedc7654: 765; ALL: # BB#0: 766; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3] 767; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 768; ALL-NEXT: retq 769 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 7, i32 6, i32 5, i32 4> 770 ret <8 x float> %shuffle 771} 772 773define <8 x float> @PR21138(<8 x float> %truc, <8 x float> %tchose) { 774; AVX1-LABEL: PR21138: 775; AVX1: # BB#0: 776; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 777; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[1,3],xmm2[1,3] 778; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 779; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 780; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm2[1,3] 781; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3] 782; AVX1-NEXT: retq 783; 784; AVX2-LABEL: PR21138: 785; AVX2: # BB#0: 786; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <u,u,u,u,1,3,5,7> 787; AVX2-NEXT: vpermps 
%ymm1, %ymm2, %ymm1 788; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <1,3,5,7,u,u,u,u> 789; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0 790; AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3] 791; AVX2-NEXT: retq 792 %shuffle = shufflevector <8 x float> %truc, <8 x float> %tchose, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> 793 ret <8 x float> %shuffle 794} 795 796define <8 x float> @shuffle_v8f32_ba987654(<8 x float> %a, <8 x float> %b) { 797; ALL-LABEL: shuffle_v8f32_ba987654: 798; ALL: # BB#0: 799; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3] 800; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 801; ALL-NEXT: retq 802 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4> 803 ret <8 x float> %shuffle 804} 805; Result lanes: low 128 bits = b[3..0], high 128 bits = a[3..0], as the hex digits in the name encode. NOTE(review): the CHECK lines below were hand-derived by analogy with shuffle_v8f32_3210ba98 (operands swapped); rerun utils/update_llc_test_checks.py to confirm. 806define <8 x float> @shuffle_v8f32_ba983210(<8 x float> %a, <8 x float> %b) { 807; ALL-LABEL: shuffle_v8f32_ba983210: 808; ALL: # BB#0: 809; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 810; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 811; ALL-NEXT: retq 812 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 3, i32 2, i32 1, i32 0> 813 ret <8 x float> %shuffle 814} 815 816define <8 x float> @shuffle_v8f32_80u1c4u5(<8 x float> %a, <8 x float> %b) { 817; ALL-LABEL: shuffle_v8f32_80u1c4u5: 818; ALL: # BB#0: 819; ALL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5] 820; ALL-NEXT: retq 821 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 0, i32 undef, i32 1, i32 12, i32 4, i32 undef, i32 5> 822 ret <8 x float> %shuffle 823} 824 825define <8 x float> @shuffle_v8f32_a2u3e6f7(<8 x float> %a, <8 x float> %b) { 826; ALL-LABEL: shuffle_v8f32_a2u3e6f7: 827; ALL: # BB#0: 828; ALL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[6],ymm0[6],ymm1[7],ymm0[7] 829; ALL-NEXT: retq 830 %shuffle = 
shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 10, i32 2, i32 undef, i32 3, i32 14, i32 6, i32 15, i32 7> 831 ret <8 x float> %shuffle 832} 833 834define <8 x float> @shuffle_v8f32_uuuu1111(<8 x float> %a, <8 x float> %b) { 835; AVX1-LABEL: shuffle_v8f32_uuuu1111: 836; AVX1: # BB#0: 837; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1] 838; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 839; AVX1-NEXT: retq 840; 841; AVX2-LABEL: shuffle_v8f32_uuuu1111: 842; AVX2: # BB#0: 843; AVX2-NEXT: vbroadcastss {{.*}}(%rip), %ymm1 844; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 845; AVX2-NEXT: retq 846 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 1, i32 1, i32 1> 847 ret <8 x float> %shuffle 848} 849 850define <8 x float> @shuffle_v8f32_44444444(<8 x float> %a, <8 x float> %b) { 851; AVX1-LABEL: shuffle_v8f32_44444444: 852; AVX1: # BB#0: 853; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 854; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0] 855; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 856; AVX1-NEXT: retq 857; 858; AVX2-LABEL: shuffle_v8f32_44444444: 859; AVX2: # BB#0: 860; AVX2-NEXT: vbroadcastss {{.*}}(%rip), %ymm1 861; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 862; AVX2-NEXT: retq 863 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4> 864 ret <8 x float> %shuffle 865} 866 867define <8 x float> @shuffle_v8f32_5555uuuu(<8 x float> %a, <8 x float> %b) { 868; AVX1-LABEL: shuffle_v8f32_5555uuuu: 869; AVX1: # BB#0: 870; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 871; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1] 872; AVX1-NEXT: retq 873; 874; AVX2-LABEL: shuffle_v8f32_5555uuuu: 875; AVX2: # BB#0: 876; AVX2-NEXT: vbroadcastss {{.*}}(%rip), %ymm1 877; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 878; AVX2-NEXT: retq 879 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 
undef, i32 undef, i32 undef, i32 undef> 880 ret <8 x float> %shuffle 881} 882 883define <8 x i32> @shuffle_v8i32_00000000(<8 x i32> %a, <8 x i32> %b) { 884; AVX1-LABEL: shuffle_v8i32_00000000: 885; AVX1: # BB#0: 886; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0] 887; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 888; AVX1-NEXT: retq 889; 890; AVX2-LABEL: shuffle_v8i32_00000000: 891; AVX2: # BB#0: 892; AVX2-NEXT: vbroadcastss %xmm0, %ymm0 893; AVX2-NEXT: retq 894 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 895 ret <8 x i32> %shuffle 896} 897 898define <8 x i32> @shuffle_v8i32_00000010(<8 x i32> %a, <8 x i32> %b) { 899; AVX1-LABEL: shuffle_v8i32_00000010: 900; AVX1: # BB#0: 901; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0] 902; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,0] 903; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 904; AVX1-NEXT: retq 905; 906; AVX2-LABEL: shuffle_v8i32_00000010: 907; AVX2: # BB#0: 908; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,1,0] 909; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 910; AVX2-NEXT: retq 911 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0> 912 ret <8 x i32> %shuffle 913} 914 915define <8 x i32> @shuffle_v8i32_00000200(<8 x i32> %a, <8 x i32> %b) { 916; AVX1-LABEL: shuffle_v8i32_00000200: 917; AVX1: # BB#0: 918; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0] 919; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,0] 920; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 921; AVX1-NEXT: retq 922; 923; AVX2-LABEL: shuffle_v8i32_00000200: 924; AVX2: # BB#0: 925; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,2,0,0] 926; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 927; AVX2-NEXT: retq 928 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0> 929 ret <8 x i32> %shuffle 930} 931 932define <8 x i32> 
@shuffle_v8i32_00003000(<8 x i32> %a, <8 x i32> %b) { 933; AVX1-LABEL: shuffle_v8i32_00003000: 934; AVX1: # BB#0: 935; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0] 936; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,0,0,0] 937; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 938; AVX1-NEXT: retq 939; 940; AVX2-LABEL: shuffle_v8i32_00003000: 941; AVX2: # BB#0: 942; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,3,0,0,0] 943; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 944; AVX2-NEXT: retq 945 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0> 946 ret <8 x i32> %shuffle 947} 948 949define <8 x i32> @shuffle_v8i32_00040000(<8 x i32> %a, <8 x i32> %b) { 950; AVX1-LABEL: shuffle_v8i32_00040000: 951; AVX1: # BB#0: 952; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1] 953; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,0,0,0,4,4,4,4] 954; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,3,4,4,4,7] 955; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3,4,5,6,7] 956; AVX1-NEXT: retq 957; 958; AVX2-LABEL: shuffle_v8i32_00040000: 959; AVX2: # BB#0: 960; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,4,0,0,0,0] 961; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 962; AVX2-NEXT: retq 963 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0> 964 ret <8 x i32> %shuffle 965} 966 967define <8 x i32> @shuffle_v8i32_00500000(<8 x i32> %a, <8 x i32> %b) { 968; AVX1-LABEL: shuffle_v8i32_00500000: 969; AVX1: # BB#0: 970; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1] 971; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7] 972; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,4,4] 973; AVX1-NEXT: retq 974; 975; AVX2-LABEL: shuffle_v8i32_00500000: 976; AVX2: # BB#0: 977; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,5,0,0,0,0,0] 978; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 979; AVX2-NEXT: retq 980 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x 
i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0> 981 ret <8 x i32> %shuffle 982} 983 984define <8 x i32> @shuffle_v8i32_06000000(<8 x i32> %a, <8 x i32> %b) { 985; AVX1-LABEL: shuffle_v8i32_06000000: 986; AVX1: # BB#0: 987; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1] 988; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3] 989; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,0,0,4,4,4,4] 990; AVX1-NEXT: retq 991; 992; AVX2-LABEL: shuffle_v8i32_06000000: 993; AVX2: # BB#0: 994; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,6,0,0,0,0,0,0] 995; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 996; AVX2-NEXT: retq 997 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 998 ret <8 x i32> %shuffle 999} 1000 1001define <8 x i32> @shuffle_v8i32_70000000(<8 x i32> %a, <8 x i32> %b) { 1002; AVX1-LABEL: shuffle_v8i32_70000000: 1003; AVX1: # BB#0: 1004; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1] 1005; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3] 1006; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,0,0,0,4,4,4,4] 1007; AVX1-NEXT: retq 1008; 1009; AVX2-LABEL: shuffle_v8i32_70000000: 1010; AVX2: # BB#0: 1011; AVX2-NEXT: movl $7, %eax 1012; AVX2-NEXT: vmovd %eax, %xmm1 1013; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1014; AVX2-NEXT: retq 1015 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1016 ret <8 x i32> %shuffle 1017} 1018 1019define <8 x i32> @shuffle_v8i32_01014545(<8 x i32> %a, <8 x i32> %b) { 1020; AVX1-LABEL: shuffle_v8i32_01014545: 1021; AVX1: # BB#0: 1022; AVX1-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] 1023; AVX1-NEXT: retq 1024; 1025; AVX2-LABEL: shuffle_v8i32_01014545: 1026; AVX2: # BB#0: 1027; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5] 1028; AVX2-NEXT: retq 1029 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5> 1030 ret <8 x 
i32> %shuffle 1031} 1032 1033define <8 x i32> @shuffle_v8i32_00112233(<8 x i32> %a, <8 x i32> %b) { 1034; AVX1-LABEL: shuffle_v8i32_00112233: 1035; AVX1: # BB#0: 1036; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,1,1] 1037; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3] 1038; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1039; AVX1-NEXT: retq 1040; 1041; AVX2-LABEL: shuffle_v8i32_00112233: 1042; AVX2: # BB#0: 1043; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,1,1,2,2,3,3] 1044; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1045; AVX2-NEXT: retq 1046 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3> 1047 ret <8 x i32> %shuffle 1048} 1049 1050define <8 x i32> @shuffle_v8i32_00001111(<8 x i32> %a, <8 x i32> %b) { 1051; AVX1-LABEL: shuffle_v8i32_00001111: 1052; AVX1: # BB#0: 1053; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0] 1054; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1] 1055; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1056; AVX1-NEXT: retq 1057; 1058; AVX2-LABEL: shuffle_v8i32_00001111: 1059; AVX2: # BB#0: 1060; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,1,1,1,1] 1061; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1062; AVX2-NEXT: retq 1063 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1> 1064 ret <8 x i32> %shuffle 1065} 1066 1067define <8 x i32> @shuffle_v8i32_81a3c5e7(<8 x i32> %a, <8 x i32> %b) { 1068; AVX1-LABEL: shuffle_v8i32_81a3c5e7: 1069; AVX1: # BB#0: 1070; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7] 1071; AVX1-NEXT: retq 1072; 1073; AVX2-LABEL: shuffle_v8i32_81a3c5e7: 1074; AVX2: # BB#0: 1075; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7] 1076; AVX2-NEXT: retq 1077 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7> 1078 ret <8 x i32> 
%shuffle 1079} 1080 1081define <8 x i32> @shuffle_v8i32_08080808(<8 x i32> %a, <8 x i32> %b) { 1082; AVX1-LABEL: shuffle_v8i32_08080808: 1083; AVX1: # BB#0: 1084; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,2,0] 1085; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1 1086; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] 1087; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 1088; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 1089; AVX1-NEXT: retq 1090; 1091; AVX2-LABEL: shuffle_v8i32_08080808: 1092; AVX2: # BB#0: 1093; AVX2-NEXT: vpbroadcastd %xmm1, %ymm1 1094; AVX2-NEXT: vpbroadcastq %xmm0, %ymm0 1095; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 1096; AVX2-NEXT: retq 1097 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8> 1098 ret <8 x i32> %shuffle 1099} 1100 1101define <8 x i32> @shuffle_v8i32_08084c4c(<8 x i32> %a, <8 x i32> %b) { 1102; AVX1-LABEL: shuffle_v8i32_08084c4c: 1103; AVX1: # BB#0: 1104; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] 1105; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7] 1106; AVX1-NEXT: retq 1107; 1108; AVX2-LABEL: shuffle_v8i32_08084c4c: 1109; AVX2: # BB#0: 1110; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,0,2,0,4,4,6,4] 1111; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5] 1112; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 1113; AVX2-NEXT: retq 1114 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12> 1115 ret <8 x i32> %shuffle 1116} 1117 1118define <8 x i32> @shuffle_v8i32_8823cc67(<8 x i32> %a, <8 x i32> %b) { 1119; AVX1-LABEL: shuffle_v8i32_8823cc67: 1120; AVX1: # BB#0: 1121; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,0],ymm0[2,3],ymm1[4,4],ymm0[6,7] 1122; AVX1-NEXT: retq 1123; 1124; 
AVX2-LABEL: shuffle_v8i32_8823cc67: 1125; AVX2: # BB#0: 1126; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,0,2,3,4,4,6,7] 1127; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7] 1128; AVX2-NEXT: retq 1129 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7> 1130 ret <8 x i32> %shuffle 1131} 1132 1133define <8 x i32> @shuffle_v8i32_9832dc76(<8 x i32> %a, <8 x i32> %b) { 1134; AVX1-LABEL: shuffle_v8i32_9832dc76: 1135; AVX1: # BB#0: 1136; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[3,2],ymm1[5,4],ymm0[7,6] 1137; AVX1-NEXT: retq 1138; 1139; AVX2-LABEL: shuffle_v8i32_9832dc76: 1140; AVX2: # BB#0: 1141; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7] 1142; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6] 1143; AVX2-NEXT: retq 1144 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6> 1145 ret <8 x i32> %shuffle 1146} 1147 1148define <8 x i32> @shuffle_v8i32_9810dc54(<8 x i32> %a, <8 x i32> %b) { 1149; AVX1-LABEL: shuffle_v8i32_9810dc54: 1150; AVX1: # BB#0: 1151; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[1,0],ymm1[5,4],ymm0[5,4] 1152; AVX1-NEXT: retq 1153; 1154; AVX2-LABEL: shuffle_v8i32_9810dc54: 1155; AVX2: # BB#0: 1156; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,1,0,4,5,5,4] 1157; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[1,0,2,3,5,4,6,7] 1158; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7] 1159; AVX2-NEXT: retq 1160 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4> 1161 ret <8 x i32> %shuffle 1162} 1163 1164define <8 x i32> @shuffle_v8i32_08194c5d(<8 x i32> %a, <8 x i32> %b) { 1165; AVX1-LABEL: shuffle_v8i32_08194c5d: 1166; AVX1: # BB#0: 1167; AVX1-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] 1168; AVX1-NEXT: retq 
1169; 1170; AVX2-LABEL: shuffle_v8i32_08194c5d: 1171; AVX2: # BB#0: 1172; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] 1173; AVX2-NEXT: retq 1174 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13> 1175 ret <8 x i32> %shuffle 1176} 1177 1178define <8 x i32> @shuffle_v8i32_2a3b6e7f(<8 x i32> %a, <8 x i32> %b) { 1179; AVX1-LABEL: shuffle_v8i32_2a3b6e7f: 1180; AVX1: # BB#0: 1181; AVX1-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] 1182; AVX1-NEXT: retq 1183; 1184; AVX2-LABEL: shuffle_v8i32_2a3b6e7f: 1185; AVX2: # BB#0: 1186; AVX2-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] 1187; AVX2-NEXT: retq 1188 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> 1189 ret <8 x i32> %shuffle 1190} 1191 1192define <8 x i32> @shuffle_v8i32_08192a3b(<8 x i32> %a, <8 x i32> %b) { 1193; AVX1-LABEL: shuffle_v8i32_08192a3b: 1194; AVX1: # BB#0: 1195; AVX1-NEXT: vunpckhps {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1196; AVX1-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1197; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1198; AVX1-NEXT: retq 1199; 1200; AVX2-LABEL: shuffle_v8i32_08192a3b: 1201; AVX2: # BB#0: 1202; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <u,0,u,1,u,2,u,3> 1203; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1 1204; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1205; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 1206; AVX2-NEXT: retq 1207 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> 1208 ret <8 x i32> %shuffle 1209} 1210 1211define <8 x i32> @shuffle_v8i32_08991abb(<8 x i32> %a, <8 x i32> %b) { 1212; 
AVX1-LABEL: shuffle_v8i32_08991abb: 1213; AVX1: # BB#0: 1214; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm0[0,0],xmm1[0,0] 1215; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[1,1] 1216; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] 1217; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,2,3,3] 1218; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 1219; AVX1-NEXT: retq 1220; 1221; AVX2-LABEL: shuffle_v8i32_08991abb: 1222; AVX2: # BB#0: 1223; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u> 1224; AVX2-NEXT: vpermd %ymm0, %ymm2, %ymm0 1225; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <u,0,1,1,u,2,3,3> 1226; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1 1227; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7] 1228; AVX2-NEXT: retq 1229 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11> 1230 ret <8 x i32> %shuffle 1231} 1232 1233define <8 x i32> @shuffle_v8i32_091b2d3f(<8 x i32> %a, <8 x i32> %b) { 1234; AVX1-LABEL: shuffle_v8i32_091b2d3f: 1235; AVX1: # BB#0: 1236; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3] 1237; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,1,3,3] 1238; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 1239; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 1240; AVX1-NEXT: retq 1241; 1242; AVX2-LABEL: shuffle_v8i32_091b2d3f: 1243; AVX2: # BB#0: 1244; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1245; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 1246; AVX2-NEXT: retq 1247 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15> 1248 ret <8 x i32> %shuffle 1249} 1250 1251define <8 x i32> @shuffle_v8i32_09ab1def(<8 x i32> %a, <8 x i32> %b) { 1252; AVX1-LABEL: shuffle_v8i32_09ab1def: 1253; AVX1: # BB#0: 1254; AVX1-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] 
1255; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1256; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7] 1257; AVX1-NEXT: retq 1258; 1259; AVX2-LABEL: shuffle_v8i32_09ab1def: 1260; AVX2: # BB#0: 1261; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u> 1262; AVX2-NEXT: vpermd %ymm0, %ymm2, %ymm0 1263; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7] 1264; AVX2-NEXT: retq 1265 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15> 1266 ret <8 x i32> %shuffle 1267} 1268 1269define <8 x i32> @shuffle_v8i32_00014445(<8 x i32> %a, <8 x i32> %b) { 1270; AVX1-LABEL: shuffle_v8i32_00014445: 1271; AVX1: # BB#0: 1272; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5] 1273; AVX1-NEXT: retq 1274; 1275; AVX2-LABEL: shuffle_v8i32_00014445: 1276; AVX2: # BB#0: 1277; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5] 1278; AVX2-NEXT: retq 1279 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5> 1280 ret <8 x i32> %shuffle 1281} 1282 1283define <8 x i32> @shuffle_v8i32_00204464(<8 x i32> %a, <8 x i32> %b) { 1284; AVX1-LABEL: shuffle_v8i32_00204464: 1285; AVX1: # BB#0: 1286; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4] 1287; AVX1-NEXT: retq 1288; 1289; AVX2-LABEL: shuffle_v8i32_00204464: 1290; AVX2: # BB#0: 1291; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4] 1292; AVX2-NEXT: retq 1293 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4> 1294 ret <8 x i32> %shuffle 1295} 1296 1297define <8 x i32> @shuffle_v8i32_03004744(<8 x i32> %a, <8 x i32> %b) { 1298; AVX1-LABEL: shuffle_v8i32_03004744: 1299; AVX1: # BB#0: 1300; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4] 1301; AVX1-NEXT: retq 1302; 1303; AVX2-LABEL: shuffle_v8i32_03004744: 1304; AVX2: # BB#0: 1305; AVX2-NEXT: vpshufd 
{{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4] 1306; AVX2-NEXT: retq 1307 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4> 1308 ret <8 x i32> %shuffle 1309} 1310 1311define <8 x i32> @shuffle_v8i32_10005444(<8 x i32> %a, <8 x i32> %b) { 1312; AVX1-LABEL: shuffle_v8i32_10005444: 1313; AVX1: # BB#0: 1314; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4] 1315; AVX1-NEXT: retq 1316; 1317; AVX2-LABEL: shuffle_v8i32_10005444: 1318; AVX2: # BB#0: 1319; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4] 1320; AVX2-NEXT: retq 1321 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4> 1322 ret <8 x i32> %shuffle 1323} 1324 1325define <8 x i32> @shuffle_v8i32_22006644(<8 x i32> %a, <8 x i32> %b) { 1326; AVX1-LABEL: shuffle_v8i32_22006644: 1327; AVX1: # BB#0: 1328; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4] 1329; AVX1-NEXT: retq 1330; 1331; AVX2-LABEL: shuffle_v8i32_22006644: 1332; AVX2: # BB#0: 1333; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4] 1334; AVX2-NEXT: retq 1335 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4> 1336 ret <8 x i32> %shuffle 1337} 1338 1339define <8 x i32> @shuffle_v8i32_33307774(<8 x i32> %a, <8 x i32> %b) { 1340; AVX1-LABEL: shuffle_v8i32_33307774: 1341; AVX1: # BB#0: 1342; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4] 1343; AVX1-NEXT: retq 1344; 1345; AVX2-LABEL: shuffle_v8i32_33307774: 1346; AVX2: # BB#0: 1347; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4] 1348; AVX2-NEXT: retq 1349 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4> 1350 ret <8 x i32> %shuffle 1351} 1352 1353define <8 x i32> @shuffle_v8i32_32107654(<8 x i32> %a, <8 x i32> %b) { 1354; AVX1-LABEL: shuffle_v8i32_32107654: 1355; AVX1: # BB#0: 1356; 
AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 1357; AVX1-NEXT: retq 1358; 1359; AVX2-LABEL: shuffle_v8i32_32107654: 1360; AVX2: # BB#0: 1361; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 1362; AVX2-NEXT: retq 1363 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> 1364 ret <8 x i32> %shuffle 1365} 1366 1367define <8 x i32> @shuffle_v8i32_00234467(<8 x i32> %a, <8 x i32> %b) { 1368; AVX1-LABEL: shuffle_v8i32_00234467: 1369; AVX1: # BB#0: 1370; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7] 1371; AVX1-NEXT: retq 1372; 1373; AVX2-LABEL: shuffle_v8i32_00234467: 1374; AVX2: # BB#0: 1375; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7] 1376; AVX2-NEXT: retq 1377 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7> 1378 ret <8 x i32> %shuffle 1379} 1380 1381define <8 x i32> @shuffle_v8i32_00224466(<8 x i32> %a, <8 x i32> %b) { 1382; AVX1-LABEL: shuffle_v8i32_00224466: 1383; AVX1: # BB#0: 1384; AVX1-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] 1385; AVX1-NEXT: retq 1386; 1387; AVX2-LABEL: shuffle_v8i32_00224466: 1388; AVX2: # BB#0: 1389; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] 1390; AVX2-NEXT: retq 1391 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6> 1392 ret <8 x i32> %shuffle 1393} 1394 1395define <8 x i32> @shuffle_v8i32_10325476(<8 x i32> %a, <8 x i32> %b) { 1396; AVX1-LABEL: shuffle_v8i32_10325476: 1397; AVX1: # BB#0: 1398; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6] 1399; AVX1-NEXT: retq 1400; 1401; AVX2-LABEL: shuffle_v8i32_10325476: 1402; AVX2: # BB#0: 1403; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6] 1404; AVX2-NEXT: retq 1405 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6> 1406 ret <8 x 
i32> %shuffle 1407} 1408 1409define <8 x i32> @shuffle_v8i32_11335577(<8 x i32> %a, <8 x i32> %b) { 1410; AVX1-LABEL: shuffle_v8i32_11335577: 1411; AVX1: # BB#0: 1412; AVX1-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] 1413; AVX1-NEXT: retq 1414; 1415; AVX2-LABEL: shuffle_v8i32_11335577: 1416; AVX2: # BB#0: 1417; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] 1418; AVX2-NEXT: retq 1419 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7> 1420 ret <8 x i32> %shuffle 1421} 1422 1423define <8 x i32> @shuffle_v8i32_10235467(<8 x i32> %a, <8 x i32> %b) { 1424; AVX1-LABEL: shuffle_v8i32_10235467: 1425; AVX1: # BB#0: 1426; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7] 1427; AVX1-NEXT: retq 1428; 1429; AVX2-LABEL: shuffle_v8i32_10235467: 1430; AVX2: # BB#0: 1431; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7] 1432; AVX2-NEXT: retq 1433 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7> 1434 ret <8 x i32> %shuffle 1435} 1436 1437define <8 x i32> @shuffle_v8i32_10225466(<8 x i32> %a, <8 x i32> %b) { 1438; AVX1-LABEL: shuffle_v8i32_10225466: 1439; AVX1: # BB#0: 1440; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6] 1441; AVX1-NEXT: retq 1442; 1443; AVX2-LABEL: shuffle_v8i32_10225466: 1444; AVX2: # BB#0: 1445; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6] 1446; AVX2-NEXT: retq 1447 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6> 1448 ret <8 x i32> %shuffle 1449} 1450 1451define <8 x i32> @shuffle_v8i32_00015444(<8 x i32> %a, <8 x i32> %b) { 1452; AVX1-LABEL: shuffle_v8i32_00015444: 1453; AVX1: # BB#0: 1454; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,5,4,4,4] 1455; AVX1-NEXT: retq 1456; 1457; AVX2-LABEL: shuffle_v8i32_00015444: 1458; AVX2: # BB#0: 1459; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,1,5,4,4,4] 
1460; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1461; AVX2-NEXT: retq 1462 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4> 1463 ret <8 x i32> %shuffle 1464} 1465 1466define <8 x i32> @shuffle_v8i32_00204644(<8 x i32> %a, <8 x i32> %b) { 1467; AVX1-LABEL: shuffle_v8i32_00204644: 1468; AVX1: # BB#0: 1469; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,6,4,4] 1470; AVX1-NEXT: retq 1471; 1472; AVX2-LABEL: shuffle_v8i32_00204644: 1473; AVX2: # BB#0: 1474; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,2,0,4,6,4,4] 1475; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1476; AVX2-NEXT: retq 1477 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4> 1478 ret <8 x i32> %shuffle 1479} 1480 1481define <8 x i32> @shuffle_v8i32_03004474(<8 x i32> %a, <8 x i32> %b) { 1482; AVX1-LABEL: shuffle_v8i32_03004474: 1483; AVX1: # BB#0: 1484; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,4,7,4] 1485; AVX1-NEXT: retq 1486; 1487; AVX2-LABEL: shuffle_v8i32_03004474: 1488; AVX2: # BB#0: 1489; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,3,0,0,4,4,7,4] 1490; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1491; AVX2-NEXT: retq 1492 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4> 1493 ret <8 x i32> %shuffle 1494} 1495 1496define <8 x i32> @shuffle_v8i32_10004444(<8 x i32> %a, <8 x i32> %b) { 1497; AVX1-LABEL: shuffle_v8i32_10004444: 1498; AVX1: # BB#0: 1499; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,4,4,4,4] 1500; AVX1-NEXT: retq 1501; 1502; AVX2-LABEL: shuffle_v8i32_10004444: 1503; AVX2: # BB#0: 1504; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,0,0,0,4,4,4,4] 1505; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1506; AVX2-NEXT: retq 1507 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4> 1508 ret <8 x i32> %shuffle 1509} 1510 1511define <8 x i32> 
@shuffle_v8i32_22006446(<8 x i32> %a, <8 x i32> %b) { 1512; AVX1-LABEL: shuffle_v8i32_22006446: 1513; AVX1: # BB#0: 1514; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,4,4,6] 1515; AVX1-NEXT: retq 1516; 1517; AVX2-LABEL: shuffle_v8i32_22006446: 1518; AVX2: # BB#0: 1519; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [2,2,0,0,6,4,4,6] 1520; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1521; AVX2-NEXT: retq 1522 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6> 1523 ret <8 x i32> %shuffle 1524} 1525 1526define <8 x i32> @shuffle_v8i32_33307474(<8 x i32> %a, <8 x i32> %b) { 1527; AVX1-LABEL: shuffle_v8i32_33307474: 1528; AVX1: # BB#0: 1529; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,4,7,4] 1530; AVX1-NEXT: retq 1531; 1532; AVX2-LABEL: shuffle_v8i32_33307474: 1533; AVX2: # BB#0: 1534; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [3,3,3,0,7,4,7,4] 1535; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1536; AVX2-NEXT: retq 1537 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4> 1538 ret <8 x i32> %shuffle 1539} 1540 1541define <8 x i32> @shuffle_v8i32_32104567(<8 x i32> %a, <8 x i32> %b) { 1542; AVX1-LABEL: shuffle_v8i32_32104567: 1543; AVX1: # BB#0: 1544; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7] 1545; AVX1-NEXT: retq 1546; 1547; AVX2-LABEL: shuffle_v8i32_32104567: 1548; AVX2: # BB#0: 1549; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [3,2,1,0,4,5,6,7] 1550; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1551; AVX2-NEXT: retq 1552 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7> 1553 ret <8 x i32> %shuffle 1554} 1555 1556define <8 x i32> @shuffle_v8i32_00236744(<8 x i32> %a, <8 x i32> %b) { 1557; AVX1-LABEL: shuffle_v8i32_00236744: 1558; AVX1: # BB#0: 1559; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,6,7,4,4] 1560; AVX1-NEXT: retq 1561; 1562; AVX2-LABEL: shuffle_v8i32_00236744: 1563; 
AVX2: # BB#0: 1564; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,2,3,6,7,4,4] 1565; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1566; AVX2-NEXT: retq 1567 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4> 1568 ret <8 x i32> %shuffle 1569} 1570 1571define <8 x i32> @shuffle_v8i32_00226644(<8 x i32> %a, <8 x i32> %b) { 1572; AVX1-LABEL: shuffle_v8i32_00226644: 1573; AVX1: # BB#0: 1574; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,2,6,6,4,4] 1575; AVX1-NEXT: retq 1576; 1577; AVX2-LABEL: shuffle_v8i32_00226644: 1578; AVX2: # BB#0: 1579; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,2,2,6,6,4,4] 1580; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1581; AVX2-NEXT: retq 1582 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4> 1583 ret <8 x i32> %shuffle 1584} 1585 1586define <8 x i32> @shuffle_v8i32_10324567(<8 x i32> %a, <8 x i32> %b) { 1587; AVX1-LABEL: shuffle_v8i32_10324567: 1588; AVX1: # BB#0: 1589; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7] 1590; AVX1-NEXT: retq 1591; 1592; AVX2-LABEL: shuffle_v8i32_10324567: 1593; AVX2: # BB#0: 1594; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,0,3,2,4,5,6,7] 1595; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1596; AVX2-NEXT: retq 1597 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7> 1598 ret <8 x i32> %shuffle 1599} 1600 1601define <8 x i32> @shuffle_v8i32_11334567(<8 x i32> %a, <8 x i32> %b) { 1602; AVX1-LABEL: shuffle_v8i32_11334567: 1603; AVX1: # BB#0: 1604; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7] 1605; AVX1-NEXT: retq 1606; 1607; AVX2-LABEL: shuffle_v8i32_11334567: 1608; AVX2: # BB#0: 1609; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,1,3,3,4,5,6,7] 1610; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1611; AVX2-NEXT: retq 1612 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, 
i32 7> 1613 ret <8 x i32> %shuffle 1614} 1615 1616define <8 x i32> @shuffle_v8i32_01235467(<8 x i32> %a, <8 x i32> %b) { 1617; AVX1-LABEL: shuffle_v8i32_01235467: 1618; AVX1: # BB#0: 1619; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,7] 1620; AVX1-NEXT: retq 1621; 1622; AVX2-LABEL: shuffle_v8i32_01235467: 1623; AVX2: # BB#0: 1624; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,5,4,6,7] 1625; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1626; AVX2-NEXT: retq 1627 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7> 1628 ret <8 x i32> %shuffle 1629} 1630 1631define <8 x i32> @shuffle_v8i32_01235466(<8 x i32> %a, <8 x i32> %b) { 1632; AVX1-LABEL: shuffle_v8i32_01235466: 1633; AVX1: # BB#0: 1634; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,6] 1635; AVX1-NEXT: retq 1636; 1637; AVX2-LABEL: shuffle_v8i32_01235466: 1638; AVX2: # BB#0: 1639; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,5,4,6,6] 1640; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1641; AVX2-NEXT: retq 1642 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6> 1643 ret <8 x i32> %shuffle 1644} 1645 1646define <8 x i32> @shuffle_v8i32_002u6u44(<8 x i32> %a, <8 x i32> %b) { 1647; AVX1-LABEL: shuffle_v8i32_002u6u44: 1648; AVX1: # BB#0: 1649; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,u,6,u,4,4] 1650; AVX1-NEXT: retq 1651; 1652; AVX2-LABEL: shuffle_v8i32_002u6u44: 1653; AVX2: # BB#0: 1654; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <0,0,2,u,6,u,4,4> 1655; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1656; AVX2-NEXT: retq 1657 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4> 1658 ret <8 x i32> %shuffle 1659} 1660 1661define <8 x i32> @shuffle_v8i32_00uu66uu(<8 x i32> %a, <8 x i32> %b) { 1662; AVX1-LABEL: shuffle_v8i32_00uu66uu: 1663; AVX1: # BB#0: 1664; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,u,u,6,6,u,u] 
1665; AVX1-NEXT: retq 1666; 1667; AVX2-LABEL: shuffle_v8i32_00uu66uu: 1668; AVX2: # BB#0: 1669; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <0,0,u,u,6,6,u,u> 1670; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1671; AVX2-NEXT: retq 1672 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef> 1673 ret <8 x i32> %shuffle 1674} 1675 1676define <8 x i32> @shuffle_v8i32_103245uu(<8 x i32> %a, <8 x i32> %b) { 1677; AVX1-LABEL: shuffle_v8i32_103245uu: 1678; AVX1: # BB#0: 1679; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,u,u] 1680; AVX1-NEXT: retq 1681; 1682; AVX2-LABEL: shuffle_v8i32_103245uu: 1683; AVX2: # BB#0: 1684; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <1,0,3,2,4,5,u,u> 1685; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1686; AVX2-NEXT: retq 1687 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef> 1688 ret <8 x i32> %shuffle 1689} 1690 1691define <8 x i32> @shuffle_v8i32_1133uu67(<8 x i32> %a, <8 x i32> %b) { 1692; AVX1-LABEL: shuffle_v8i32_1133uu67: 1693; AVX1: # BB#0: 1694; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,u,u,6,7] 1695; AVX1-NEXT: retq 1696; 1697; AVX2-LABEL: shuffle_v8i32_1133uu67: 1698; AVX2: # BB#0: 1699; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <1,1,3,3,u,u,6,7> 1700; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1701; AVX2-NEXT: retq 1702 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7> 1703 ret <8 x i32> %shuffle 1704} 1705 1706define <8 x i32> @shuffle_v8i32_0uu354uu(<8 x i32> %a, <8 x i32> %b) { 1707; AVX1-LABEL: shuffle_v8i32_0uu354uu: 1708; AVX1: # BB#0: 1709; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,u,u,3,5,4,u,u] 1710; AVX1-NEXT: retq 1711; 1712; AVX2-LABEL: shuffle_v8i32_0uu354uu: 1713; AVX2: # BB#0: 1714; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <0,u,u,3,5,4,u,u> 1715; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1716; AVX2-NEXT: retq 1717 
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef> 1718 ret <8 x i32> %shuffle 1719} 1720 1721define <8 x i32> @shuffle_v8i32_uuu3uu66(<8 x i32> %a, <8 x i32> %b) { 1722; AVX1-LABEL: shuffle_v8i32_uuu3uu66: 1723; AVX1: # BB#0: 1724; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[u,u,u,3,u,u,6,6] 1725; AVX1-NEXT: retq 1726; 1727; AVX2-LABEL: shuffle_v8i32_uuu3uu66: 1728; AVX2: # BB#0: 1729; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <u,u,u,3,u,u,6,6> 1730; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1731; AVX2-NEXT: retq 1732 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6> 1733 ret <8 x i32> %shuffle 1734} 1735 1736define <8 x i32> @shuffle_v8i32_6caa87e5(<8 x i32> %a, <8 x i32> %b) { 1737; AVX1-LABEL: shuffle_v8i32_6caa87e5: 1738; AVX1: # BB#0: 1739; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,0,1] 1740; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm2[0,0],ymm1[2,2],ymm2[4,4],ymm1[6,6] 1741; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1742; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 1743; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 1744; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4],ymm0[5],ymm1[6],ymm0[7] 1745; AVX1-NEXT: retq 1746; 1747; AVX2-LABEL: shuffle_v8i32_6caa87e5: 1748; AVX2: # BB#0: 1749; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <u,4,2,2,0,u,6,u> 1750; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1 1751; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,1,3,2] 1752; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4],ymm0[5],ymm1[6],ymm0[7] 1753; AVX2-NEXT: retq 1754 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 6, i32 12, i32 10, i32 10, i32 8, i32 7, i32 14, i32 5> 1755 ret <8 x i32> %shuffle 1756} 1757 1758define <8 x i32> @shuffle_v8i32_32103210(<8 x i32> %a, <8 x i32> %b) { 1759; AVX1-LABEL: shuffle_v8i32_32103210: 1760; AVX1: # BB#0: 1761; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = 
xmm0[3,2,1,0] 1762; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 1763; AVX1-NEXT: retq 1764; 1765; AVX2-LABEL: shuffle_v8i32_32103210: 1766; AVX2: # BB#0: 1767; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [3,2,1,0,3,2,1,0] 1768; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1769; AVX2-NEXT: retq 1770 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0> 1771 ret <8 x i32> %shuffle 1772} 1773 1774define <8 x i32> @shuffle_v8i32_76547654(<8 x i32> %a, <8 x i32> %b) { 1775; AVX1-LABEL: shuffle_v8i32_76547654: 1776; AVX1: # BB#0: 1777; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1778; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] 1779; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 1780; AVX1-NEXT: retq 1781; 1782; AVX2-LABEL: shuffle_v8i32_76547654: 1783; AVX2: # BB#0: 1784; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4] 1785; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1786; AVX2-NEXT: retq 1787 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 7, i32 6, i32 5, i32 4> 1788 ret <8 x i32> %shuffle 1789} 1790 1791define <8 x i32> @shuffle_v8i32_76543210(<8 x i32> %a, <8 x i32> %b) { 1792; AVX1-LABEL: shuffle_v8i32_76543210: 1793; AVX1: # BB#0: 1794; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1] 1795; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 1796; AVX1-NEXT: retq 1797; 1798; AVX2-LABEL: shuffle_v8i32_76543210: 1799; AVX2: # BB#0: 1800; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [7,6,5,4,3,2,1,0] 1801; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1802; AVX2-NEXT: retq 1803 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> 1804 ret <8 x i32> %shuffle 1805} 1806 1807define <8 x i32> @shuffle_v8i32_3210ba98(<8 x i32> %a, <8 x i32> %b) { 1808; AVX1-LABEL: shuffle_v8i32_3210ba98: 1809; AVX1: # BB#0: 1810; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1811; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = 
ymm0[3,2,1,0,7,6,5,4] 1812; AVX1-NEXT: retq 1813; 1814; AVX2-LABEL: shuffle_v8i32_3210ba98: 1815; AVX2: # BB#0: 1816; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 1817; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 1818; AVX2-NEXT: retq 1819 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 11, i32 10, i32 9, i32 8> 1820 ret <8 x i32> %shuffle 1821} 1822 1823define <8 x i32> @shuffle_v8i32_3210fedc(<8 x i32> %a, <8 x i32> %b) { 1824; AVX1-LABEL: shuffle_v8i32_3210fedc: 1825; AVX1: # BB#0: 1826; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3] 1827; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 1828; AVX1-NEXT: retq 1829; 1830; AVX2-LABEL: shuffle_v8i32_3210fedc: 1831; AVX2: # BB#0: 1832; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] 1833; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 1834; AVX2-NEXT: retq 1835 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12> 1836 ret <8 x i32> %shuffle 1837} 1838 1839define <8 x i32> @shuffle_v8i32_7654fedc(<8 x i32> %a, <8 x i32> %b) { 1840; AVX1-LABEL: shuffle_v8i32_7654fedc: 1841; AVX1: # BB#0: 1842; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] 1843; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 1844; AVX1-NEXT: retq 1845; 1846; AVX2-LABEL: shuffle_v8i32_7654fedc: 1847; AVX2: # BB#0: 1848; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] 1849; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 1850; AVX2-NEXT: retq 1851 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 15, i32 14, i32 13, i32 12> 1852 ret <8 x i32> %shuffle 1853} 1854 1855define <8 x i32> @shuffle_v8i32_fedc7654(<8 x i32> %a, <8 x i32> %b) { 1856; AVX1-LABEL: shuffle_v8i32_fedc7654: 1857; AVX1: # BB#0: 1858; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3] 1859; AVX1-NEXT: vpermilps 
{{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 1860; AVX1-NEXT: retq 1861; 1862; AVX2-LABEL: shuffle_v8i32_fedc7654: 1863; AVX2: # BB#0: 1864; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3] 1865; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 1866; AVX2-NEXT: retq 1867 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 7, i32 6, i32 5, i32 4> 1868 ret <8 x i32> %shuffle 1869} 1870 1871define <8 x i32> @shuffle_v8i32_ba987654(<8 x i32> %a, <8 x i32> %b) { 1872; AVX1-LABEL: shuffle_v8i32_ba987654: 1873; AVX1: # BB#0: 1874; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3] 1875; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 1876; AVX1-NEXT: retq 1877; 1878; AVX2-LABEL: shuffle_v8i32_ba987654: 1879; AVX2: # BB#0: 1880; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] 1881; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 1882; AVX2-NEXT: retq 1883 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4> 1884 ret <8 x i32> %shuffle 1885} 1886; Result is the reversed low half of %b (b,a,9,8) followed by the reversed low half of %a (3,2,1,0); regenerate the assertions with utils/update_llc_test_checks.py after editing the mask. 1887define <8 x i32> @shuffle_v8i32_ba983210(<8 x i32> %a, <8 x i32> %b) { 1888; AVX1-LABEL: shuffle_v8i32_ba983210: 1889; AVX1: # BB#0: 1890; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1891; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 1892; AVX1-NEXT: retq 1893; 1894; AVX2-LABEL: shuffle_v8i32_ba983210: 1895; AVX2: # BB#0: 1896; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 1897; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 1898; AVX2-NEXT: retq 1899 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 3, i32 2, i32 1, i32 0> 1900 ret <8 x i32> %shuffle 1901} 1902 1903define <8 x i32> @shuffle_v8i32_zuu8zuuc(<8 x i32> %a) { 1904; AVX1-LABEL: shuffle_v8i32_zuu8zuuc: 1905; AVX1: # BB#0: 1906; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 1907; AVX1-NEXT: vshufps {{.*#+}} 
ymm0 = ymm1[0,1],ymm0[2,0],ymm1[4,5],ymm0[6,4] 1908; AVX1-NEXT: retq 1909; 1910; AVX2-LABEL: shuffle_v8i32_zuu8zuuc: 1911; AVX2: # BB#0: 1912; AVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19] 1913; AVX2-NEXT: retq 1914 %shuffle = shufflevector <8 x i32> zeroinitializer, <8 x i32> %a, <8 x i32> <i32 0, i32 undef, i32 undef, i32 8, i32 0, i32 undef, i32 undef, i32 12> 1915 ret <8 x i32> %shuffle 1916} 1917 1918define <8 x i32> @shuffle_v8i32_9ubzdefz(<8 x i32> %a) { 1919; AVX1-LABEL: shuffle_v8i32_9ubzdefz: 1920; AVX1: # BB#0: 1921; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 1922; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm1[3,0],ymm0[3,0],ymm1[7,4],ymm0[7,4] 1923; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,2],ymm1[2,0],ymm0[5,6],ymm1[6,4] 1924; AVX1-NEXT: retq 1925; 1926; AVX2-LABEL: shuffle_v8i32_9ubzdefz: 1927; AVX2: # BB#0: 1928; AVX2-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,ymm0[20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero 1929; AVX2-NEXT: retq 1930 %shuffle = shufflevector <8 x i32> zeroinitializer, <8 x i32> %a, <8 x i32> <i32 9, i32 undef, i32 11, i32 0, i32 13, i32 14, i32 15, i32 0> 1931 ret <8 x i32> %shuffle 1932} 1933; NOTE(review): the name below has 'b' (index 11) in position 4, but the mask uses i32 12 ('c'); the unpck assertions agree with the mask, so the name looks like the stale part - confirm before renaming. 1934define <8 x i32> @shuffle_v8i32_80u1b4uu(<8 x i32> %a, <8 x i32> %b) { 1935; AVX1-LABEL: shuffle_v8i32_80u1b4uu: 1936; AVX1: # BB#0: 1937; AVX1-NEXT: vunpcklps {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5] 1938; AVX1-NEXT: retq 1939; 1940; AVX2-LABEL: shuffle_v8i32_80u1b4uu: 1941; AVX2: # BB#0: 1942; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5] 1943; AVX2-NEXT: retq 1944 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 0, i32 undef, i32 1, i32 12, i32 4, i32 undef, i32 undef> 1945 ret <8 x i32> %shuffle 1946} 1947 1948define <8 x i32> 
@shuffle_v8i32_uuuu1111(<8 x i32> %a, <8 x i32> %b) { 1949; AVX1-LABEL: shuffle_v8i32_uuuu1111: 1950; AVX1: # BB#0: 1951; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1] 1952; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 1953; AVX1-NEXT: retq 1954; 1955; AVX2-LABEL: shuffle_v8i32_uuuu1111: 1956; AVX2: # BB#0: 1957; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1 1958; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1959; AVX2-NEXT: retq 1960 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 1, i32 1, i32 1> 1961 ret <8 x i32> %shuffle 1962} 1963 1964define <8 x i32> @shuffle_v8i32_44444444(<8 x i32> %a, <8 x i32> %b) { 1965; AVX1-LABEL: shuffle_v8i32_44444444: 1966; AVX1: # BB#0: 1967; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1968; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0] 1969; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 1970; AVX1-NEXT: retq 1971; 1972; AVX2-LABEL: shuffle_v8i32_44444444: 1973; AVX2: # BB#0: 1974; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1 1975; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1976; AVX2-NEXT: retq 1977 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4> 1978 ret <8 x i32> %shuffle 1979} 1980 1981define <8 x i32> @shuffle_v8i32_5555uuuu(<8 x i32> %a, <8 x i32> %b) { 1982; AVX1-LABEL: shuffle_v8i32_5555uuuu: 1983; AVX1: # BB#0: 1984; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1985; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1] 1986; AVX1-NEXT: retq 1987; 1988; AVX2-LABEL: shuffle_v8i32_5555uuuu: 1989; AVX2: # BB#0: 1990; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1 1991; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1992; AVX2-NEXT: retq 1993 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 undef, i32 undef, i32 undef, i32 undef> 1994 ret <8 x i32> %shuffle 1995} 1996 1997define <8 x float> @splat_mem_v8f32_2(float* %p) { 1998; ALL-LABEL: splat_mem_v8f32_2: 1999; 
ALL: # BB#0: 2000; ALL-NEXT: vbroadcastss (%rdi), %ymm0 2001; ALL-NEXT: retq 2002 %1 = load float, float* %p 2003 %2 = insertelement <4 x float> undef, float %1, i32 0 2004 %3 = shufflevector <4 x float> %2, <4 x float> undef, <8 x i32> zeroinitializer 2005 ret <8 x float> %3 2006} 2007 2008define <8 x float> @splat_v8f32(<4 x float> %r) { 2009; AVX1-LABEL: splat_v8f32: 2010; AVX1: # BB#0: 2011; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0] 2012; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 2013; AVX1-NEXT: retq 2014; 2015; AVX2-LABEL: splat_v8f32: 2016; AVX2: # BB#0: 2017; AVX2-NEXT: vbroadcastss %xmm0, %ymm0 2018; AVX2-NEXT: retq 2019 %1 = shufflevector <4 x float> %r, <4 x float> undef, <8 x i32> zeroinitializer 2020 ret <8 x float> %1 2021} 2022 2023; 2024; Shuffle to logical bit shifts 2025; 2026 2027define <8 x i32> @shuffle_v8i32_z0U2zUz6(<8 x i32> %a) { 2028; AVX1-LABEL: shuffle_v8i32_z0U2zUz6: 2029; AVX1: # BB#0: 2030; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 2031; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm1[0,2],ymm0[4,6],ymm1[4,6] 2032; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[2,0,3,1,6,4,7,5] 2033; AVX1-NEXT: retq 2034; 2035; AVX2-LABEL: shuffle_v8i32_z0U2zUz6: 2036; AVX2: # BB#0: 2037; AVX2-NEXT: vpsllq $32, %ymm0, %ymm0 2038; AVX2-NEXT: retq 2039 %shuffle = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 8, i32 0, i32 undef, i32 2, i32 8, i32 undef, i32 8, i32 6> 2040 ret <8 x i32> %shuffle 2041} 2042 2043define <8 x i32> @shuffle_v8i32_1U3z5zUU(<8 x i32> %a) { 2044; AVX1-LABEL: shuffle_v8i32_1U3z5zUU: 2045; AVX1: # BB#0: 2046; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 2047; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7] 2048; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7] 2049; AVX1-NEXT: retq 2050; 2051; AVX2-LABEL: shuffle_v8i32_1U3z5zUU: 2052; AVX2: # BB#0: 2053; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm0 2054; AVX2-NEXT: retq 2055 %shuffle = shufflevector <8 x i32> %a, <8 x i32> 
zeroinitializer, <8 x i32> <i32 1, i32 undef, i32 3, i32 8, i32 5, i32 8, i32 undef, i32 undef> 2056 ret <8 x i32> %shuffle 2057} 2058 2059define <8 x i32> @shuffle_v8i32_B012F456(<8 x i32> %a, <8 x i32> %b) { 2060; AVX1-LABEL: shuffle_v8i32_B012F456: 2061; AVX1: # BB#0: 2062; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm1[3,0],ymm0[0,0],ymm1[7,4],ymm0[4,4] 2063; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,2],ymm0[1,2],ymm1[4,6],ymm0[5,6] 2064; AVX1-NEXT: retq 2065; 2066; AVX2-LABEL: shuffle_v8i32_B012F456: 2067; AVX2: # BB#0: 2068; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[12,13,14,15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11],ymm1[28,29,30,31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27] 2069; AVX2-NEXT: retq 2070 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 0, i32 1, i32 2, i32 15, i32 4, i32 5, i32 6> 2071 ret <8 x i32> %shuffle 2072} 2073 2074define <8 x i32> @shuffle_v8i32_1238567C(<8 x i32> %a, <8 x i32> %b) { 2075; AVX1-LABEL: shuffle_v8i32_1238567C: 2076; AVX1: # BB#0: 2077; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,0],ymm0[3,0],ymm1[4,4],ymm0[7,4] 2078; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,2],ymm1[2,0],ymm0[5,6],ymm1[6,4] 2079; AVX1-NEXT: retq 2080; 2081; AVX2-LABEL: shuffle_v8i32_1238567C: 2082; AVX2: # BB#0: 2083; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0,1,2,3],ymm0[20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16,17,18,19] 2084; AVX2-NEXT: retq 2085 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 2, i32 3, i32 8, i32 5, i32 6, i32 7, i32 12> 2086 ret <8 x i32> %shuffle 2087} 2088 2089define <8 x i32> @shuffle_v8i32_9AB0DEF4(<8 x i32> %a, <8 x i32> %b) { 2090; AVX1-LABEL: shuffle_v8i32_9AB0DEF4: 2091; AVX1: # BB#0: 2092; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[3,0],ymm0[4,4],ymm1[7,4] 2093; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,2],ymm0[2,0],ymm1[5,6],ymm0[6,4] 2094; AVX1-NEXT: retq 2095; 2096; AVX2-LABEL: shuffle_v8i32_9AB0DEF4: 2097; AVX2: # BB#0: 2098; AVX2-NEXT: 
vpalignr {{.*#+}} ymm0 = ymm1[4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1,2,3],ymm1[20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17,18,19] 2099; AVX2-NEXT: retq 2100 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 10, i32 11, i32 0, i32 13, i32 14, i32 15, i32 4> 2101 ret <8 x i32> %shuffle 2102} 2103 2104define <8 x i32> @shuffle_v8i32_389A7CDE(<8 x i32> %a, <8 x i32> %b) { 2105; AVX1-LABEL: shuffle_v8i32_389A7CDE: 2106; AVX1: # BB#0: 2107; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,0],ymm1[0,0],ymm0[7,4],ymm1[4,4] 2108; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm1[1,2],ymm0[4,6],ymm1[5,6] 2109; AVX1-NEXT: retq 2110; 2111; AVX2-LABEL: shuffle_v8i32_389A7CDE: 2112; AVX2: # BB#0: 2113; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[12,13,14,15],ymm1[0,1,2,3,4,5,6,7,8,9,10,11],ymm0[28,29,30,31],ymm1[16,17,18,19,20,21,22,23,24,25,26,27] 2114; AVX2-NEXT: retq 2115 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 8, i32 9, i32 10, i32 7, i32 12, i32 13, i32 14> 2116 ret <8 x i32> %shuffle 2117} 2118 2119define <8 x i32> @shuffle_v8i32_30127456(<8 x i32> %a, <8 x i32> %b) { 2120; AVX1-LABEL: shuffle_v8i32_30127456: 2121; AVX1: # BB#0: 2122; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,0,1,2,7,4,5,6] 2123; AVX1-NEXT: retq 2124; 2125; AVX2-LABEL: shuffle_v8i32_30127456: 2126; AVX2: # BB#0: 2127; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,0,1,2,7,4,5,6] 2128; AVX2-NEXT: retq 2129 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6> 2130 ret <8 x i32> %shuffle 2131} 2132 2133define <8 x i32> @shuffle_v8i32_12305674(<8 x i32> %a, <8 x i32> %b) { 2134; AVX1-LABEL: shuffle_v8i32_12305674: 2135; AVX1: # BB#0: 2136; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,2,3,0,5,6,7,4] 2137; AVX1-NEXT: retq 2138; 2139; AVX2-LABEL: shuffle_v8i32_12305674: 2140; AVX2: # BB#0: 2141; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,2,3,0,5,6,7,4] 2142; AVX2-NEXT: retq 2143 %shuffle = 
shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 2, i32 3, i32 0, i32 5, i32 6, i32 7, i32 4> 2144 ret <8 x i32> %shuffle 2145} 2146 2147define <8x float> @concat_v2f32_1(<2 x float>* %tmp64, <2 x float>* %tmp65) { 2148; ALL-LABEL: concat_v2f32_1: 2149; ALL: # BB#0: # %entry 2150; ALL-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero 2151; ALL-NEXT: vmovhpd (%rsi), %xmm0, %xmm0 2152; ALL-NEXT: retq 2153entry: 2154 %tmp74 = load <2 x float>, <2 x float>* %tmp65, align 8 2155 %tmp72 = load <2 x float>, <2 x float>* %tmp64, align 8 2156 %tmp73 = shufflevector <2 x float> %tmp72, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 2157 %tmp75 = shufflevector <2 x float> %tmp74, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 2158 %tmp76 = shufflevector <8 x float> %tmp73, <8 x float> %tmp75, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef> 2159 ret <8 x float> %tmp76 2160} 2161 2162define <8x float> @concat_v2f32_2(<2 x float>* %tmp64, <2 x float>* %tmp65) { 2163; ALL-LABEL: concat_v2f32_2: 2164; ALL: # BB#0: # %entry 2165; ALL-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero 2166; ALL-NEXT: vmovhpd (%rsi), %xmm0, %xmm0 2167; ALL-NEXT: retq 2168entry: 2169 %tmp74 = load <2 x float>, <2 x float>* %tmp65, align 8 2170 %tmp72 = load <2 x float>, <2 x float>* %tmp64, align 8 2171 %tmp76 = shufflevector <2 x float> %tmp72, <2 x float> %tmp74, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef> 2172 ret <8 x float> %tmp76 2173} 2174 2175define <8x float> @concat_v2f32_3(<2 x float>* %tmp64, <2 x float>* %tmp65) { 2176; ALL-LABEL: concat_v2f32_3: 2177; ALL: # BB#0: # %entry 2178; ALL-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero 2179; ALL-NEXT: vmovhpd (%rsi), %xmm0, %xmm0 2180; ALL-NEXT: retq 2181entry: 2182 %tmp74 = load <2 x float>, <2 x float>* %tmp65, align 8 2183 %tmp72 = load <2 x float>, <2 x 
float>* %tmp64, align 8 2184 %tmp76 = shufflevector <2 x float> %tmp72, <2 x float> %tmp74, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 2185 %res = shufflevector <4 x float> %tmp76, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef> 2186 ret <8 x float> %res 2187} 2188 2189define <8 x i32> @insert_mem_and_zero_v8i32(i32* %ptr) { 2190; ALL-LABEL: insert_mem_and_zero_v8i32: 2191; ALL: # BB#0: 2192; ALL-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2193; ALL-NEXT: retq 2194 %a = load i32, i32* %ptr 2195 %v = insertelement <8 x i32> undef, i32 %a, i32 0 2196 %shuffle = shufflevector <8 x i32> %v, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 2197 ret <8 x i32> %shuffle 2198} 2199 2200define <8 x i32> @concat_v8i32_0123CDEF(<8 x i32> %a, <8 x i32> %b) { 2201; AVX1-LABEL: concat_v8i32_0123CDEF: 2202; AVX1: # BB#0: 2203; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3] 2204; AVX1-NEXT: retq 2205; 2206; AVX2-LABEL: concat_v8i32_0123CDEF: 2207; AVX2: # BB#0: 2208; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] 2209; AVX2-NEXT: retq 2210 %alo = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 2211 %bhi = shufflevector <8 x i32> %b, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2212 %shuf = shufflevector <4 x i32> %alo, <4 x i32> %bhi, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 2213 ret <8 x i32> %shuf 2214} 2215 2216define <8 x i32> @concat_v8i32_4567CDEF_bc(<8 x i32> %a0, <8 x i32> %a1) { 2217; ALL-LABEL: concat_v8i32_4567CDEF_bc: 2218; ALL: # BB#0: 2219; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] 2220; ALL-NEXT: retq 2221 %a0hi = shufflevector <8 x i32> %a0, <8 x i32> %a1, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2222 %a1hi = shufflevector <8 x i32> %a0, <8 x i32> %a1, <4 x i32> <i32 12, i32 13, i32 14, i32 15> 2223 %bc0hi = bitcast <4 x i32> %a0hi to <2 x i64> 2224 
%bc1hi = bitcast <4 x i32> %a1hi to <2 x i64> 2225 %shuffle64 = shufflevector <2 x i64> %bc0hi, <2 x i64> %bc1hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 2226 %shuffle32 = bitcast <4 x i64> %shuffle64 to <8 x i32> 2227 ret <8 x i32> %shuffle32 2228} 2229 2230define <8 x float> @concat_v8f32_4567CDEF_bc(<8 x float> %f0, <8 x float> %f1) { 2231; ALL-LABEL: concat_v8f32_4567CDEF_bc: 2232; ALL: # BB#0: 2233; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] 2234; ALL-NEXT: retq 2235 %a0 = bitcast <8 x float> %f0 to <4 x i64> 2236 %a1 = bitcast <8 x float> %f1 to <8 x i32> 2237 %a0hi = shufflevector <4 x i64> %a0, <4 x i64> undef, <2 x i32> <i32 2, i32 3> 2238 %a1hi = shufflevector <8 x i32> %a1, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2239 %bc0hi = bitcast <2 x i64> %a0hi to <2 x i64> 2240 %bc1hi = bitcast <4 x i32> %a1hi to <2 x i64> 2241 %shuffle64 = shufflevector <2 x i64> %bc0hi, <2 x i64> %bc1hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 2242 %shuffle32 = bitcast <4 x i64> %shuffle64 to <8 x float> 2243 ret <8 x float> %shuffle32 2244} 2245 2246define <8 x i32> @insert_dup_mem_v8i32(i32* %ptr) { 2247; ALL-LABEL: insert_dup_mem_v8i32: 2248; ALL: # BB#0: 2249; ALL-NEXT: vbroadcastss (%rdi), %ymm0 2250; ALL-NEXT: retq 2251 %tmp = load i32, i32* %ptr, align 4 2252 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0 2253 %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <8 x i32> zeroinitializer 2254 ret <8 x i32> %tmp2 2255} 2256