1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX1 3; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX2 4 5target triple = "x86_64-unknown-unknown" 6 7define <8 x float> @shuffle_v8f32_00000000(<8 x float> %a, <8 x float> %b) { 8; AVX1-LABEL: shuffle_v8f32_00000000: 9; AVX1: # BB#0: 10; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0] 11; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 12; AVX1-NEXT: retq 13; 14; AVX2-LABEL: shuffle_v8f32_00000000: 15; AVX2: # BB#0: 16; AVX2-NEXT: vbroadcastss %xmm0, %ymm0 17; AVX2-NEXT: retq 18 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 19 ret <8 x float> %shuffle 20} 21 22define <8 x float> @shuffle_v8f32_00000010(<8 x float> %a, <8 x float> %b) { 23; AVX1-LABEL: shuffle_v8f32_00000010: 24; AVX1: # BB#0: 25; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0] 26; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,0] 27; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 28; AVX1-NEXT: retq 29; 30; AVX2-LABEL: shuffle_v8f32_00000010: 31; AVX2: # BB#0: 32; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,0,0,1,0] 33; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 34; AVX2-NEXT: retq 35 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0> 36 ret <8 x float> %shuffle 37} 38 39define <8 x float> @shuffle_v8f32_00000200(<8 x float> %a, <8 x float> %b) { 40; AVX1-LABEL: shuffle_v8f32_00000200: 41; AVX1: # BB#0: 42; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0] 43; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,0] 44; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 45; AVX1-NEXT: retq 46; 47; AVX2-LABEL: shuffle_v8f32_00000200: 48; AVX2: # BB#0: 49; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,0,2,0,0] 50; AVX2-NEXT: vpermps %ymm0, %ymm1, 
%ymm0 51; AVX2-NEXT: retq 52 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0> 53 ret <8 x float> %shuffle 54} 55 56define <8 x float> @shuffle_v8f32_00003000(<8 x float> %a, <8 x float> %b) { 57; AVX1-LABEL: shuffle_v8f32_00003000: 58; AVX1: # BB#0: 59; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0] 60; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,0,0,0] 61; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 62; AVX1-NEXT: retq 63; 64; AVX2-LABEL: shuffle_v8f32_00003000: 65; AVX2: # BB#0: 66; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,3,0,0,0] 67; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 68; AVX2-NEXT: retq 69 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0> 70 ret <8 x float> %shuffle 71} 72 73define <8 x float> @shuffle_v8f32_00040000(<8 x float> %a, <8 x float> %b) { 74; AVX1-LABEL: shuffle_v8f32_00040000: 75; AVX1: # BB#0: 76; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,3] 77; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1] 78; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4] 79; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7] 80; AVX1-NEXT: retq 81; 82; AVX2-LABEL: shuffle_v8f32_00040000: 83; AVX2: # BB#0: 84; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,4,0,0,0,0] 85; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 86; AVX2-NEXT: retq 87 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0> 88 ret <8 x float> %shuffle 89} 90 91define <8 x float> @shuffle_v8f32_00500000(<8 x float> %a, <8 x float> %b) { 92; AVX1-LABEL: shuffle_v8f32_00500000: 93; AVX1: # BB#0: 94; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1] 95; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7] 96; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,4,4] 97; AVX1-NEXT: retq 98; 99; AVX2-LABEL: shuffle_v8f32_00500000: 100; AVX2: # BB#0: 
101; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,5,0,0,0,0,0] 102; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 103; AVX2-NEXT: retq 104 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0> 105 ret <8 x float> %shuffle 106} 107 108define <8 x float> @shuffle_v8f32_06000000(<8 x float> %a, <8 x float> %b) { 109; AVX1-LABEL: shuffle_v8f32_06000000: 110; AVX1: # BB#0: 111; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1] 112; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3] 113; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,0,0,4,4,4,4] 114; AVX1-NEXT: retq 115; 116; AVX2-LABEL: shuffle_v8f32_06000000: 117; AVX2: # BB#0: 118; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,6,0,0,0,0,0,0] 119; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 120; AVX2-NEXT: retq 121 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 122 ret <8 x float> %shuffle 123} 124 125define <8 x float> @shuffle_v8f32_70000000(<8 x float> %a, <8 x float> %b) { 126; AVX1-LABEL: shuffle_v8f32_70000000: 127; AVX1: # BB#0: 128; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1] 129; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3] 130; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,0,0,0,4,4,4,4] 131; AVX1-NEXT: retq 132; 133; AVX2-LABEL: shuffle_v8f32_70000000: 134; AVX2: # BB#0: 135; AVX2-NEXT: movl $7, %eax 136; AVX2-NEXT: vmovd %eax, %xmm1 137; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 138; AVX2-NEXT: retq 139 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 140 ret <8 x float> %shuffle 141} 142 143define <8 x float> @shuffle_v8f32_01014545(<8 x float> %a, <8 x float> %b) { 144; ALL-LABEL: shuffle_v8f32_01014545: 145; ALL: # BB#0: 146; ALL-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] 147; ALL-NEXT: retq 148 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 
1, i32 4, i32 5, i32 4, i32 5> 149 ret <8 x float> %shuffle 150} 151 152define <8 x float> @shuffle_v8f32_00112233(<8 x float> %a, <8 x float> %b) { 153; AVX1-LABEL: shuffle_v8f32_00112233: 154; AVX1: # BB#0: 155; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,1,1] 156; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3] 157; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 158; AVX1-NEXT: retq 159; 160; AVX2-LABEL: shuffle_v8f32_00112233: 161; AVX2: # BB#0: 162; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,1,1,2,2,3,3] 163; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 164; AVX2-NEXT: retq 165 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3> 166 ret <8 x float> %shuffle 167} 168 169define <8 x float> @shuffle_v8f32_00001111(<8 x float> %a, <8 x float> %b) { 170; AVX1-LABEL: shuffle_v8f32_00001111: 171; AVX1: # BB#0: 172; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0] 173; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1] 174; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 175; AVX1-NEXT: retq 176; 177; AVX2-LABEL: shuffle_v8f32_00001111: 178; AVX2: # BB#0: 179; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,1,1,1,1] 180; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 181; AVX2-NEXT: retq 182 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1> 183 ret <8 x float> %shuffle 184} 185 186define <8 x float> @shuffle_v8f32_81a3c5e7(<8 x float> %a, <8 x float> %b) { 187; ALL-LABEL: shuffle_v8f32_81a3c5e7: 188; ALL: # BB#0: 189; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7] 190; ALL-NEXT: retq 191 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7> 192 ret <8 x float> %shuffle 193} 194 195define <8 x float> @shuffle_v8f32_08080808(<8 x float> %a, <8 x float> %b) { 196; AVX1-LABEL: shuffle_v8f32_08080808: 197; AVX1: # BB#0: 
198; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] 199; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3] 200; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 201; AVX1-NEXT: retq 202; 203; AVX2-LABEL: shuffle_v8f32_08080808: 204; AVX2: # BB#0: 205; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero 206; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0 207; AVX2-NEXT: retq 208 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8> 209 ret <8 x float> %shuffle 210} 211 212define <8 x float> @shuffle_v8f32_08084c4c(<8 x float> %a, <8 x float> %b) { 213; ALL-LABEL: shuffle_v8f32_08084c4c: 214; ALL: # BB#0: 215; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] 216; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7] 217; ALL-NEXT: retq 218 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12> 219 ret <8 x float> %shuffle 220} 221 222define <8 x float> @shuffle_v8f32_8823cc67(<8 x float> %a, <8 x float> %b) { 223; ALL-LABEL: shuffle_v8f32_8823cc67: 224; ALL: # BB#0: 225; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,0],ymm0[2,3],ymm1[4,4],ymm0[6,7] 226; ALL-NEXT: retq 227 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7> 228 ret <8 x float> %shuffle 229} 230 231define <8 x float> @shuffle_v8f32_9832dc76(<8 x float> %a, <8 x float> %b) { 232; ALL-LABEL: shuffle_v8f32_9832dc76: 233; ALL: # BB#0: 234; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[3,2],ymm1[5,4],ymm0[7,6] 235; ALL-NEXT: retq 236 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6> 237 ret <8 x float> %shuffle 238} 239 240define <8 x float> @shuffle_v8f32_9810dc54(<8 x float> %a, <8 x float> %b) { 241; ALL-LABEL: shuffle_v8f32_9810dc54: 242; ALL: # BB#0: 243; ALL-NEXT: vshufps {{.*#+}} 
ymm0 = ymm1[1,0],ymm0[1,0],ymm1[5,4],ymm0[5,4] 244; ALL-NEXT: retq 245 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4> 246 ret <8 x float> %shuffle 247} 248 249define <8 x float> @shuffle_v8f32_08194c5d(<8 x float> %a, <8 x float> %b) { 250; ALL-LABEL: shuffle_v8f32_08194c5d: 251; ALL: # BB#0: 252; ALL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] 253; ALL-NEXT: retq 254 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13> 255 ret <8 x float> %shuffle 256} 257 258define <8 x float> @shuffle_v8f32_2a3b6e7f(<8 x float> %a, <8 x float> %b) { 259; ALL-LABEL: shuffle_v8f32_2a3b6e7f: 260; ALL: # BB#0: 261; ALL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] 262; ALL-NEXT: retq 263 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> 264 ret <8 x float> %shuffle 265} 266 267define <8 x float> @shuffle_v8f32_08192a3b(<8 x float> %a, <8 x float> %b) { 268; AVX1-LABEL: shuffle_v8f32_08192a3b: 269; AVX1: # BB#0: 270; AVX1-NEXT: vunpckhps {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 271; AVX1-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 272; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 273; AVX1-NEXT: retq 274; 275; AVX2-LABEL: shuffle_v8f32_08192a3b: 276; AVX2: # BB#0: 277; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <u,0,u,1,u,2,u,3> 278; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1 279; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <0,u,1,u,2,u,3,u> 280; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0 281; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 282; AVX2-NEXT: retq 283 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> 284 ret <8 x 
float> %shuffle 285} 286 287define <8 x float> @shuffle_v8f32_08991abb(<8 x float> %a, <8 x float> %b) { 288; AVX1-LABEL: shuffle_v8f32_08991abb: 289; AVX1: # BB#0: 290; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm0[0,0],xmm1[0,0] 291; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[1,1] 292; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] 293; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,2,3,3] 294; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 295; AVX1-NEXT: retq 296; 297; AVX2-LABEL: shuffle_v8f32_08991abb: 298; AVX2: # BB#0: 299; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u> 300; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0 301; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <u,0,1,1,u,2,3,3> 302; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1 303; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7] 304; AVX2-NEXT: retq 305 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11> 306 ret <8 x float> %shuffle 307} 308 309define <8 x float> @shuffle_v8f32_091b2d3f(<8 x float> %a, <8 x float> %b) { 310; AVX1-LABEL: shuffle_v8f32_091b2d3f: 311; AVX1: # BB#0: 312; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3] 313; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,1,3,3] 314; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 315; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 316; AVX1-NEXT: retq 317; 318; AVX2-LABEL: shuffle_v8f32_091b2d3f: 319; AVX2: # BB#0: 320; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <0,u,1,u,2,u,3,u> 321; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0 322; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 323; AVX2-NEXT: retq 324 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15> 325 ret <8 x float> %shuffle 326} 327 328define <8 x float> @shuffle_v8f32_09ab1def(<8 x float> %a, <8 x float> %b) { 329; AVX1-LABEL: 
shuffle_v8f32_09ab1def: 330; AVX1: # BB#0: 331; AVX1-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] 332; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 333; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7] 334; AVX1-NEXT: retq 335; 336; AVX2-LABEL: shuffle_v8f32_09ab1def: 337; AVX2: # BB#0: 338; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u> 339; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0 340; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7] 341; AVX2-NEXT: retq 342 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15> 343 ret <8 x float> %shuffle 344} 345 346define <8 x float> @shuffle_v8f32_00014445(<8 x float> %a, <8 x float> %b) { 347; ALL-LABEL: shuffle_v8f32_00014445: 348; ALL: # BB#0: 349; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5] 350; ALL-NEXT: retq 351 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5> 352 ret <8 x float> %shuffle 353} 354 355define <8 x float> @shuffle_v8f32_00204464(<8 x float> %a, <8 x float> %b) { 356; ALL-LABEL: shuffle_v8f32_00204464: 357; ALL: # BB#0: 358; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4] 359; ALL-NEXT: retq 360 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4> 361 ret <8 x float> %shuffle 362} 363 364define <8 x float> @shuffle_v8f32_03004744(<8 x float> %a, <8 x float> %b) { 365; ALL-LABEL: shuffle_v8f32_03004744: 366; ALL: # BB#0: 367; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4] 368; ALL-NEXT: retq 369 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4> 370 ret <8 x float> %shuffle 371} 372 373define <8 x float> @shuffle_v8f32_10005444(<8 x float> %a, <8 x float> %b) { 374; ALL-LABEL: shuffle_v8f32_10005444: 375; ALL: # BB#0: 376; 
ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4] 377; ALL-NEXT: retq 378 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4> 379 ret <8 x float> %shuffle 380} 381 382define <8 x float> @shuffle_v8f32_22006644(<8 x float> %a, <8 x float> %b) { 383; ALL-LABEL: shuffle_v8f32_22006644: 384; ALL: # BB#0: 385; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4] 386; ALL-NEXT: retq 387 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4> 388 ret <8 x float> %shuffle 389} 390 391define <8 x float> @shuffle_v8f32_33307774(<8 x float> %a, <8 x float> %b) { 392; ALL-LABEL: shuffle_v8f32_33307774: 393; ALL: # BB#0: 394; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4] 395; ALL-NEXT: retq 396 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4> 397 ret <8 x float> %shuffle 398} 399 400define <8 x float> @shuffle_v8f32_32107654(<8 x float> %a, <8 x float> %b) { 401; ALL-LABEL: shuffle_v8f32_32107654: 402; ALL: # BB#0: 403; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 404; ALL-NEXT: retq 405 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> 406 ret <8 x float> %shuffle 407} 408 409define <8 x float> @shuffle_v8f32_00234467(<8 x float> %a, <8 x float> %b) { 410; ALL-LABEL: shuffle_v8f32_00234467: 411; ALL: # BB#0: 412; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7] 413; ALL-NEXT: retq 414 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7> 415 ret <8 x float> %shuffle 416} 417 418define <8 x float> @shuffle_v8f32_00224466(<8 x float> %a, <8 x float> %b) { 419; ALL-LABEL: shuffle_v8f32_00224466: 420; ALL: # BB#0: 421; ALL-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] 422; 
ALL-NEXT: retq 423 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6> 424 ret <8 x float> %shuffle 425} 426 427define <8 x float> @shuffle_v8f32_10325476(<8 x float> %a, <8 x float> %b) { 428; ALL-LABEL: shuffle_v8f32_10325476: 429; ALL: # BB#0: 430; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6] 431; ALL-NEXT: retq 432 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6> 433 ret <8 x float> %shuffle 434} 435 436define <8 x float> @shuffle_v8f32_11335577(<8 x float> %a, <8 x float> %b) { 437; ALL-LABEL: shuffle_v8f32_11335577: 438; ALL: # BB#0: 439; ALL-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] 440; ALL-NEXT: retq 441 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7> 442 ret <8 x float> %shuffle 443} 444 445define <8 x float> @shuffle_v8f32_10235467(<8 x float> %a, <8 x float> %b) { 446; ALL-LABEL: shuffle_v8f32_10235467: 447; ALL: # BB#0: 448; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7] 449; ALL-NEXT: retq 450 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7> 451 ret <8 x float> %shuffle 452} 453 454define <8 x float> @shuffle_v8f32_10225466(<8 x float> %a, <8 x float> %b) { 455; ALL-LABEL: shuffle_v8f32_10225466: 456; ALL: # BB#0: 457; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6] 458; ALL-NEXT: retq 459 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6> 460 ret <8 x float> %shuffle 461} 462 463define <8 x float> @shuffle_v8f32_00015444(<8 x float> %a, <8 x float> %b) { 464; ALL-LABEL: shuffle_v8f32_00015444: 465; ALL: # BB#0: 466; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,5,4,4,4] 467; ALL-NEXT: retq 468 %shuffle = shufflevector <8 x float> %a, <8 x 
float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4> 469 ret <8 x float> %shuffle 470} 471 472define <8 x float> @shuffle_v8f32_00204644(<8 x float> %a, <8 x float> %b) { 473; ALL-LABEL: shuffle_v8f32_00204644: 474; ALL: # BB#0: 475; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,6,4,4] 476; ALL-NEXT: retq 477 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4> 478 ret <8 x float> %shuffle 479} 480 481define <8 x float> @shuffle_v8f32_03004474(<8 x float> %a, <8 x float> %b) { 482; ALL-LABEL: shuffle_v8f32_03004474: 483; ALL: # BB#0: 484; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,4,7,4] 485; ALL-NEXT: retq 486 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4> 487 ret <8 x float> %shuffle 488} 489 490define <8 x float> @shuffle_v8f32_10004444(<8 x float> %a, <8 x float> %b) { 491; ALL-LABEL: shuffle_v8f32_10004444: 492; ALL: # BB#0: 493; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,4,4,4,4] 494; ALL-NEXT: retq 495 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4> 496 ret <8 x float> %shuffle 497} 498 499define <8 x float> @shuffle_v8f32_22006446(<8 x float> %a, <8 x float> %b) { 500; ALL-LABEL: shuffle_v8f32_22006446: 501; ALL: # BB#0: 502; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,4,4,6] 503; ALL-NEXT: retq 504 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6> 505 ret <8 x float> %shuffle 506} 507 508define <8 x float> @shuffle_v8f32_33307474(<8 x float> %a, <8 x float> %b) { 509; ALL-LABEL: shuffle_v8f32_33307474: 510; ALL: # BB#0: 511; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,4,7,4] 512; ALL-NEXT: retq 513 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, 
i32 7, i32 4> 514 ret <8 x float> %shuffle 515} 516 517define <8 x float> @shuffle_v8f32_32104567(<8 x float> %a, <8 x float> %b) { 518; ALL-LABEL: shuffle_v8f32_32104567: 519; ALL: # BB#0: 520; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7] 521; ALL-NEXT: retq 522 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7> 523 ret <8 x float> %shuffle 524} 525 526define <8 x float> @shuffle_v8f32_00236744(<8 x float> %a, <8 x float> %b) { 527; ALL-LABEL: shuffle_v8f32_00236744: 528; ALL: # BB#0: 529; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,6,7,4,4] 530; ALL-NEXT: retq 531 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4> 532 ret <8 x float> %shuffle 533} 534 535define <8 x float> @shuffle_v8f32_00226644(<8 x float> %a, <8 x float> %b) { 536; ALL-LABEL: shuffle_v8f32_00226644: 537; ALL: # BB#0: 538; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,2,6,6,4,4] 539; ALL-NEXT: retq 540 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4> 541 ret <8 x float> %shuffle 542} 543 544define <8 x float> @shuffle_v8f32_10324567(<8 x float> %a, <8 x float> %b) { 545; ALL-LABEL: shuffle_v8f32_10324567: 546; ALL: # BB#0: 547; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7] 548; ALL-NEXT: retq 549 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7> 550 ret <8 x float> %shuffle 551} 552 553define <8 x float> @shuffle_v8f32_11334567(<8 x float> %a, <8 x float> %b) { 554; ALL-LABEL: shuffle_v8f32_11334567: 555; ALL: # BB#0: 556; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7] 557; ALL-NEXT: retq 558 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7> 559 ret <8 x float> %shuffle 560} 561 562define <8 
x float> @shuffle_v8f32_01235467(<8 x float> %a, <8 x float> %b) { 563; ALL-LABEL: shuffle_v8f32_01235467: 564; ALL: # BB#0: 565; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,7] 566; ALL-NEXT: retq 567 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7> 568 ret <8 x float> %shuffle 569} 570 571define <8 x float> @shuffle_v8f32_01235466(<8 x float> %a, <8 x float> %b) { 572; ALL-LABEL: shuffle_v8f32_01235466: 573; ALL: # BB#0: 574; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,6] 575; ALL-NEXT: retq 576 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6> 577 ret <8 x float> %shuffle 578} 579 580define <8 x float> @shuffle_v8f32_002u6u44(<8 x float> %a, <8 x float> %b) { 581; ALL-LABEL: shuffle_v8f32_002u6u44: 582; ALL: # BB#0: 583; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,u,6,u,4,4] 584; ALL-NEXT: retq 585 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4> 586 ret <8 x float> %shuffle 587} 588 589define <8 x float> @shuffle_v8f32_00uu66uu(<8 x float> %a, <8 x float> %b) { 590; ALL-LABEL: shuffle_v8f32_00uu66uu: 591; ALL: # BB#0: 592; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,u,u,6,6,u,u] 593; ALL-NEXT: retq 594 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef> 595 ret <8 x float> %shuffle 596} 597 598define <8 x float> @shuffle_v8f32_103245uu(<8 x float> %a, <8 x float> %b) { 599; ALL-LABEL: shuffle_v8f32_103245uu: 600; ALL: # BB#0: 601; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,u,u] 602; ALL-NEXT: retq 603 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef> 604 ret <8 x float> %shuffle 605} 606 607define <8 x float> 
@shuffle_v8f32_1133uu67(<8 x float> %a, <8 x float> %b) { 608; ALL-LABEL: shuffle_v8f32_1133uu67: 609; ALL: # BB#0: 610; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,u,u,6,7] 611; ALL-NEXT: retq 612 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7> 613 ret <8 x float> %shuffle 614} 615 616define <8 x float> @shuffle_v8f32_0uu354uu(<8 x float> %a, <8 x float> %b) { 617; ALL-LABEL: shuffle_v8f32_0uu354uu: 618; ALL: # BB#0: 619; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,u,u,3,5,4,u,u] 620; ALL-NEXT: retq 621 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef> 622 ret <8 x float> %shuffle 623} 624 625define <8 x float> @shuffle_v8f32_uuu3uu66(<8 x float> %a, <8 x float> %b) { 626; ALL-LABEL: shuffle_v8f32_uuu3uu66: 627; ALL: # BB#0: 628; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[u,u,u,3,u,u,6,6] 629; ALL-NEXT: retq 630 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6> 631 ret <8 x float> %shuffle 632} 633 634define <8 x float> @shuffle_v8f32_c348cda0(<8 x float> %a, <8 x float> %b) { 635; AVX1-LABEL: shuffle_v8f32_c348cda0: 636; AVX1: # BB#0: 637; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1] 638; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3],ymm2[0,0],ymm0[4,7],ymm2[4,4] 639; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,0,1] 640; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,1,2,0,4,5,6,4] 641; AVX1-NEXT: vblendpd {{.*#+}} ymm1 = ymm2[0],ymm1[1,2],ymm2[3] 642; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3,4,5,6],ymm0[7] 643; AVX1-NEXT: retq 644; 645; AVX2-LABEL: shuffle_v8f32_c348cda0: 646; AVX2: # BB#0: 647; AVX2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4] 648; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,3,2,1] 649; AVX2-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,1,2,0,4,5,6,4] 
650; AVX2-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[2,1,2,1] 651; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3,4,5,6],ymm0[7] 652; AVX2-NEXT: retq 653 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 12, i32 3, i32 4, i32 8, i32 12, i32 13, i32 10, i32 0> 654 ret <8 x float> %shuffle 655} 656 657define <8 x float> @shuffle_v8f32_f511235a(<8 x float> %a, <8 x float> %b) { 658; AVX1-LABEL: shuffle_v8f32_f511235a: 659; AVX1: # BB#0: 660; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[3,1,2,2,7,5,6,6] 661; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm1[2,3,0,1] 662; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1] 663; AVX1-NEXT: vpermilpd {{.*#+}} ymm2 = ymm2[0,0,3,2] 664; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,1,1,4,5,5,5] 665; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm2[0],ymm0[1],ymm2[2],ymm0[3] 666; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6],ymm1[7] 667; AVX1-NEXT: retq 668; 669; AVX2-LABEL: shuffle_v8f32_f511235a: 670; AVX2: # BB#0: 671; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <u,5,1,1,2,3,5,u> 672; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0 673; AVX2-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[3,1,2,2,7,5,6,6] 674; AVX2-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm1[2,3,0,1] 675; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6],ymm1[7] 676; AVX2-NEXT: retq 677 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 15, i32 5, i32 1, i32 1, i32 2, i32 3, i32 5, i32 10> 678 ret <8 x float> %shuffle 679} 680 681define <8 x float> @shuffle_v8f32_32103210(<8 x float> %a, <8 x float> %b) { 682; AVX1-LABEL: shuffle_v8f32_32103210: 683; AVX1: # BB#0: 684; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] 685; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 686; AVX1-NEXT: retq 687; 688; AVX2-LABEL: shuffle_v8f32_32103210: 689; AVX2: # BB#0: 690; AVX2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] 691; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,0,1] 692; AVX2-NEXT: retq 693 %shuffle = shufflevector <8 x float> %a, <8 x 
float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0> 694 ret <8 x float> %shuffle 695} 696 697define <8 x float> @shuffle_v8f32_76547654(<8 x float> %a, <8 x float> %b) { 698; ALL-LABEL: shuffle_v8f32_76547654: 699; ALL: # BB#0: 700; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 701; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3] 702; ALL-NEXT: retq 703 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 7, i32 6, i32 5, i32 4> 704 ret <8 x float> %shuffle 705} 706 707define <8 x float> @shuffle_v8f32_76543210(<8 x float> %a, <8 x float> %b) { 708; ALL-LABEL: shuffle_v8f32_76543210: 709; ALL: # BB#0: 710; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 711; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1] 712; ALL-NEXT: retq 713 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> 714 ret <8 x float> %shuffle 715} 716 717define <8 x float> @shuffle_v8f32_3210ba98(<8 x float> %a, <8 x float> %b) { 718; ALL-LABEL: shuffle_v8f32_3210ba98: 719; ALL: # BB#0: 720; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 721; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 722; ALL-NEXT: retq 723 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 11, i32 10, i32 9, i32 8> 724 ret <8 x float> %shuffle 725} 726 727define <8 x float> @shuffle_v8f32_3210fedc(<8 x float> %a, <8 x float> %b) { 728; ALL-LABEL: shuffle_v8f32_3210fedc: 729; ALL: # BB#0: 730; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3] 731; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 732; ALL-NEXT: retq 733 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12> 734 ret <8 x float> %shuffle 735} 736 737define <8 x float> @shuffle_v8f32_7654fedc(<8 x float> %a, <8 x float> %b) { 738; ALL-LABEL: 
shuffle_v8f32_7654fedc: 739; ALL: # BB#0: 740; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] 741; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 742; ALL-NEXT: retq 743 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 15, i32 14, i32 13, i32 12> 744 ret <8 x float> %shuffle 745} 746 747define <8 x float> @shuffle_v8f32_fedc7654(<8 x float> %a, <8 x float> %b) { 748; ALL-LABEL: shuffle_v8f32_fedc7654: 749; ALL: # BB#0: 750; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3] 751; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 752; ALL-NEXT: retq 753 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 7, i32 6, i32 5, i32 4> 754 ret <8 x float> %shuffle 755} 756 757define <8 x float> @PR21138(<8 x float> %truc, <8 x float> %tchose) { 758; AVX1-LABEL: PR21138: 759; AVX1: # BB#0: 760; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 761; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[1,3],xmm2[1,3] 762; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 763; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 764; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm2[1,3] 765; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3] 766; AVX1-NEXT: retq 767; 768; AVX2-LABEL: PR21138: 769; AVX2: # BB#0: 770; AVX2-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[1,3,1,3,5,7,5,7] 771; AVX2-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,1,0,3] 772; AVX2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,3,1,3,5,7,5,7] 773; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,3,2,3] 774; AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3] 775; AVX2-NEXT: retq 776 %shuffle = shufflevector <8 x float> %truc, <8 x float> %tchose, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> 777 ret <8 x float> %shuffle 778} 779 780define <8 x float> @shuffle_v8f32_ba987654(<8 x float> %a, <8 x float> %b) { 781; ALL-LABEL: shuffle_v8f32_ba987654: 782; ALL: # BB#0: 783; ALL-NEXT: vblendpd {{.*#+}} ymm0 = 
ymm1[0,1],ymm0[2,3] 784; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 785; ALL-NEXT: retq 786 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4> 787 ret <8 x float> %shuffle 788} 789 790define <8 x float> @shuffle_v8f32_ba983210(<8 x float> %a, <8 x float> %b) { 791; ALL-LABEL: shuffle_v8f32_ba983210: 792; ALL: # BB#0: 793; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 794; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 795; ALL-NEXT: retq 796 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 3, i32 2, i32 1, i32 0> 797 ret <8 x float> %shuffle 798} 799 800define <8 x float> @shuffle_v8f32_80u1c4u5(<8 x float> %a, <8 x float> %b) { 801; ALL-LABEL: shuffle_v8f32_80u1c4u5: 802; ALL: # BB#0: 803; ALL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5] 804; ALL-NEXT: retq 805 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 0, i32 undef, i32 1, i32 12, i32 4, i32 undef, i32 5> 806 ret <8 x float> %shuffle 807} 808 809define <8 x float> @shuffle_v8f32_a2u3e6f7(<8 x float> %a, <8 x float> %b) { 810; ALL-LABEL: shuffle_v8f32_a2u3e6f7: 811; ALL: # BB#0: 812; ALL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[6],ymm0[6],ymm1[7],ymm0[7] 813; ALL-NEXT: retq 814 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 10, i32 2, i32 undef, i32 3, i32 14, i32 6, i32 15, i32 7> 815 ret <8 x float> %shuffle 816} 817 818define <8 x float> @shuffle_v8f32_uuuu1111(<8 x float> %a, <8 x float> %b) { 819; ALL-LABEL: shuffle_v8f32_uuuu1111: 820; ALL: # BB#0: 821; ALL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1] 822; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 823; ALL-NEXT: retq 824 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 1, i32 1, i32 1> 825 
ret <8 x float> %shuffle 826} 827 828define <8 x float> @shuffle_v8f32_44444444(<8 x float> %a, <8 x float> %b) { 829; AVX1-LABEL: shuffle_v8f32_44444444: 830; AVX1: # BB#0: 831; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4] 832; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3] 833; AVX1-NEXT: retq 834; 835; AVX2-LABEL: shuffle_v8f32_44444444: 836; AVX2: # BB#0: 837; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm0 838; AVX2-NEXT: vbroadcastss %xmm0, %ymm0 839; AVX2-NEXT: retq 840 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4> 841 ret <8 x float> %shuffle 842} 843 844define <8 x float> @shuffle_v8f32_1188uuuu(<8 x float> %a, <8 x float> %b) { 845; ALL-LABEL: shuffle_v8f32_1188uuuu: 846; ALL: # BB#0: 847; ALL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[0,0] 848; ALL-NEXT: retq 849 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 8, i32 8, i32 undef, i32 undef, i32 undef, i32 undef> 850 ret <8 x float> %shuffle 851} 852 853define <8 x float> @shuffle_v8f32_uuuu3210(<8 x float> %a, <8 x float> %b) { 854; ALL-LABEL: shuffle_v8f32_uuuu3210: 855; ALL: # BB#0: 856; ALL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] 857; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 858; ALL-NEXT: retq 859 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 3, i32 2, i32 1, i32 0> 860 ret <8 x float> %shuffle 861} 862 863define <8 x float> @shuffle_v8f32_uuuu1188(<8 x float> %a, <8 x float> %b) { 864; ALL-LABEL: shuffle_v8f32_uuuu1188: 865; ALL: # BB#0: 866; ALL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[0,0] 867; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 868; ALL-NEXT: retq 869 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 1, i32 8, i32 8> 870 ret <8 x float> %shuffle 871} 872 873define <8 x float> @shuffle_v8f32_1111uuuu(<8 
x float> %a, <8 x float> %b) { 874; ALL-LABEL: shuffle_v8f32_1111uuuu: 875; ALL: # BB#0: 876; ALL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1] 877; ALL-NEXT: retq 878 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef> 879 ret <8 x float> %shuffle 880} 881 882define <8 x float> @shuffle_v8f32_5555uuuu(<8 x float> %a, <8 x float> %b) { 883; ALL-LABEL: shuffle_v8f32_5555uuuu: 884; ALL: # BB#0: 885; ALL-NEXT: vextractf128 $1, %ymm0, %xmm0 886; ALL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1] 887; ALL-NEXT: retq 888 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 undef, i32 undef, i32 undef, i32 undef> 889 ret <8 x float> %shuffle 890} 891 892define <8 x i32> @shuffle_v8i32_00000000(<8 x i32> %a, <8 x i32> %b) { 893; AVX1-LABEL: shuffle_v8i32_00000000: 894; AVX1: # BB#0: 895; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0] 896; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 897; AVX1-NEXT: retq 898; 899; AVX2-LABEL: shuffle_v8i32_00000000: 900; AVX2: # BB#0: 901; AVX2-NEXT: vbroadcastss %xmm0, %ymm0 902; AVX2-NEXT: retq 903 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 904 ret <8 x i32> %shuffle 905} 906 907define <8 x i32> @shuffle_v8i32_00000010(<8 x i32> %a, <8 x i32> %b) { 908; AVX1-LABEL: shuffle_v8i32_00000010: 909; AVX1: # BB#0: 910; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,0,0,0] 911; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0] 912; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 913; AVX1-NEXT: retq 914; 915; AVX2-LABEL: shuffle_v8i32_00000010: 916; AVX2: # BB#0: 917; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,1,0] 918; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 919; AVX2-NEXT: retq 920 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0> 921 ret <8 x i32> %shuffle 922} 923 
924define <8 x i32> @shuffle_v8i32_00000200(<8 x i32> %a, <8 x i32> %b) { 925; AVX1-LABEL: shuffle_v8i32_00000200: 926; AVX1: # BB#0: 927; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,0,0,0] 928; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,0,0] 929; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 930; AVX1-NEXT: retq 931; 932; AVX2-LABEL: shuffle_v8i32_00000200: 933; AVX2: # BB#0: 934; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,2,0,0] 935; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 936; AVX2-NEXT: retq 937 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0> 938 ret <8 x i32> %shuffle 939} 940 941define <8 x i32> @shuffle_v8i32_00003000(<8 x i32> %a, <8 x i32> %b) { 942; AVX1-LABEL: shuffle_v8i32_00003000: 943; AVX1: # BB#0: 944; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,0,0,0] 945; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,0,0,0] 946; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 947; AVX1-NEXT: retq 948; 949; AVX2-LABEL: shuffle_v8i32_00003000: 950; AVX2: # BB#0: 951; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,3,0,0,0] 952; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 953; AVX2-NEXT: retq 954 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0> 955 ret <8 x i32> %shuffle 956} 957 958define <8 x i32> @shuffle_v8i32_00040000(<8 x i32> %a, <8 x i32> %b) { 959; AVX1-LABEL: shuffle_v8i32_00040000: 960; AVX1: # BB#0: 961; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,3] 962; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1] 963; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4] 964; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7] 965; AVX1-NEXT: retq 966; 967; AVX2-LABEL: shuffle_v8i32_00040000: 968; AVX2: # BB#0: 969; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,4,0,0,0,0] 970; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 971; AVX2-NEXT: retq 972 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 
0, i32 4, i32 0, i32 0, i32 0, i32 0> 973 ret <8 x i32> %shuffle 974} 975 976define <8 x i32> @shuffle_v8i32_00500000(<8 x i32> %a, <8 x i32> %b) { 977; AVX1-LABEL: shuffle_v8i32_00500000: 978; AVX1: # BB#0: 979; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1] 980; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7] 981; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,4,4] 982; AVX1-NEXT: retq 983; 984; AVX2-LABEL: shuffle_v8i32_00500000: 985; AVX2: # BB#0: 986; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,5,0,0,0,0,0] 987; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 988; AVX2-NEXT: retq 989 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0> 990 ret <8 x i32> %shuffle 991} 992 993define <8 x i32> @shuffle_v8i32_06000000(<8 x i32> %a, <8 x i32> %b) { 994; AVX1-LABEL: shuffle_v8i32_06000000: 995; AVX1: # BB#0: 996; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1] 997; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3] 998; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,0,0,4,4,4,4] 999; AVX1-NEXT: retq 1000; 1001; AVX2-LABEL: shuffle_v8i32_06000000: 1002; AVX2: # BB#0: 1003; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,6,0,0,0,0,0,0] 1004; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1005; AVX2-NEXT: retq 1006 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1007 ret <8 x i32> %shuffle 1008} 1009 1010define <8 x i32> @shuffle_v8i32_70000000(<8 x i32> %a, <8 x i32> %b) { 1011; AVX1-LABEL: shuffle_v8i32_70000000: 1012; AVX1: # BB#0: 1013; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1] 1014; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3] 1015; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,0,0,0,4,4,4,4] 1016; AVX1-NEXT: retq 1017; 1018; AVX2-LABEL: shuffle_v8i32_70000000: 1019; AVX2: # BB#0: 1020; AVX2-NEXT: movl $7, %eax 1021; AVX2-NEXT: vmovd %eax, %xmm1 1022; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1023; AVX2-NEXT: 
retq 1024 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1025 ret <8 x i32> %shuffle 1026} 1027 1028define <8 x i32> @shuffle_v8i32_01014545(<8 x i32> %a, <8 x i32> %b) { 1029; AVX1-LABEL: shuffle_v8i32_01014545: 1030; AVX1: # BB#0: 1031; AVX1-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] 1032; AVX1-NEXT: retq 1033; 1034; AVX2-LABEL: shuffle_v8i32_01014545: 1035; AVX2: # BB#0: 1036; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5] 1037; AVX2-NEXT: retq 1038 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5> 1039 ret <8 x i32> %shuffle 1040} 1041 1042define <8 x i32> @shuffle_v8i32_00112233(<8 x i32> %a, <8 x i32> %b) { 1043; AVX1-LABEL: shuffle_v8i32_00112233: 1044; AVX1: # BB#0: 1045; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,0,1,1] 1046; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3] 1047; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1048; AVX1-NEXT: retq 1049; 1050; AVX2-LABEL: shuffle_v8i32_00112233: 1051; AVX2: # BB#0: 1052; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,1,1,2,2,3,3] 1053; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1054; AVX2-NEXT: retq 1055 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3> 1056 ret <8 x i32> %shuffle 1057} 1058 1059define <8 x i32> @shuffle_v8i32_00001111(<8 x i32> %a, <8 x i32> %b) { 1060; AVX1-LABEL: shuffle_v8i32_00001111: 1061; AVX1: # BB#0: 1062; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,0,0,0] 1063; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 1064; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1065; AVX1-NEXT: retq 1066; 1067; AVX2-LABEL: shuffle_v8i32_00001111: 1068; AVX2: # BB#0: 1069; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,1,1,1,1] 1070; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1071; AVX2-NEXT: retq 1072 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, 
i32 1, i32 1, i32 1> 1073 ret <8 x i32> %shuffle 1074} 1075 1076define <8 x i32> @shuffle_v8i32_81a3c5e7(<8 x i32> %a, <8 x i32> %b) { 1077; AVX1-LABEL: shuffle_v8i32_81a3c5e7: 1078; AVX1: # BB#0: 1079; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7] 1080; AVX1-NEXT: retq 1081; 1082; AVX2-LABEL: shuffle_v8i32_81a3c5e7: 1083; AVX2: # BB#0: 1084; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7] 1085; AVX2-NEXT: retq 1086 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7> 1087 ret <8 x i32> %shuffle 1088} 1089 1090define <8 x i32> @shuffle_v8i32_08080808(<8 x i32> %a, <8 x i32> %b) { 1091; AVX1-LABEL: shuffle_v8i32_08080808: 1092; AVX1: # BB#0: 1093; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] 1094; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3] 1095; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 1096; AVX1-NEXT: retq 1097; 1098; AVX2-LABEL: shuffle_v8i32_08080808: 1099; AVX2: # BB#0: 1100; AVX2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1101; AVX2-NEXT: vpbroadcastq %xmm0, %ymm0 1102; AVX2-NEXT: retq 1103 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8> 1104 ret <8 x i32> %shuffle 1105} 1106 1107define <8 x i32> @shuffle_v8i32_08084c4c(<8 x i32> %a, <8 x i32> %b) { 1108; AVX1-LABEL: shuffle_v8i32_08084c4c: 1109; AVX1: # BB#0: 1110; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] 1111; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7] 1112; AVX1-NEXT: retq 1113; 1114; AVX2-LABEL: shuffle_v8i32_08084c4c: 1115; AVX2: # BB#0: 1116; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,0,2,0,4,4,6,4] 1117; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5] 1118; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 
1119; AVX2-NEXT: retq 1120 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12> 1121 ret <8 x i32> %shuffle 1122} 1123 1124define <8 x i32> @shuffle_v8i32_8823cc67(<8 x i32> %a, <8 x i32> %b) { 1125; AVX1-LABEL: shuffle_v8i32_8823cc67: 1126; AVX1: # BB#0: 1127; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,0],ymm0[2,3],ymm1[4,4],ymm0[6,7] 1128; AVX1-NEXT: retq 1129; 1130; AVX2-LABEL: shuffle_v8i32_8823cc67: 1131; AVX2: # BB#0: 1132; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,0,2,3,4,4,6,7] 1133; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7] 1134; AVX2-NEXT: retq 1135 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7> 1136 ret <8 x i32> %shuffle 1137} 1138 1139define <8 x i32> @shuffle_v8i32_9832dc76(<8 x i32> %a, <8 x i32> %b) { 1140; AVX1-LABEL: shuffle_v8i32_9832dc76: 1141; AVX1: # BB#0: 1142; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[3,2],ymm1[5,4],ymm0[7,6] 1143; AVX1-NEXT: retq 1144; 1145; AVX2-LABEL: shuffle_v8i32_9832dc76: 1146; AVX2: # BB#0: 1147; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7] 1148; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6] 1149; AVX2-NEXT: retq 1150 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6> 1151 ret <8 x i32> %shuffle 1152} 1153 1154define <8 x i32> @shuffle_v8i32_9810dc54(<8 x i32> %a, <8 x i32> %b) { 1155; AVX1-LABEL: shuffle_v8i32_9810dc54: 1156; AVX1: # BB#0: 1157; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[1,0],ymm1[5,4],ymm0[5,4] 1158; AVX1-NEXT: retq 1159; 1160; AVX2-LABEL: shuffle_v8i32_9810dc54: 1161; AVX2: # BB#0: 1162; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,1,0,4,5,5,4] 1163; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[1,0,2,3,5,4,6,7] 1164; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7] 1165; 
AVX2-NEXT: retq 1166 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4> 1167 ret <8 x i32> %shuffle 1168} 1169 1170define <8 x i32> @shuffle_v8i32_08194c5d(<8 x i32> %a, <8 x i32> %b) { 1171; AVX1-LABEL: shuffle_v8i32_08194c5d: 1172; AVX1: # BB#0: 1173; AVX1-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] 1174; AVX1-NEXT: retq 1175; 1176; AVX2-LABEL: shuffle_v8i32_08194c5d: 1177; AVX2: # BB#0: 1178; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] 1179; AVX2-NEXT: retq 1180 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13> 1181 ret <8 x i32> %shuffle 1182} 1183 1184define <8 x i32> @shuffle_v8i32_2a3b6e7f(<8 x i32> %a, <8 x i32> %b) { 1185; AVX1-LABEL: shuffle_v8i32_2a3b6e7f: 1186; AVX1: # BB#0: 1187; AVX1-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] 1188; AVX1-NEXT: retq 1189; 1190; AVX2-LABEL: shuffle_v8i32_2a3b6e7f: 1191; AVX2: # BB#0: 1192; AVX2-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] 1193; AVX2-NEXT: retq 1194 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> 1195 ret <8 x i32> %shuffle 1196} 1197 1198define <8 x i32> @shuffle_v8i32_08192a3b(<8 x i32> %a, <8 x i32> %b) { 1199; AVX1-LABEL: shuffle_v8i32_08192a3b: 1200; AVX1: # BB#0: 1201; AVX1-NEXT: vunpckhps {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1202; AVX1-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1203; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1204; AVX1-NEXT: retq 1205; 1206; AVX2-LABEL: shuffle_v8i32_08192a3b: 1207; AVX2: # BB#0: 1208; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <u,0,u,1,u,2,u,3> 1209; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1 1210; AVX2-NEXT: 
vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1211; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 1212; AVX2-NEXT: retq 1213 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> 1214 ret <8 x i32> %shuffle 1215} 1216 1217define <8 x i32> @shuffle_v8i32_08991abb(<8 x i32> %a, <8 x i32> %b) { 1218; AVX1-LABEL: shuffle_v8i32_08991abb: 1219; AVX1: # BB#0: 1220; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm0[0,0],xmm1[0,0] 1221; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[1,1] 1222; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] 1223; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,2,3,3] 1224; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 1225; AVX1-NEXT: retq 1226; 1227; AVX2-LABEL: shuffle_v8i32_08991abb: 1228; AVX2: # BB#0: 1229; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u> 1230; AVX2-NEXT: vpermd %ymm0, %ymm2, %ymm0 1231; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <u,0,1,1,u,2,3,3> 1232; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1 1233; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7] 1234; AVX2-NEXT: retq 1235 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11> 1236 ret <8 x i32> %shuffle 1237} 1238 1239define <8 x i32> @shuffle_v8i32_091b2d3f(<8 x i32> %a, <8 x i32> %b) { 1240; AVX1-LABEL: shuffle_v8i32_091b2d3f: 1241; AVX1: # BB#0: 1242; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[0,1,1,3] 1243; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,3,3] 1244; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 1245; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 1246; AVX1-NEXT: retq 1247; 1248; AVX2-LABEL: shuffle_v8i32_091b2d3f: 1249; AVX2: # BB#0: 1250; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1251; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = 
ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 1252; AVX2-NEXT: retq 1253 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15> 1254 ret <8 x i32> %shuffle 1255} 1256 1257define <8 x i32> @shuffle_v8i32_09ab1def(<8 x i32> %a, <8 x i32> %b) { 1258; AVX1-LABEL: shuffle_v8i32_09ab1def: 1259; AVX1: # BB#0: 1260; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] 1261; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1262; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7] 1263; AVX1-NEXT: retq 1264; 1265; AVX2-LABEL: shuffle_v8i32_09ab1def: 1266; AVX2: # BB#0: 1267; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u> 1268; AVX2-NEXT: vpermd %ymm0, %ymm2, %ymm0 1269; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7] 1270; AVX2-NEXT: retq 1271 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15> 1272 ret <8 x i32> %shuffle 1273} 1274 1275define <8 x i32> @shuffle_v8i32_00014445(<8 x i32> %a, <8 x i32> %b) { 1276; AVX1-LABEL: shuffle_v8i32_00014445: 1277; AVX1: # BB#0: 1278; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5] 1279; AVX1-NEXT: retq 1280; 1281; AVX2-LABEL: shuffle_v8i32_00014445: 1282; AVX2: # BB#0: 1283; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5] 1284; AVX2-NEXT: retq 1285 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5> 1286 ret <8 x i32> %shuffle 1287} 1288 1289define <8 x i32> @shuffle_v8i32_00204464(<8 x i32> %a, <8 x i32> %b) { 1290; AVX1-LABEL: shuffle_v8i32_00204464: 1291; AVX1: # BB#0: 1292; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4] 1293; AVX1-NEXT: retq 1294; 1295; AVX2-LABEL: shuffle_v8i32_00204464: 1296; AVX2: # BB#0: 1297; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4] 1298; AVX2-NEXT: retq 1299 %shuffle = shufflevector <8 x 
i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4> 1300 ret <8 x i32> %shuffle 1301} 1302 1303define <8 x i32> @shuffle_v8i32_03004744(<8 x i32> %a, <8 x i32> %b) { 1304; AVX1-LABEL: shuffle_v8i32_03004744: 1305; AVX1: # BB#0: 1306; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4] 1307; AVX1-NEXT: retq 1308; 1309; AVX2-LABEL: shuffle_v8i32_03004744: 1310; AVX2: # BB#0: 1311; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4] 1312; AVX2-NEXT: retq 1313 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4> 1314 ret <8 x i32> %shuffle 1315} 1316 1317define <8 x i32> @shuffle_v8i32_10005444(<8 x i32> %a, <8 x i32> %b) { 1318; AVX1-LABEL: shuffle_v8i32_10005444: 1319; AVX1: # BB#0: 1320; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4] 1321; AVX1-NEXT: retq 1322; 1323; AVX2-LABEL: shuffle_v8i32_10005444: 1324; AVX2: # BB#0: 1325; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4] 1326; AVX2-NEXT: retq 1327 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4> 1328 ret <8 x i32> %shuffle 1329} 1330 1331define <8 x i32> @shuffle_v8i32_22006644(<8 x i32> %a, <8 x i32> %b) { 1332; AVX1-LABEL: shuffle_v8i32_22006644: 1333; AVX1: # BB#0: 1334; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4] 1335; AVX1-NEXT: retq 1336; 1337; AVX2-LABEL: shuffle_v8i32_22006644: 1338; AVX2: # BB#0: 1339; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4] 1340; AVX2-NEXT: retq 1341 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4> 1342 ret <8 x i32> %shuffle 1343} 1344 1345define <8 x i32> @shuffle_v8i32_33307774(<8 x i32> %a, <8 x i32> %b) { 1346; AVX1-LABEL: shuffle_v8i32_33307774: 1347; AVX1: # BB#0: 1348; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4] 1349; AVX1-NEXT: retq 1350; 1351; AVX2-LABEL: 
shuffle_v8i32_33307774: 1352; AVX2: # BB#0: 1353; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4] 1354; AVX2-NEXT: retq 1355 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4> 1356 ret <8 x i32> %shuffle 1357} 1358 1359define <8 x i32> @shuffle_v8i32_32107654(<8 x i32> %a, <8 x i32> %b) { 1360; AVX1-LABEL: shuffle_v8i32_32107654: 1361; AVX1: # BB#0: 1362; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 1363; AVX1-NEXT: retq 1364; 1365; AVX2-LABEL: shuffle_v8i32_32107654: 1366; AVX2: # BB#0: 1367; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 1368; AVX2-NEXT: retq 1369 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> 1370 ret <8 x i32> %shuffle 1371} 1372 1373define <8 x i32> @shuffle_v8i32_00234467(<8 x i32> %a, <8 x i32> %b) { 1374; AVX1-LABEL: shuffle_v8i32_00234467: 1375; AVX1: # BB#0: 1376; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7] 1377; AVX1-NEXT: retq 1378; 1379; AVX2-LABEL: shuffle_v8i32_00234467: 1380; AVX2: # BB#0: 1381; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7] 1382; AVX2-NEXT: retq 1383 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7> 1384 ret <8 x i32> %shuffle 1385} 1386 1387define <8 x i32> @shuffle_v8i32_00224466(<8 x i32> %a, <8 x i32> %b) { 1388; AVX1-LABEL: shuffle_v8i32_00224466: 1389; AVX1: # BB#0: 1390; AVX1-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] 1391; AVX1-NEXT: retq 1392; 1393; AVX2-LABEL: shuffle_v8i32_00224466: 1394; AVX2: # BB#0: 1395; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] 1396; AVX2-NEXT: retq 1397 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6> 1398 ret <8 x i32> %shuffle 1399} 1400 1401define <8 x i32> @shuffle_v8i32_10325476(<8 x i32> %a, <8 x i32> %b) { 1402; 
AVX1-LABEL: shuffle_v8i32_10325476: 1403; AVX1: # BB#0: 1404; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6] 1405; AVX1-NEXT: retq 1406; 1407; AVX2-LABEL: shuffle_v8i32_10325476: 1408; AVX2: # BB#0: 1409; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6] 1410; AVX2-NEXT: retq 1411 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6> 1412 ret <8 x i32> %shuffle 1413} 1414 1415define <8 x i32> @shuffle_v8i32_11335577(<8 x i32> %a, <8 x i32> %b) { 1416; AVX1-LABEL: shuffle_v8i32_11335577: 1417; AVX1: # BB#0: 1418; AVX1-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] 1419; AVX1-NEXT: retq 1420; 1421; AVX2-LABEL: shuffle_v8i32_11335577: 1422; AVX2: # BB#0: 1423; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] 1424; AVX2-NEXT: retq 1425 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7> 1426 ret <8 x i32> %shuffle 1427} 1428 1429define <8 x i32> @shuffle_v8i32_10235467(<8 x i32> %a, <8 x i32> %b) { 1430; AVX1-LABEL: shuffle_v8i32_10235467: 1431; AVX1: # BB#0: 1432; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7] 1433; AVX1-NEXT: retq 1434; 1435; AVX2-LABEL: shuffle_v8i32_10235467: 1436; AVX2: # BB#0: 1437; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7] 1438; AVX2-NEXT: retq 1439 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7> 1440 ret <8 x i32> %shuffle 1441} 1442 1443define <8 x i32> @shuffle_v8i32_10225466(<8 x i32> %a, <8 x i32> %b) { 1444; AVX1-LABEL: shuffle_v8i32_10225466: 1445; AVX1: # BB#0: 1446; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6] 1447; AVX1-NEXT: retq 1448; 1449; AVX2-LABEL: shuffle_v8i32_10225466: 1450; AVX2: # BB#0: 1451; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6] 1452; AVX2-NEXT: retq 1453 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 
0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6> 1454 ret <8 x i32> %shuffle 1455} 1456 1457define <8 x i32> @shuffle_v8i32_00015444(<8 x i32> %a, <8 x i32> %b) { 1458; AVX1-LABEL: shuffle_v8i32_00015444: 1459; AVX1: # BB#0: 1460; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,5,4,4,4] 1461; AVX1-NEXT: retq 1462; 1463; AVX2-LABEL: shuffle_v8i32_00015444: 1464; AVX2: # BB#0: 1465; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,1,5,4,4,4] 1466; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1467; AVX2-NEXT: retq 1468 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4> 1469 ret <8 x i32> %shuffle 1470} 1471 1472define <8 x i32> @shuffle_v8i32_00204644(<8 x i32> %a, <8 x i32> %b) { 1473; AVX1-LABEL: shuffle_v8i32_00204644: 1474; AVX1: # BB#0: 1475; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,6,4,4] 1476; AVX1-NEXT: retq 1477; 1478; AVX2-LABEL: shuffle_v8i32_00204644: 1479; AVX2: # BB#0: 1480; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,2,0,4,6,4,4] 1481; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1482; AVX2-NEXT: retq 1483 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4> 1484 ret <8 x i32> %shuffle 1485} 1486 1487define <8 x i32> @shuffle_v8i32_03004474(<8 x i32> %a, <8 x i32> %b) { 1488; AVX1-LABEL: shuffle_v8i32_03004474: 1489; AVX1: # BB#0: 1490; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,4,7,4] 1491; AVX1-NEXT: retq 1492; 1493; AVX2-LABEL: shuffle_v8i32_03004474: 1494; AVX2: # BB#0: 1495; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,3,0,0,4,4,7,4] 1496; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1497; AVX2-NEXT: retq 1498 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4> 1499 ret <8 x i32> %shuffle 1500} 1501 1502define <8 x i32> @shuffle_v8i32_10004444(<8 x i32> %a, <8 x i32> %b) { 1503; AVX1-LABEL: shuffle_v8i32_10004444: 1504; AVX1: # BB#0: 1505; AVX1-NEXT: vpermilps {{.*#+}} 
ymm0 = ymm0[1,0,0,0,4,4,4,4] 1506; AVX1-NEXT: retq 1507; 1508; AVX2-LABEL: shuffle_v8i32_10004444: 1509; AVX2: # BB#0: 1510; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,0,0,0,4,4,4,4] 1511; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1512; AVX2-NEXT: retq 1513 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4> 1514 ret <8 x i32> %shuffle 1515} 1516 1517define <8 x i32> @shuffle_v8i32_22006446(<8 x i32> %a, <8 x i32> %b) { 1518; AVX1-LABEL: shuffle_v8i32_22006446: 1519; AVX1: # BB#0: 1520; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,4,4,6] 1521; AVX1-NEXT: retq 1522; 1523; AVX2-LABEL: shuffle_v8i32_22006446: 1524; AVX2: # BB#0: 1525; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [2,2,0,0,6,4,4,6] 1526; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1527; AVX2-NEXT: retq 1528 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6> 1529 ret <8 x i32> %shuffle 1530} 1531 1532define <8 x i32> @shuffle_v8i32_33307474(<8 x i32> %a, <8 x i32> %b) { 1533; AVX1-LABEL: shuffle_v8i32_33307474: 1534; AVX1: # BB#0: 1535; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,4,7,4] 1536; AVX1-NEXT: retq 1537; 1538; AVX2-LABEL: shuffle_v8i32_33307474: 1539; AVX2: # BB#0: 1540; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [3,3,3,0,7,4,7,4] 1541; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1542; AVX2-NEXT: retq 1543 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4> 1544 ret <8 x i32> %shuffle 1545} 1546 1547define <8 x i32> @shuffle_v8i32_32104567(<8 x i32> %a, <8 x i32> %b) { 1548; AVX1-LABEL: shuffle_v8i32_32104567: 1549; AVX1: # BB#0: 1550; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7] 1551; AVX1-NEXT: retq 1552; 1553; AVX2-LABEL: shuffle_v8i32_32104567: 1554; AVX2: # BB#0: 1555; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [3,2,1,0,4,5,6,7] 1556; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1557; AVX2-NEXT: retq 1558 
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7> 1559 ret <8 x i32> %shuffle 1560} 1561 1562define <8 x i32> @shuffle_v8i32_00236744(<8 x i32> %a, <8 x i32> %b) { 1563; AVX1-LABEL: shuffle_v8i32_00236744: 1564; AVX1: # BB#0: 1565; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,6,7,4,4] 1566; AVX1-NEXT: retq 1567; 1568; AVX2-LABEL: shuffle_v8i32_00236744: 1569; AVX2: # BB#0: 1570; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,2,3,6,7,4,4] 1571; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1572; AVX2-NEXT: retq 1573 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4> 1574 ret <8 x i32> %shuffle 1575} 1576 1577define <8 x i32> @shuffle_v8i32_00226644(<8 x i32> %a, <8 x i32> %b) { 1578; AVX1-LABEL: shuffle_v8i32_00226644: 1579; AVX1: # BB#0: 1580; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,2,6,6,4,4] 1581; AVX1-NEXT: retq 1582; 1583; AVX2-LABEL: shuffle_v8i32_00226644: 1584; AVX2: # BB#0: 1585; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,2,2,6,6,4,4] 1586; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1587; AVX2-NEXT: retq 1588 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4> 1589 ret <8 x i32> %shuffle 1590} 1591 1592define <8 x i32> @shuffle_v8i32_10324567(<8 x i32> %a, <8 x i32> %b) { 1593; AVX1-LABEL: shuffle_v8i32_10324567: 1594; AVX1: # BB#0: 1595; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7] 1596; AVX1-NEXT: retq 1597; 1598; AVX2-LABEL: shuffle_v8i32_10324567: 1599; AVX2: # BB#0: 1600; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,0,3,2,4,5,6,7] 1601; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1602; AVX2-NEXT: retq 1603 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7> 1604 ret <8 x i32> %shuffle 1605} 1606 1607define <8 x i32> @shuffle_v8i32_11334567(<8 x i32> %a, <8 x i32> %b) { 1608; AVX1-LABEL: 
shuffle_v8i32_11334567: 1609; AVX1: # BB#0: 1610; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7] 1611; AVX1-NEXT: retq 1612; 1613; AVX2-LABEL: shuffle_v8i32_11334567: 1614; AVX2: # BB#0: 1615; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,1,3,3,4,5,6,7] 1616; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1617; AVX2-NEXT: retq 1618 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7> 1619 ret <8 x i32> %shuffle 1620} 1621 1622define <8 x i32> @shuffle_v8i32_01235467(<8 x i32> %a, <8 x i32> %b) { 1623; AVX1-LABEL: shuffle_v8i32_01235467: 1624; AVX1: # BB#0: 1625; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,7] 1626; AVX1-NEXT: retq 1627; 1628; AVX2-LABEL: shuffle_v8i32_01235467: 1629; AVX2: # BB#0: 1630; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,5,4,6,7] 1631; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1632; AVX2-NEXT: retq 1633 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7> 1634 ret <8 x i32> %shuffle 1635} 1636 1637define <8 x i32> @shuffle_v8i32_01235466(<8 x i32> %a, <8 x i32> %b) { 1638; AVX1-LABEL: shuffle_v8i32_01235466: 1639; AVX1: # BB#0: 1640; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,6] 1641; AVX1-NEXT: retq 1642; 1643; AVX2-LABEL: shuffle_v8i32_01235466: 1644; AVX2: # BB#0: 1645; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,5,4,6,6] 1646; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1647; AVX2-NEXT: retq 1648 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6> 1649 ret <8 x i32> %shuffle 1650} 1651 1652define <8 x i32> @shuffle_v8i32_002u6u44(<8 x i32> %a, <8 x i32> %b) { 1653; AVX1-LABEL: shuffle_v8i32_002u6u44: 1654; AVX1: # BB#0: 1655; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,u,6,u,4,4] 1656; AVX1-NEXT: retq 1657; 1658; AVX2-LABEL: shuffle_v8i32_002u6u44: 1659; AVX2: # BB#0: 1660; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = 
<0,0,2,u,6,u,4,4> 1661; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1662; AVX2-NEXT: retq 1663 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4> 1664 ret <8 x i32> %shuffle 1665} 1666 1667define <8 x i32> @shuffle_v8i32_00uu66uu(<8 x i32> %a, <8 x i32> %b) { 1668; AVX1-LABEL: shuffle_v8i32_00uu66uu: 1669; AVX1: # BB#0: 1670; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,u,u,6,6,u,u] 1671; AVX1-NEXT: retq 1672; 1673; AVX2-LABEL: shuffle_v8i32_00uu66uu: 1674; AVX2: # BB#0: 1675; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <0,0,u,u,6,6,u,u> 1676; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1677; AVX2-NEXT: retq 1678 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef> 1679 ret <8 x i32> %shuffle 1680} 1681 1682define <8 x i32> @shuffle_v8i32_103245uu(<8 x i32> %a, <8 x i32> %b) { 1683; AVX1-LABEL: shuffle_v8i32_103245uu: 1684; AVX1: # BB#0: 1685; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,u,u] 1686; AVX1-NEXT: retq 1687; 1688; AVX2-LABEL: shuffle_v8i32_103245uu: 1689; AVX2: # BB#0: 1690; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <1,0,3,2,4,5,u,u> 1691; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1692; AVX2-NEXT: retq 1693 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef> 1694 ret <8 x i32> %shuffle 1695} 1696 1697define <8 x i32> @shuffle_v8i32_1133uu67(<8 x i32> %a, <8 x i32> %b) { 1698; AVX1-LABEL: shuffle_v8i32_1133uu67: 1699; AVX1: # BB#0: 1700; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,u,u,6,7] 1701; AVX1-NEXT: retq 1702; 1703; AVX2-LABEL: shuffle_v8i32_1133uu67: 1704; AVX2: # BB#0: 1705; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <1,1,3,3,u,u,6,7> 1706; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1707; AVX2-NEXT: retq 1708 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7> 1709 
ret <8 x i32> %shuffle 1710} 1711 1712define <8 x i32> @shuffle_v8i32_0uu354uu(<8 x i32> %a, <8 x i32> %b) { 1713; AVX1-LABEL: shuffle_v8i32_0uu354uu: 1714; AVX1: # BB#0: 1715; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,u,u,3,5,4,u,u] 1716; AVX1-NEXT: retq 1717; 1718; AVX2-LABEL: shuffle_v8i32_0uu354uu: 1719; AVX2: # BB#0: 1720; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <0,u,u,3,5,4,u,u> 1721; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1722; AVX2-NEXT: retq 1723 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef> 1724 ret <8 x i32> %shuffle 1725} 1726 1727define <8 x i32> @shuffle_v8i32_uuu3uu66(<8 x i32> %a, <8 x i32> %b) { 1728; AVX1-LABEL: shuffle_v8i32_uuu3uu66: 1729; AVX1: # BB#0: 1730; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[u,u,u,3,u,u,6,6] 1731; AVX1-NEXT: retq 1732; 1733; AVX2-LABEL: shuffle_v8i32_uuu3uu66: 1734; AVX2: # BB#0: 1735; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <u,u,u,3,u,u,6,6> 1736; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1737; AVX2-NEXT: retq 1738 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6> 1739 ret <8 x i32> %shuffle 1740} 1741 1742define <8 x i32> @shuffle_v8i32_6caa87e5(<8 x i32> %a, <8 x i32> %b) { 1743; AVX1-LABEL: shuffle_v8i32_6caa87e5: 1744; AVX1: # BB#0: 1745; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2] 1746; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3] 1747; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,0,1] 1748; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm2[0,0],ymm1[2,2],ymm2[4,4],ymm1[6,6] 1749; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4],ymm0[5],ymm1[6],ymm0[7] 1750; AVX1-NEXT: retq 1751; 1752; AVX2-LABEL: shuffle_v8i32_6caa87e5: 1753; AVX2: # BB#0: 1754; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,1,3,2] 1755; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,0,2,2,4,4,6,6] 1756; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,1,0,3] 1757; AVX2-NEXT: vpblendd 
{{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4],ymm0[5],ymm1[6],ymm0[7] 1758; AVX2-NEXT: retq 1759 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 6, i32 12, i32 10, i32 10, i32 8, i32 7, i32 14, i32 5> 1760 ret <8 x i32> %shuffle 1761} 1762 1763define <8 x i32> @shuffle_v8i32_32103210(<8 x i32> %a, <8 x i32> %b) { 1764; AVX1-LABEL: shuffle_v8i32_32103210: 1765; AVX1: # BB#0: 1766; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] 1767; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 1768; AVX1-NEXT: retq 1769; 1770; AVX2-LABEL: shuffle_v8i32_32103210: 1771; AVX2: # BB#0: 1772; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0] 1773; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] 1774; AVX2-NEXT: retq 1775 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0> 1776 ret <8 x i32> %shuffle 1777} 1778 1779define <8 x i32> @shuffle_v8i32_76547654(<8 x i32> %a, <8 x i32> %b) { 1780; AVX1-LABEL: shuffle_v8i32_76547654: 1781; AVX1: # BB#0: 1782; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 1783; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3] 1784; AVX1-NEXT: retq 1785; 1786; AVX2-LABEL: shuffle_v8i32_76547654: 1787; AVX2: # BB#0: 1788; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 1789; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3,2,3] 1790; AVX2-NEXT: retq 1791 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 7, i32 6, i32 5, i32 4> 1792 ret <8 x i32> %shuffle 1793} 1794 1795define <8 x i32> @shuffle_v8i32_76543210(<8 x i32> %a, <8 x i32> %b) { 1796; AVX1-LABEL: shuffle_v8i32_76543210: 1797; AVX1: # BB#0: 1798; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 1799; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1] 1800; AVX1-NEXT: retq 1801; 1802; AVX2-LABEL: shuffle_v8i32_76543210: 1803; AVX2: # BB#0: 1804; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 1805; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = 
ymm0[2,3,0,1] 1806; AVX2-NEXT: retq 1807 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> 1808 ret <8 x i32> %shuffle 1809} 1810 1811define <8 x i32> @shuffle_v8i32_3210ba98(<8 x i32> %a, <8 x i32> %b) { 1812; AVX1-LABEL: shuffle_v8i32_3210ba98: 1813; AVX1: # BB#0: 1814; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1815; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 1816; AVX1-NEXT: retq 1817; 1818; AVX2-LABEL: shuffle_v8i32_3210ba98: 1819; AVX2: # BB#0: 1820; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 1821; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 1822; AVX2-NEXT: retq 1823 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 11, i32 10, i32 9, i32 8> 1824 ret <8 x i32> %shuffle 1825} 1826 1827define <8 x i32> @shuffle_v8i32_3210fedc(<8 x i32> %a, <8 x i32> %b) { 1828; AVX1-LABEL: shuffle_v8i32_3210fedc: 1829; AVX1: # BB#0: 1830; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3] 1831; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 1832; AVX1-NEXT: retq 1833; 1834; AVX2-LABEL: shuffle_v8i32_3210fedc: 1835; AVX2: # BB#0: 1836; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] 1837; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 1838; AVX2-NEXT: retq 1839 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12> 1840 ret <8 x i32> %shuffle 1841} 1842 1843define <8 x i32> @shuffle_v8i32_7654fedc(<8 x i32> %a, <8 x i32> %b) { 1844; AVX1-LABEL: shuffle_v8i32_7654fedc: 1845; AVX1: # BB#0: 1846; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] 1847; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 1848; AVX1-NEXT: retq 1849; 1850; AVX2-LABEL: shuffle_v8i32_7654fedc: 1851; AVX2: # BB#0: 1852; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] 1853; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = 
ymm0[3,2,1,0,7,6,5,4]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 15, i32 14, i32 13, i32 12>
  ret <8 x i32> %shuffle
}

; Result is the high half of %b followed by the high half of %a, with the
; four elements of each 128-bit lane reversed.
define <8 x i32> @shuffle_v8i32_fedc7654(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_fedc7654:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_fedc7654:
; AVX2:       # BB#0:
; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 7, i32 6, i32 5, i32 4>
  ret <8 x i32> %shuffle
}

; Result is the low half of %b followed by the high half of %a, with the
; four elements of each 128-bit lane reversed.
define <8 x i32> @shuffle_v8i32_ba987654(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_ba987654:
; AVX1:       # BB#0:
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_ba987654:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4>
  ret <8 x i32> %shuffle
}

; Result is the low half of %b followed by the low half of %a, with the four
; elements of each 128-bit lane reversed (mask b,a,9,8,3,2,1,0 as the name
; says). The mask previously duplicated shuffle_v8i32_ba987654, so this case
; was not actually being exercised.
; NOTE(review): the CHECK lines below were derived by symmetry with
; shuffle_v8i32_3210ba98 (same sequence, source operands swapped); re-run
; utils/update_llc_test_checks.py to confirm them against llc.
define <8 x i32> @shuffle_v8i32_ba983210(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_ba983210:
; AVX1:       # BB#0:
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_ba983210:
; AVX2:       # BB#0:
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 3, i32 2, i32 1, i32 0>
  ret <8 x i32> %shuffle
}

; First shuffle operand is all-zero. In each 128-bit lane the result has a
; zero in element 0, the lane's first element of %a in element 3, and undef
; in between.
define <8 x i32> @shuffle_v8i32_zuu8zuuc(<8 x i32> %a) {
; AVX1-LABEL: shuffle_v8i32_zuu8zuuc:
; AVX1:       # BB#0:
; AVX1-NEXT:    vxorps %ymm1, %ymm1, %ymm1
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,0],ymm1[4,5],ymm0[6,4]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_zuu8zuuc:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x i32> zeroinitializer, <8 x i32> %a, <8 x i32> <i32 0, i32 undef, i32 undef, i32 8, i32 0, i32 undef, i32 undef, i32 12>
  ret <8 x i32> %shuffle
}

; First shuffle operand is all-zero. In each 128-bit lane the elements of %a
; move down by one position and the top element is taken from the zero vector.
define <8 x i32> @shuffle_v8i32_9ubzdefz(<8 x i32> %a) {
; AVX1-LABEL: shuffle_v8i32_9ubzdefz:
; AVX1:       # BB#0:
; AVX1-NEXT:    vxorps %ymm1, %ymm1, %ymm1
; AVX1-NEXT:    vshufps {{.*#+}} ymm1 = ymm1[3,0],ymm0[3,0],ymm1[7,4],ymm0[7,4]
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[1,2],ymm1[2,0],ymm0[5,6],ymm1[6,4]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_9ubzdefz:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,ymm0[20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x i32> zeroinitializer, <8 x i32> %a, <8 x i32> <i32 9, i32 undef, i32 11, i32 0, i32 13, i32 14, i32 15, i32 0>
  ret <8 x i32> %shuffle
}

; NOTE(review): the 'b' in this name would denote mask index 11, but element 4
; of this function's shuffle mask is 12 ('c'), which is what the unpack-low
; pattern in the CHECK lines corresponds to. The name looks like it was meant
; to be 80u1c4uu; left unchanged here so the test name stays stable.
define <8 x i32> @shuffle_v8i32_80u1b4uu(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_80u1b4uu:
; AVX1:       # BB#0:
; AVX1-NEXT:    vunpcklps {{.*#+}} ymm0 = 
ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5] 1942; AVX1-NEXT: retq 1943; 1944; AVX2-LABEL: shuffle_v8i32_80u1b4uu: 1945; AVX2: # BB#0: 1946; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5] 1947; AVX2-NEXT: retq 1948 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 0, i32 undef, i32 1, i32 12, i32 4, i32 undef, i32 undef> 1949 ret <8 x i32> %shuffle 1950} 1951 1952define <8 x i32> @shuffle_v8i32_uuuu1111(<8 x i32> %a, <8 x i32> %b) { 1953; AVX1-LABEL: shuffle_v8i32_uuuu1111: 1954; AVX1: # BB#0: 1955; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 1956; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 1957; AVX1-NEXT: retq 1958; 1959; AVX2-LABEL: shuffle_v8i32_uuuu1111: 1960; AVX2: # BB#0: 1961; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 1962; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 1963; AVX2-NEXT: retq 1964 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 1, i32 1, i32 1> 1965 ret <8 x i32> %shuffle 1966} 1967 1968define <8 x i32> @shuffle_v8i32_2222uuuu(<8 x i32> %a, <8 x i32> %b) { 1969; ALL-LABEL: shuffle_v8i32_2222uuuu: 1970; ALL: # BB#0: 1971; ALL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,2,2] 1972; ALL-NEXT: retq 1973 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 undef, i32 undef, i32 undef, i32 undef> 1974 ret <8 x i32> %shuffle 1975} 1976 1977define <8 x i32> @shuffle_v8i32_2A3Buuuu(<8 x i32> %a, <8 x i32> %b) { 1978; ALL-LABEL: shuffle_v8i32_2A3Buuuu: 1979; ALL: # BB#0: 1980; ALL-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1981; ALL-NEXT: retq 1982 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 undef, i32 undef, i32 undef, i32 undef> 1983 ret <8 x i32> %shuffle 1984} 1985 1986define <8 x i32> @shuffle_v8i32_44444444(<8 x i32> %a, <8 x i32> %b) { 1987; 
AVX1-LABEL: shuffle_v8i32_44444444: 1988; AVX1: # BB#0: 1989; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4] 1990; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3] 1991; AVX1-NEXT: retq 1992; 1993; AVX2-LABEL: shuffle_v8i32_44444444: 1994; AVX2: # BB#0: 1995; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm0 1996; AVX2-NEXT: vbroadcastss %xmm0, %ymm0 1997; AVX2-NEXT: retq 1998 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4> 1999 ret <8 x i32> %shuffle 2000} 2001 2002define <8 x i32> @shuffle_v8i32_5555uuuu(<8 x i32> %a, <8 x i32> %b) { 2003; AVX1-LABEL: shuffle_v8i32_5555uuuu: 2004; AVX1: # BB#0: 2005; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 2006; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 2007; AVX1-NEXT: retq 2008; 2009; AVX2-LABEL: shuffle_v8i32_5555uuuu: 2010; AVX2: # BB#0: 2011; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 2012; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 2013; AVX2-NEXT: retq 2014 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 undef, i32 undef, i32 undef, i32 undef> 2015 ret <8 x i32> %shuffle 2016} 2017 2018define <8 x float> @splat_mem_v8f32_2(float* %p) { 2019; ALL-LABEL: splat_mem_v8f32_2: 2020; ALL: # BB#0: 2021; ALL-NEXT: vbroadcastss (%rdi), %ymm0 2022; ALL-NEXT: retq 2023 %1 = load float, float* %p 2024 %2 = insertelement <4 x float> undef, float %1, i32 0 2025 %3 = shufflevector <4 x float> %2, <4 x float> undef, <8 x i32> zeroinitializer 2026 ret <8 x float> %3 2027} 2028 2029define <8 x float> @splat_v8f32(<4 x float> %r) { 2030; AVX1-LABEL: splat_v8f32: 2031; AVX1: # BB#0: 2032; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0] 2033; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 2034; AVX1-NEXT: retq 2035; 2036; AVX2-LABEL: splat_v8f32: 2037; AVX2: # BB#0: 2038; AVX2-NEXT: vbroadcastss %xmm0, %ymm0 2039; AVX2-NEXT: retq 2040 %1 = shufflevector <4 x float> %r, <4 x float> undef, <8 x i32> 
zeroinitializer 2041 ret <8 x float> %1 2042} 2043 2044; 2045; Shuffle to logical bit shifts 2046; 2047 2048define <8 x i32> @shuffle_v8i32_z0U2zUz6(<8 x i32> %a) { 2049; AVX1-LABEL: shuffle_v8i32_z0U2zUz6: 2050; AVX1: # BB#0: 2051; AVX1-NEXT: vxorps %ymm1, %ymm1, %ymm1 2052; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm1[0,2],ymm0[4,6],ymm1[4,6] 2053; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,0,3,1,6,4,7,5] 2054; AVX1-NEXT: retq 2055; 2056; AVX2-LABEL: shuffle_v8i32_z0U2zUz6: 2057; AVX2: # BB#0: 2058; AVX2-NEXT: vpsllq $32, %ymm0, %ymm0 2059; AVX2-NEXT: retq 2060 %shuffle = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 8, i32 0, i32 undef, i32 2, i32 8, i32 undef, i32 8, i32 6> 2061 ret <8 x i32> %shuffle 2062} 2063 2064define <8 x i32> @shuffle_v8i32_1U3z5zUU(<8 x i32> %a) { 2065; AVX1-LABEL: shuffle_v8i32_1U3z5zUU: 2066; AVX1: # BB#0: 2067; AVX1-NEXT: vxorps %ymm1, %ymm1, %ymm1 2068; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7] 2069; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7] 2070; AVX1-NEXT: retq 2071; 2072; AVX2-LABEL: shuffle_v8i32_1U3z5zUU: 2073; AVX2: # BB#0: 2074; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm0 2075; AVX2-NEXT: retq 2076 %shuffle = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 1, i32 undef, i32 3, i32 8, i32 5, i32 8, i32 undef, i32 undef> 2077 ret <8 x i32> %shuffle 2078} 2079 2080define <8 x i32> @shuffle_v8i32_B012F456(<8 x i32> %a, <8 x i32> %b) { 2081; AVX1-LABEL: shuffle_v8i32_B012F456: 2082; AVX1: # BB#0: 2083; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm1[3,0],ymm0[0,0],ymm1[7,4],ymm0[4,4] 2084; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,2],ymm0[1,2],ymm1[4,6],ymm0[5,6] 2085; AVX1-NEXT: retq 2086; 2087; AVX2-LABEL: shuffle_v8i32_B012F456: 2088; AVX2: # BB#0: 2089; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[12,13,14,15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11],ymm1[28,29,30,31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27] 2090; AVX2-NEXT: retq 2091 %shuffle = 
shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 0, i32 1, i32 2, i32 15, i32 4, i32 5, i32 6> 2092 ret <8 x i32> %shuffle 2093} 2094 2095define <8 x i32> @shuffle_v8i32_1238567C(<8 x i32> %a, <8 x i32> %b) { 2096; AVX1-LABEL: shuffle_v8i32_1238567C: 2097; AVX1: # BB#0: 2098; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,0],ymm0[3,0],ymm1[4,4],ymm0[7,4] 2099; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,2],ymm1[2,0],ymm0[5,6],ymm1[6,4] 2100; AVX1-NEXT: retq 2101; 2102; AVX2-LABEL: shuffle_v8i32_1238567C: 2103; AVX2: # BB#0: 2104; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0,1,2,3],ymm0[20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16,17,18,19] 2105; AVX2-NEXT: retq 2106 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 2, i32 3, i32 8, i32 5, i32 6, i32 7, i32 12> 2107 ret <8 x i32> %shuffle 2108} 2109 2110define <8 x i32> @shuffle_v8i32_9AB0DEF4(<8 x i32> %a, <8 x i32> %b) { 2111; AVX1-LABEL: shuffle_v8i32_9AB0DEF4: 2112; AVX1: # BB#0: 2113; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[3,0],ymm0[4,4],ymm1[7,4] 2114; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,2],ymm0[2,0],ymm1[5,6],ymm0[6,4] 2115; AVX1-NEXT: retq 2116; 2117; AVX2-LABEL: shuffle_v8i32_9AB0DEF4: 2118; AVX2: # BB#0: 2119; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1,2,3],ymm1[20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17,18,19] 2120; AVX2-NEXT: retq 2121 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 10, i32 11, i32 0, i32 13, i32 14, i32 15, i32 4> 2122 ret <8 x i32> %shuffle 2123} 2124 2125define <8 x i32> @shuffle_v8i32_389A7CDE(<8 x i32> %a, <8 x i32> %b) { 2126; AVX1-LABEL: shuffle_v8i32_389A7CDE: 2127; AVX1: # BB#0: 2128; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,0],ymm1[0,0],ymm0[7,4],ymm1[4,4] 2129; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm1[1,2],ymm0[4,6],ymm1[5,6] 2130; AVX1-NEXT: retq 2131; 2132; AVX2-LABEL: shuffle_v8i32_389A7CDE: 2133; AVX2: # BB#0: 
2134; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[12,13,14,15],ymm1[0,1,2,3,4,5,6,7,8,9,10,11],ymm0[28,29,30,31],ymm1[16,17,18,19,20,21,22,23,24,25,26,27] 2135; AVX2-NEXT: retq 2136 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 8, i32 9, i32 10, i32 7, i32 12, i32 13, i32 14> 2137 ret <8 x i32> %shuffle 2138} 2139 2140define <8 x i32> @shuffle_v8i32_30127456(<8 x i32> %a, <8 x i32> %b) { 2141; AVX1-LABEL: shuffle_v8i32_30127456: 2142; AVX1: # BB#0: 2143; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,0,1,2,7,4,5,6] 2144; AVX1-NEXT: retq 2145; 2146; AVX2-LABEL: shuffle_v8i32_30127456: 2147; AVX2: # BB#0: 2148; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,0,1,2,7,4,5,6] 2149; AVX2-NEXT: retq 2150 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6> 2151 ret <8 x i32> %shuffle 2152} 2153 2154define <8 x i32> @shuffle_v8i32_12305674(<8 x i32> %a, <8 x i32> %b) { 2155; AVX1-LABEL: shuffle_v8i32_12305674: 2156; AVX1: # BB#0: 2157; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,2,3,0,5,6,7,4] 2158; AVX1-NEXT: retq 2159; 2160; AVX2-LABEL: shuffle_v8i32_12305674: 2161; AVX2: # BB#0: 2162; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,2,3,0,5,6,7,4] 2163; AVX2-NEXT: retq 2164 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 2, i32 3, i32 0, i32 5, i32 6, i32 7, i32 4> 2165 ret <8 x i32> %shuffle 2166} 2167 2168define <8x float> @concat_v2f32_1(<2 x float>* %tmp64, <2 x float>* %tmp65) { 2169; ALL-LABEL: concat_v2f32_1: 2170; ALL: # BB#0: # %entry 2171; ALL-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero 2172; ALL-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0] 2173; ALL-NEXT: retq 2174entry: 2175 %tmp74 = load <2 x float>, <2 x float>* %tmp65, align 8 2176 %tmp72 = load <2 x float>, <2 x float>* %tmp64, align 8 2177 %tmp73 = shufflevector <2 x float> %tmp72, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 2178 %tmp75 = 
shufflevector <2 x float> %tmp74, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 2179 %tmp76 = shufflevector <8 x float> %tmp73, <8 x float> %tmp75, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef> 2180 ret <8 x float> %tmp76 2181} 2182 2183define <8x float> @concat_v2f32_2(<2 x float>* %tmp64, <2 x float>* %tmp65) { 2184; ALL-LABEL: concat_v2f32_2: 2185; ALL: # BB#0: # %entry 2186; ALL-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero 2187; ALL-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0] 2188; ALL-NEXT: retq 2189entry: 2190 %tmp74 = load <2 x float>, <2 x float>* %tmp65, align 8 2191 %tmp72 = load <2 x float>, <2 x float>* %tmp64, align 8 2192 %tmp76 = shufflevector <2 x float> %tmp72, <2 x float> %tmp74, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef> 2193 ret <8 x float> %tmp76 2194} 2195 2196define <8x float> @concat_v2f32_3(<2 x float>* %tmp64, <2 x float>* %tmp65) { 2197; ALL-LABEL: concat_v2f32_3: 2198; ALL: # BB#0: # %entry 2199; ALL-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero 2200; ALL-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0] 2201; ALL-NEXT: retq 2202entry: 2203 %tmp74 = load <2 x float>, <2 x float>* %tmp65, align 8 2204 %tmp72 = load <2 x float>, <2 x float>* %tmp64, align 8 2205 %tmp76 = shufflevector <2 x float> %tmp72, <2 x float> %tmp74, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 2206 %res = shufflevector <4 x float> %tmp76, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef> 2207 ret <8 x float> %res 2208} 2209 2210define <8 x i32> @insert_mem_and_zero_v8i32(i32* %ptr) { 2211; ALL-LABEL: insert_mem_and_zero_v8i32: 2212; ALL: # BB#0: 2213; ALL-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2214; ALL-NEXT: retq 2215 %a = load i32, i32* %ptr 2216 %v = insertelement <8 x i32> undef, i32 %a, i32 0 2217 %shuffle = shufflevector <8 x i32> %v, <8 x i32> zeroinitializer, <8 x i32> <i32 0, 
i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 2218 ret <8 x i32> %shuffle 2219} 2220 2221define <8 x i32> @concat_v8i32_0123CDEF(<8 x i32> %a, <8 x i32> %b) { 2222; AVX1-LABEL: concat_v8i32_0123CDEF: 2223; AVX1: # BB#0: 2224; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3] 2225; AVX1-NEXT: retq 2226; 2227; AVX2-LABEL: concat_v8i32_0123CDEF: 2228; AVX2: # BB#0: 2229; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] 2230; AVX2-NEXT: retq 2231 %alo = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 2232 %bhi = shufflevector <8 x i32> %b, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2233 %shuf = shufflevector <4 x i32> %alo, <4 x i32> %bhi, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 2234 ret <8 x i32> %shuf 2235} 2236 2237define <8 x i32> @concat_v8i32_4567CDEF_bc(<8 x i32> %a0, <8 x i32> %a1) { 2238; ALL-LABEL: concat_v8i32_4567CDEF_bc: 2239; ALL: # BB#0: 2240; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] 2241; ALL-NEXT: retq 2242 %a0hi = shufflevector <8 x i32> %a0, <8 x i32> %a1, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2243 %a1hi = shufflevector <8 x i32> %a0, <8 x i32> %a1, <4 x i32> <i32 12, i32 13, i32 14, i32 15> 2244 %bc0hi = bitcast <4 x i32> %a0hi to <2 x i64> 2245 %bc1hi = bitcast <4 x i32> %a1hi to <2 x i64> 2246 %shuffle64 = shufflevector <2 x i64> %bc0hi, <2 x i64> %bc1hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 2247 %shuffle32 = bitcast <4 x i64> %shuffle64 to <8 x i32> 2248 ret <8 x i32> %shuffle32 2249} 2250 2251define <8 x float> @concat_v8f32_4567CDEF_bc(<8 x float> %f0, <8 x float> %f1) { 2252; ALL-LABEL: concat_v8f32_4567CDEF_bc: 2253; ALL: # BB#0: 2254; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] 2255; ALL-NEXT: retq 2256 %a0 = bitcast <8 x float> %f0 to <4 x i64> 2257 %a1 = bitcast <8 x float> %f1 to <8 x i32> 2258 %a0hi = shufflevector <4 x i64> %a0, <4 x i64> undef, <2 x i32> <i32 2, i32 3> 2259 %a1hi = shufflevector <8 x 
i32> %a1, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2260 %bc0hi = bitcast <2 x i64> %a0hi to <2 x i64> 2261 %bc1hi = bitcast <4 x i32> %a1hi to <2 x i64> 2262 %shuffle64 = shufflevector <2 x i64> %bc0hi, <2 x i64> %bc1hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 2263 %shuffle32 = bitcast <4 x i64> %shuffle64 to <8 x float> 2264 ret <8 x float> %shuffle32 2265} 2266 2267define <8 x i32> @insert_dup_mem_v8i32(i32* %ptr) { 2268; ALL-LABEL: insert_dup_mem_v8i32: 2269; ALL: # BB#0: 2270; ALL-NEXT: vbroadcastss (%rdi), %ymm0 2271; ALL-NEXT: retq 2272 %tmp = load i32, i32* %ptr, align 4 2273 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0 2274 %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <8 x i32> zeroinitializer 2275 ret <8 x i32> %tmp2 2276} 2277