; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX1OR2 --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX1OR2 --check-prefix=AVX2OR512VL --check-prefix=AVX2 --check-prefix=AVX2-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefix=ALL --check-prefix=AVX1OR2 --check-prefix=AVX2OR512VL --check-prefix=AVX2 --check-prefix=AVX2-FAST
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX2OR512VL --check-prefix=AVX512VL --check-prefix=AVX512VL-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512dq,+fast-variable-shuffle | FileCheck %s --check-prefix=ALL --check-prefix=AVX2OR512VL --check-prefix=AVX512VL --check-prefix=AVX512VL-FAST

; Shuffle lowering tests for 256-bit <8 x float> vectors.
; Each function name spells its shuffle mask, one character per result lane:
; hex digits 0-7 pick that element of %a, 8-f pick element (digit - 8) of %b,
; and 'u' marks an undef lane. The expected-assembly comments inside each
; function are machine generated; regenerate them with the script named in
; the first line of this file instead of editing them by hand.

; Splat: every result lane reads element 0 of %a.
define <8 x float> @shuffle_v8f32_00000000(<8 x float> %a, <8 x float> %b) {
; AVX1-LABEL: shuffle_v8f32_00000000:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: shuffle_v8f32_00000000:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vbroadcastss %xmm0, %ymm0
; AVX2OR512VL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x float> %shuffle
}

; Element 0 of %a in every lane except lane 6, which reads element 1.
define <8 x float> @shuffle_v8f32_00000010(<8 x float> %a, <8 x float> %b) {
; AVX1-LABEL: shuffle_v8f32_00000010:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: shuffle_v8f32_00000010:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,0]
; AVX2OR512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX2OR512VL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
  ret <8 x float> %shuffle
}

; Element 0 of %a in every lane except lane 5, which reads element 2.
define <8 x float> @shuffle_v8f32_00000200(<8 x float> %a, <8 x float> %b) {
; AVX1-LABEL: shuffle_v8f32_00000200:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: shuffle_v8f32_00000200:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,2]
; AVX2OR512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,0]
; AVX2OR512VL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
  ret <8 x float> %shuffle
}

; Element 0 of %a in every lane except lane 4, which reads element 3.
define <8 x float> @shuffle_v8f32_00003000(<8 x float> %a, <8 x float> %b) {
; AVX1-LABEL: shuffle_v8f32_00003000:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: shuffle_v8f32_00003000:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,3,0]
; AVX2OR512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,0]
; AVX2OR512VL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
  ret <8 x float> %shuffle
}

; Element 0 of %a in every lane except lane 3, which reads element 4.
define <8 x float> @shuffle_v8f32_00040000(<8 x float> %a, <8 x float> %b) {
; AVX1-LABEL: shuffle_v8f32_00040000:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,3]
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7]
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: shuffle_v8f32_00040000:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vmovaps {{.*#+}} ymm1 = [0,0,0,4,0,0,0,0]
; AVX2OR512VL-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; AVX2OR512VL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
  ret <8 x float> %shuffle
}

; Element 0 of %a in every lane except lane 2, which reads element 5.
define <8 x float> @shuffle_v8f32_00500000(<8 x float> %a, <8 x float> %b) {
; AVX1-LABEL: shuffle_v8f32_00500000:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,4,4]
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: shuffle_v8f32_00500000:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vmovaps {{.*#+}} ymm1 = [0,0,5,0,0,0,0,0]
; AVX2OR512VL-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; AVX2OR512VL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x float> %shuffle
}

; Element 0 of %a in every lane except lane 1, which reads element 6.
define <8 x float> @shuffle_v8f32_06000000(<8 x float> %a, <8 x float> %b) {
; AVX1-LABEL: shuffle_v8f32_06000000:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,2,0,0,4,4,4,4]
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: shuffle_v8f32_06000000:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vmovaps {{.*#+}} ymm1 = [0,6,0,0,0,0,0,0]
; AVX2OR512VL-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; AVX2OR512VL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x float> %shuffle
}

; Element 0 of %a in every lane except lane 0, which reads element 7.
define <8 x float> @shuffle_v8f32_70000000(<8 x float> %a, <8 x float> %b) {
; AVX1-LABEL: shuffle_v8f32_70000000:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,0,0,0,4,4,4,4]
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: shuffle_v8f32_70000000:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    movl $7, %eax
; AVX2OR512VL-NEXT:    vmovd %eax, %xmm1
; AVX2OR512VL-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX2OR512VL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x float> %shuffle
}

; Repeats the low element pair of each 128-bit half of %a within that half.
define <8 x float> @shuffle_v8f32_01014545(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_01014545:
; ALL:       # %bb.0:
; ALL-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
  ret <8 x float> %shuffle
}

; Duplicates each of elements 0-3 of %a into two adjacent result lanes.
define <8 x float> @shuffle_v8f32_00112233(<8 x float> %a, <8 x float> %b) {
; AVX1-LABEL: shuffle_v8f32_00112233:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,1,1]
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: shuffle_v8f32_00112233:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vmovaps {{.*#+}} ymm1 = [0,0,1,1,2,2,3,3]
; AVX2OR512VL-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; AVX2OR512VL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
  ret <8 x float> %shuffle
}

; Lanes 0-3 read element 0 of %a; lanes 4-7 read element 1.
define <8 x float> @shuffle_v8f32_00001111(<8 x float> %a, <8 x float> %b) {
; AVX1-LABEL: shuffle_v8f32_00001111:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: shuffle_v8f32_00001111:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX2OR512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,1]
; AVX2OR512VL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
  ret <8 x float> %shuffle
}

; Two-input blend without lane movement: even lanes from %b, odd lanes from %a.
define <8 x float> @shuffle_v8f32_81a3c5e7(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_81a3c5e7:
; ALL:       # %bb.0:
; ALL-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
  ret <8 x float> %shuffle
}

; Alternates element 0 of %a and element 0 of %b across the eight lanes.
define <8 x float> @shuffle_v8f32_08080808(<8 x float> %a, <8 x float> %b) {
; AVX1-LABEL: shuffle_v8f32_08080808:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: shuffle_v8f32_08080808:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX2OR512VL-NEXT:    vbroadcastsd %xmm0, %ymm0
; AVX2OR512VL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8>
  ret <8 x float> %shuffle
}

; Alternates %a[0]/%b[0] in lanes 0-3 and %a[4]/%b[4] in lanes 4-7.
define <8 x float> @shuffle_v8f32_08084c4c(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_08084c4c:
; ALL:       # %bb.0:
; ALL-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4]
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12>
  ret <8 x float> %shuffle
}

; Per 128-bit half: lowest element of %b twice, then the upper pair of %a.
define <8 x float> @shuffle_v8f32_8823cc67(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_8823cc67:
; ALL:       # %bb.0:
; ALL-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[0,0],ymm0[2,3],ymm1[4,4],ymm0[6,7]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7>
  ret <8 x float> %shuffle
}

; Per 128-bit half: swapped low pair of %b, then swapped high pair of %a.
define <8 x float> @shuffle_v8f32_9832dc76(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_9832dc76:
; ALL:       # %bb.0:
; ALL-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[3,2],ymm1[5,4],ymm0[7,6]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6>
  ret <8 x float> %shuffle
}

; Per 128-bit half: swapped low pair of %b, then swapped low pair of %a.
define <8 x float> @shuffle_v8f32_9810dc54(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_9810dc54:
; ALL:       # %bb.0:
; ALL-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[1,0],ymm1[5,4],ymm0[5,4]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4>
  ret <8 x float> %shuffle
}

; Interleaves the low element pair of %a and %b within each 128-bit half.
define <8 x float> @shuffle_v8f32_08194c5d(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_08194c5d:
; ALL:       # %bb.0:
; ALL-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
  ret <8 x float> %shuffle
}

; Interleaves the high element pair of %a and %b within each 128-bit half.
define <8 x float> @shuffle_v8f32_2a3b6e7f(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_2a3b6e7f:
; ALL:       # %bb.0:
; ALL-NEXT:    vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
  ret <8 x float> %shuffle
}

; Interleaves elements 0-3 of %a with elements 0-3 of %b across all eight lanes.
define <8 x float> @shuffle_v8f32_08192a3b(<8 x float> %a, <8 x float> %b) {
; AVX1OR2-LABEL: shuffle_v8f32_08192a3b:
; AVX1OR2:       # %bb.0:
; AVX1OR2-NEXT:    vunpckhps {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1OR2-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX1OR2-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1OR2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v8f32_08192a3b:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovaps {{.*#+}} ymm2 = [0,8,1,9,2,10,3,11]
; AVX512VL-NEXT:    vpermt2ps %ymm1, %ymm2, %ymm0
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  ret <8 x float> %shuffle
}

; Lanes 0 and 4 read %a[0] and %a[1]; the other lanes read %b[0],%b[1],%b[1] and %b[2],%b[3],%b[3].
define <8 x float> @shuffle_v8f32_08991abb(<8 x float> %a, <8 x float> %b) {
; AVX1-LABEL: shuffle_v8f32_08991abb:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vshufps {{.*#+}} xmm2 = xmm0[0,0],xmm1[0,0]
; AVX1-NEXT:    vshufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[1,1]
; AVX1-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,2,3,3]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8f32_08991abb:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <u,0,1,1,u,2,3,3>
; AVX2-NEXT:    vpermps %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3]
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v8f32_08991abb:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3]
; AVX512VL-NEXT:    vmovaps {{.*#+}} ymm0 = [8,0,1,1,10,2,3,3]
; AVX512VL-NEXT:    vpermi2ps %ymm2, %ymm1, %ymm0
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11>
  ret <8 x float> %shuffle
}

; Even lanes read elements 0-3 of %a in order; odd lanes keep %b in place.
define <8 x float> @shuffle_v8f32_091b2d3f(<8 x float> %a, <8 x float> %b) {
; AVX1-LABEL: shuffle_v8f32_091b2d3f:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3]
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,1,3,3]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8f32_091b2d3f:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <0,u,1,u,2,u,3,u>
; AVX2-NEXT:    vpermps %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v8f32_091b2d3f:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovaps {{.*#+}} ymm2 = [0,9,1,11,2,13,3,15]
; AVX512VL-NEXT:    vpermt2ps %ymm1, %ymm2, %ymm0
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
  ret <8 x float> %shuffle
}

; Lanes 0 and 4 read %a[0] and %a[1]; every other lane keeps %b in place.
define <8 x float> @shuffle_v8f32_09ab1def(<8 x float> %a, <8 x float> %b) {
; AVX1-LABEL: shuffle_v8f32_09ab1def:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8f32_09ab1def:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3]
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
; AVX2-NEXT:    retq
;
; AVX512VL-SLOW-LABEL: shuffle_v8f32_09ab1def:
; AVX512VL-SLOW:       # %bb.0:
; AVX512VL-SLOW-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3]
; AVX512VL-SLOW-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
; AVX512VL-SLOW-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
; AVX512VL-SLOW-NEXT:    retq
;
; AVX512VL-FAST-LABEL: shuffle_v8f32_09ab1def:
; AVX512VL-FAST:       # %bb.0:
; AVX512VL-FAST-NEXT:    vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3]
; AVX512VL-FAST-NEXT:    vmovaps {{.*#+}} ymm0 = [8,1,2,3,10,5,6,7]
; AVX512VL-FAST-NEXT:    vpermi2ps %ymm2, %ymm1, %ymm0
; AVX512VL-FAST-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
  ret <8 x float> %shuffle
}

; In-lane permute of %a; the same pattern <0,0,0,1> is used in both 128-bit halves.
define <8 x float> @shuffle_v8f32_00014445(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_00014445:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5>
  ret <8 x float> %shuffle
}

; In-lane permute of %a; the same pattern <0,0,2,0> is used in both 128-bit halves.
define <8 x float> @shuffle_v8f32_00204464(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_00204464:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4>
  ret <8 x float> %shuffle
}

; In-lane permute of %a; the same pattern <0,3,0,0> is used in both 128-bit halves.
define <8 x float> @shuffle_v8f32_03004744(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_03004744:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4>
  ret <8 x float> %shuffle
}

; In-lane permute of %a; the same pattern <1,0,0,0> is used in both 128-bit halves.
define <8 x float> @shuffle_v8f32_10005444(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_10005444:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
  ret <8 x float> %shuffle
}

; In-lane permute of %a; the same pattern <2,2,0,0> is used in both 128-bit halves.
define <8 x float> @shuffle_v8f32_22006644(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_22006644:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4>
  ret <8 x float> %shuffle
}

; In-lane permute of %a; the same pattern <3,3,3,0> is used in both 128-bit halves.
define <8 x float> @shuffle_v8f32_33307774(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_33307774:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4>
  ret <8 x float> %shuffle
}

; Reverses the four elements inside each 128-bit half of %a.
define <8 x float> @shuffle_v8f32_32107654(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_32107654:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
  ret <8 x float> %shuffle
}

; In-lane permute of %a; the same pattern <0,0,2,3> is used in both 128-bit halves.
define <8 x float> @shuffle_v8f32_00234467(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_00234467:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7>
  ret <8 x float> %shuffle
}

; Duplicates every even-indexed element of %a into the following odd lane.
define <8 x float> @shuffle_v8f32_00224466(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_00224466:
; ALL:       # %bb.0:
; ALL-NEXT:    vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
  ret <8 x float> %shuffle
}

; Swaps the two elements of every adjacent pair of %a.
define <8 x float> @shuffle_v8f32_10325476(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_10325476:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
  ret <8 x float> %shuffle
}

; Duplicates every odd-indexed element of %a into the preceding even lane.
define <8 x float> @shuffle_v8f32_11335577(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_11335577:
; ALL:       # %bb.0:
; ALL-NEXT:    vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
  ret <8 x float> %shuffle
}

; In-lane permute of %a; the same pattern <1,0,2,3> is used in both 128-bit halves.
define <8 x float> @shuffle_v8f32_10235467(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_10235467:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
  ret <8 x float> %shuffle
}

; In-lane permute of %a; the same pattern <1,0,2,2> is used in both 128-bit halves.
define <8 x float> @shuffle_v8f32_10225466(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_10225466:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6>
  ret <8 x float> %shuffle
}

; In-lane permute of %a with a different pattern in each 128-bit half.
define <8 x float> @shuffle_v8f32_00015444(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_00015444:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,5,4,4,4]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4>
  ret <8 x float> %shuffle
}

; In-lane permute of %a with a different pattern in each 128-bit half.
define <8 x float> @shuffle_v8f32_00204644(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_00204644:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,6,4,4]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4>
  ret <8 x float> %shuffle
}

; In-lane permute of %a with a different pattern in each 128-bit half.
define <8 x float> @shuffle_v8f32_03004474(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_03004474:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,4,7,4]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4>
  ret <8 x float> %shuffle
}

; In-lane permute of %a with a different pattern in each 128-bit half.
define <8 x float> @shuffle_v8f32_10004444(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_10004444:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,4,4,4,4]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
  ret <8 x float> %shuffle
}

; In-lane permute of %a with a different pattern in each 128-bit half.
define <8 x float> @shuffle_v8f32_22006446(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_22006446:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,4,4,6]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6>
  ret <8 x float> %shuffle
}

; In-lane permute of %a with a different pattern in each 128-bit half.
define <8 x float> @shuffle_v8f32_33307474(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_33307474:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,4,7,4]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4>
  ret <8 x float> %shuffle
}

; Reverses the low four elements of %a and leaves the high four in place.
define <8 x float> @shuffle_v8f32_32104567(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_32104567:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
  ret <8 x float> %shuffle
}

; In-lane permute of %a with a different pattern in each 128-bit half.
define <8 x float> @shuffle_v8f32_00236744(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_00236744:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,6,7,4,4]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4>
  ret <8 x float> %shuffle
}

; In-lane permute of %a with a different pattern in each 128-bit half.
define <8 x float> @shuffle_v8f32_00226644(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_00226644:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,2,6,6,4,4]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4>
  ret <8 x float> %shuffle
}

; Swaps adjacent pairs in the low half of %a and leaves the high half in place.
define <8 x float> @shuffle_v8f32_10324567(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_10324567:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
  ret <8 x float> %shuffle
}

; Duplicates the odd elements in the low half of %a; the high half stays in place.
define <8 x float> @shuffle_v8f32_11334567(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_11334567:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x float> %shuffle
}

; Low half of %a stays in place; elements 4 and 5 are swapped in the high half.
define <8 x float> @shuffle_v8f32_01235467(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_01235467:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,7]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
  ret <8 x float> %shuffle
}

; Low half of %a stays in place; the high half is permuted to <5,4,6,6>.
define <8 x float> @shuffle_v8f32_01235466(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_01235466:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,6]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6>
  ret <8 x float> %shuffle
}

; In-lane permute of %a with undef lanes 3 and 5.
define <8 x float> @shuffle_v8f32_002u6u44(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_002u6u44:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,u,6,u,4,4]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4>
  ret <8 x float> %shuffle
}

; In-lane permute of %a with undef lanes 2, 3, 6 and 7.
define <8 x float> @shuffle_v8f32_00uu66uu(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_00uu66uu:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,u,u,6,6,u,u]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef>
  ret <8 x float> %shuffle
}

; In-lane permute of %a with undef lanes 6 and 7.
define <8 x float> @shuffle_v8f32_103245uu(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_103245uu:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,u,u]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef>
  ret <8 x float> %shuffle
}

; In-lane permute of %a with undef lanes 4 and 5.
define <8 x float> @shuffle_v8f32_1133uu67(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_1133uu67:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,u,u,6,7]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7>
  ret <8 x float> %shuffle
}

; In-lane permute of %a with undef lanes 1, 2, 6 and 7.
define <8 x float> @shuffle_v8f32_0uu354uu(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_0uu354uu:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,u,u,3,5,4,u,u]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef>
  ret <8 x float> %shuffle
}

; In-lane permute of %a where only lanes 3, 6 and 7 are defined.
define <8 x float> @shuffle_v8f32_uuu3uu66(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_uuu3uu66:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[u,u,u,3,u,u,6,6]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6>
  ret <8 x float> %shuffle
}

; General two-input shuffle: elements of %a and %b are mixed and moved across the 128-bit halves.
define <8 x float> @shuffle_v8f32_c348cda0(<8 x float> %a, <8 x float> %b) {
; AVX1-LABEL: shuffle_v8f32_c348cda0:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,3],ymm2[0,0],ymm0[4,7],ymm2[4,4]
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,0,1]
; AVX1-NEXT:    vpermilps {{.*#+}} ymm1 = ymm1[0,1,2,0,4,5,6,4]
; AVX1-NEXT:    vblendps {{.*#+}} ymm1 = ymm2[0,1],ymm1[2,3,4,5],ymm2[6,7]
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3,4,5,6],ymm0[7]
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: shuffle_v8f32_c348cda0:
; AVX2-SLOW:       # %bb.0:
; AVX2-SLOW-NEXT:    vmovaps {{.*#+}} ymm2 = <4,u,u,0,4,5,2,u>
; AVX2-SLOW-NEXT:    vpermps %ymm1, %ymm2, %ymm1
; AVX2-SLOW-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,3,2,0,4,7,6,4]
; AVX2-SLOW-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,2,2,1]
; AVX2-SLOW-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3,4,5,6],ymm0[7]
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-LABEL: shuffle_v8f32_c348cda0:
; AVX2-FAST:       # %bb.0:
; AVX2-FAST-NEXT:    vmovaps {{.*#+}} ymm2 = [0,3,4,7,4,7,2,0]
; AVX2-FAST-NEXT:    vpermps %ymm0, %ymm2, %ymm0
; AVX2-FAST-NEXT:    vmovaps {{.*#+}} ymm2 = <4,u,u,0,4,5,2,u>
; AVX2-FAST-NEXT:    vpermps %ymm1, %ymm2, %ymm1
; AVX2-FAST-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3,4,5,6],ymm0[7]
; AVX2-FAST-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v8f32_c348cda0:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovaps {{.*#+}} ymm2 = [4,11,12,0,4,5,2,8]
; AVX512VL-NEXT:    vpermi2ps %ymm0, %ymm1, %ymm2
; AVX512VL-NEXT:    vmovaps %ymm2, %ymm0
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 12, i32 3, i32 4, i32 8, i32 12, i32 13, i32 10, i32 0>
  ret <8 x float> %shuffle
}

; General two-input shuffle: lanes 0 and 7 read %b (elements 7 and 2); lanes 1-6 read %a.
define <8 x float> @shuffle_v8f32_f511235a(<8 x float> %a, <8 x float> %b) {
; AVX1-LABEL: shuffle_v8f32_f511235a:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm1 = ymm1[3,1,2,2,7,5,6,6]
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm1[2,3,0,1]
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,1,1,1,4,5,5,5]
; AVX1-NEXT:    vshufpd {{.*#+}} ymm0 = ymm2[0],ymm0[1],ymm2[3],ymm0[3]
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6],ymm1[7]
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: shuffle_v8f32_f511235a:
; AVX2-SLOW:       # %bb.0:
; AVX2-SLOW-NEXT:    vpermilps {{.*#+}} ymm1 = ymm1[3,2,2,3,7,6,6,7]
; AVX2-SLOW-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[2,1,2,0]
; AVX2-SLOW-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,1,2,3,5,5,6,7]
; AVX2-SLOW-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,0,1,2]
; AVX2-SLOW-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6],ymm1[7]
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-LABEL: shuffle_v8f32_f511235a:
; AVX2-FAST:       # %bb.0:
; AVX2-FAST-NEXT:    vmovaps {{.*#+}} ymm2 = [7,6,2,3,7,6,3,2]
; AVX2-FAST-NEXT:    vpermps %ymm1, %ymm2, %ymm1
; AVX2-FAST-NEXT:    vmovaps {{.*#+}} ymm2 = [5,5,1,1,2,3,5,5]
; AVX2-FAST-NEXT:    vpermps %ymm0, %ymm2, %ymm0
; AVX2-FAST-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6],ymm1[7]
; AVX2-FAST-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v8f32_f511235a:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovaps {{.*#+}} ymm2 = [15,5,1,1,2,3,5,10]
; AVX512VL-NEXT:    vpermt2ps %ymm1, %ymm2, %ymm0
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 15, i32 5, i32 1, i32 1, i32 2, i32 3, i32 5, i32 10>
  ret <8 x float> %shuffle
}

; Reverses elements 0-3 of %a and places the result in both 128-bit halves.
define <8 x float> @shuffle_v8f32_32103210(<8 x float> %a, <8 x float> %b) {
; AVX1-LABEL: shuffle_v8f32_32103210:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: shuffle_v8f32_32103210:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
; AVX2OR512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,0,1]
; AVX2OR512VL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>
  ret <8 x float> %shuffle
}

; Reverses elements 4-7 of %a and places the result in both 128-bit halves.
define <8 x float> @shuffle_v8f32_76547654(<8 x float> %a, <8 x float> %b) {
; AVX1-LABEL: shuffle_v8f32_76547654:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: shuffle_v8f32_76547654:
; AVX2-SLOW:       # %bb.0:
; AVX2-SLOW-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX2-SLOW-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,3,2,3]
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-LABEL: shuffle_v8f32_76547654:
; AVX2-FAST:       # %bb.0:
; AVX2-FAST-NEXT:    vmovaps {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4]
; AVX2-FAST-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; AVX2-FAST-NEXT:    retq
;
; AVX512VL-SLOW-LABEL: shuffle_v8f32_76547654:
; AVX512VL-SLOW:       # %bb.0:
; AVX512VL-SLOW-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX512VL-SLOW-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,3,2,3]
; AVX512VL-SLOW-NEXT:    retq
;
; AVX512VL-FAST-LABEL: shuffle_v8f32_76547654:
; AVX512VL-FAST:       # %bb.0:
; AVX512VL-FAST-NEXT:    vmovaps {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4]
; AVX512VL-FAST-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; AVX512VL-FAST-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 7, i32 6, i32 5, i32 4>
  ret <8 x float> %shuffle
}

define <8 x float> @shuffle_v8f32_76543210(<8 x float> %a, <8 x float> %b) {
; AVX1-LABEL: shuffle_v8f32_76543210:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: shuffle_v8f32_76543210:
; AVX2-SLOW:       # %bb.0:
; AVX2-SLOW-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX2-SLOW-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-LABEL: shuffle_v8f32_76543210:
; AVX2-FAST:       # %bb.0:
; AVX2-FAST-NEXT:    vmovaps {{.*#+}} ymm1 = [7,6,5,4,3,2,1,0]
; AVX2-FAST-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; AVX2-FAST-NEXT:    retq
;
; AVX512VL-SLOW-LABEL: shuffle_v8f32_76543210:
; AVX512VL-SLOW:       # %bb.0:
; AVX512VL-SLOW-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX512VL-SLOW-NEXT:    vpermpd {{.*#+}} ymm0 =
ymm0[2,3,0,1] 809; AVX512VL-SLOW-NEXT: retq 810; 811; AVX512VL-FAST-LABEL: shuffle_v8f32_76543210: 812; AVX512VL-FAST: # %bb.0: 813; AVX512VL-FAST-NEXT: vmovaps {{.*#+}} ymm1 = [7,6,5,4,3,2,1,0] 814; AVX512VL-FAST-NEXT: vpermps %ymm0, %ymm1, %ymm0 815; AVX512VL-FAST-NEXT: retq 816 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> 817 ret <8 x float> %shuffle 818} 819 820define <8 x float> @shuffle_v8f32_3210ba98(<8 x float> %a, <8 x float> %b) { 821; ALL-LABEL: shuffle_v8f32_3210ba98: 822; ALL: # %bb.0: 823; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 824; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 825; ALL-NEXT: retq 826 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 11, i32 10, i32 9, i32 8> 827 ret <8 x float> %shuffle 828} 829 830define <8 x float> @shuffle_v8f32_3210fedc(<8 x float> %a, <8 x float> %b) { 831; ALL-LABEL: shuffle_v8f32_3210fedc: 832; ALL: # %bb.0: 833; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] 834; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 835; ALL-NEXT: retq 836 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12> 837 ret <8 x float> %shuffle 838} 839 840define <8 x float> @shuffle_v8f32_7654fedc(<8 x float> %a, <8 x float> %b) { 841; AVX1OR2-LABEL: shuffle_v8f32_7654fedc: 842; AVX1OR2: # %bb.0: 843; AVX1OR2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] 844; AVX1OR2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 845; AVX1OR2-NEXT: retq 846; 847; AVX512VL-SLOW-LABEL: shuffle_v8f32_7654fedc: 848; AVX512VL-SLOW: # %bb.0: 849; AVX512VL-SLOW-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] 850; AVX512VL-SLOW-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 851; AVX512VL-SLOW-NEXT: retq 852; 853; AVX512VL-FAST-LABEL: shuffle_v8f32_7654fedc: 854; AVX512VL-FAST: # %bb.0: 855; 
AVX512VL-FAST-NEXT: vmovaps {{.*#+}} ymm2 = [7,6,5,4,15,14,13,12] 856; AVX512VL-FAST-NEXT: vpermt2ps %ymm1, %ymm2, %ymm0 857; AVX512VL-FAST-NEXT: retq 858 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 15, i32 14, i32 13, i32 12> 859 ret <8 x float> %shuffle 860} 861 862define <8 x float> @shuffle_v8f32_fedc7654(<8 x float> %a, <8 x float> %b) { 863; AVX1OR2-LABEL: shuffle_v8f32_fedc7654: 864; AVX1OR2: # %bb.0: 865; AVX1OR2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3] 866; AVX1OR2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 867; AVX1OR2-NEXT: retq 868; 869; AVX512VL-SLOW-LABEL: shuffle_v8f32_fedc7654: 870; AVX512VL-SLOW: # %bb.0: 871; AVX512VL-SLOW-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3] 872; AVX512VL-SLOW-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 873; AVX512VL-SLOW-NEXT: retq 874; 875; AVX512VL-FAST-LABEL: shuffle_v8f32_fedc7654: 876; AVX512VL-FAST: # %bb.0: 877; AVX512VL-FAST-NEXT: vmovaps {{.*#+}} ymm2 = [7,6,5,4,15,14,13,12] 878; AVX512VL-FAST-NEXT: vpermi2ps %ymm0, %ymm1, %ymm2 879; AVX512VL-FAST-NEXT: vmovaps %ymm2, %ymm0 880; AVX512VL-FAST-NEXT: retq 881 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 7, i32 6, i32 5, i32 4> 882 ret <8 x float> %shuffle 883} 884 885define <8 x float> @PR21138(<8 x float> %truc, <8 x float> %tchose) { 886; AVX1-LABEL: PR21138: 887; AVX1: # %bb.0: 888; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 889; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[1,3],xmm2[1,3] 890; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 891; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 892; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm2[1,3] 893; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] 894; AVX1-NEXT: retq 895; 896; AVX2-LABEL: PR21138: 897; AVX2: # %bb.0: 898; AVX2-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7] 899; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3] 
900; AVX2-NEXT: retq 901; 902; AVX512VL-SLOW-LABEL: PR21138: 903; AVX512VL-SLOW: # %bb.0: 904; AVX512VL-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7] 905; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3] 906; AVX512VL-SLOW-NEXT: retq 907; 908; AVX512VL-FAST-LABEL: PR21138: 909; AVX512VL-FAST: # %bb.0: 910; AVX512VL-FAST-NEXT: vmovaps {{.*#+}} ymm2 = [1,3,5,7,9,11,13,15] 911; AVX512VL-FAST-NEXT: vpermt2ps %ymm1, %ymm2, %ymm0 912; AVX512VL-FAST-NEXT: retq 913 %shuffle = shufflevector <8 x float> %truc, <8 x float> %tchose, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> 914 ret <8 x float> %shuffle 915} 916 917define <8 x float> @shuffle_v8f32_ba987654(<8 x float> %a, <8 x float> %b) { 918; ALL-LABEL: shuffle_v8f32_ba987654: 919; ALL: # %bb.0: 920; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] 921; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 922; ALL-NEXT: retq 923 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4> 924 ret <8 x float> %shuffle 925} 926 927define <8 x float> @shuffle_v8f32_ba983210(<8 x float> %a, <8 x float> %b) { 928; ALL-LABEL: shuffle_v8f32_ba983210: 929; ALL: # %bb.0: 930; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 931; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 932; ALL-NEXT: retq 933 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 3, i32 2, i32 1, i32 0> 934 ret <8 x float> %shuffle 935} 936 937define <8 x float> @shuffle_v8f32_80u1c4u5(<8 x float> %a, <8 x float> %b) { 938; ALL-LABEL: shuffle_v8f32_80u1c4u5: 939; ALL: # %bb.0: 940; ALL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5] 941; ALL-NEXT: retq 942 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 0, i32 undef, i32 1, i32 12, i32 4, i32 undef, i32 5> 943 ret <8 x float> 
%shuffle 944} 945 946define <8 x float> @shuffle_v8f32_a2u3e6f7(<8 x float> %a, <8 x float> %b) { 947; ALL-LABEL: shuffle_v8f32_a2u3e6f7: 948; ALL: # %bb.0: 949; ALL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[6],ymm0[6],ymm1[7],ymm0[7] 950; ALL-NEXT: retq 951 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 10, i32 2, i32 undef, i32 3, i32 14, i32 6, i32 15, i32 7> 952 ret <8 x float> %shuffle 953} 954 955define <8 x float> @shuffle_v8f32_084c195d(<8 x float> %a, <8 x float> %b) { 956; AVX1-LABEL: shuffle_v8f32_084c195d: 957; AVX1: # %bb.0: 958; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,0,1] 959; AVX1-NEXT: vpermilps {{.*#+}} ymm2 = ymm2[0,1,2,0,4,5,6,4] 960; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,0,2,1,4,4,6,5] 961; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],ymm2[2,3,4,5],ymm1[6,7] 962; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1] 963; AVX1-NEXT: vpermilps {{.*#+}} ymm2 = ymm2[1,1,0,3,5,5,4,7] 964; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,1,3,4,5,5,7] 965; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm2[2,3,4,5],ymm0[6,7] 966; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 967; AVX1-NEXT: retq 968; 969; AVX2-LABEL: shuffle_v8f32_084c195d: 970; AVX2: # %bb.0: 971; AVX2-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] 972; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3] 973; AVX2-NEXT: retq 974; 975; AVX512VL-SLOW-LABEL: shuffle_v8f32_084c195d: 976; AVX512VL-SLOW: # %bb.0: 977; AVX512VL-SLOW-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] 978; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3] 979; AVX512VL-SLOW-NEXT: retq 980; 981; AVX512VL-FAST-LABEL: shuffle_v8f32_084c195d: 982; AVX512VL-FAST: # %bb.0: 983; AVX512VL-FAST-NEXT: vmovaps {{.*#+}} ymm2 = [0,8,4,12,1,9,5,13] 984; AVX512VL-FAST-NEXT: vpermt2ps %ymm1, %ymm2, %ymm0 985; 
AVX512VL-FAST-NEXT: retq 986 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 4, i32 12, i32 1, i32 9, i32 5, i32 13> 987 ret <8 x float> %shuffle 988} 989 990define <8 x float> @shuffle_v8f32_01452367d(<8 x float> %a) { 991; AVX1-LABEL: shuffle_v8f32_01452367d: 992; AVX1: # %bb.0: 993; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1] 994; AVX1-NEXT: vpermilpd {{.*#+}} ymm1 = ymm1[0,0,3,2] 995; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] 996; AVX1-NEXT: retq 997; 998; AVX2OR512VL-LABEL: shuffle_v8f32_01452367d: 999; AVX2OR512VL: # %bb.0: 1000; AVX2OR512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3] 1001; AVX2OR512VL-NEXT: retq 1002 %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 2, i32 3, i32 6, i32 7> 1003 ret <8 x float> %shuffle 1004} 1005 1006define <8 x float> @shuffle_v8f32_uuuu1111(<8 x float> %a, <8 x float> %b) { 1007; ALL-LABEL: shuffle_v8f32_uuuu1111: 1008; ALL: # %bb.0: 1009; ALL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1] 1010; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 1011; ALL-NEXT: retq 1012 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 1, i32 1, i32 1> 1013 ret <8 x float> %shuffle 1014} 1015 1016define <8 x float> @shuffle_v8f32_44444444(<8 x float> %a, <8 x float> %b) { 1017; AVX1-LABEL: shuffle_v8f32_44444444: 1018; AVX1: # %bb.0: 1019; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4] 1020; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3] 1021; AVX1-NEXT: retq 1022; 1023; AVX2OR512VL-LABEL: shuffle_v8f32_44444444: 1024; AVX2OR512VL: # %bb.0: 1025; AVX2OR512VL-NEXT: vextractf128 $1, %ymm0, %xmm0 1026; AVX2OR512VL-NEXT: vbroadcastss %xmm0, %ymm0 1027; AVX2OR512VL-NEXT: retq 1028 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4> 1029 ret <8 x float> %shuffle 1030} 1031 
; Lowering tests for 256-bit shuffles: the last <8 x float> cases with an undef half (shuffle_v8f32_1188uuuu .. shuffle_v8f32_5555uuuu), then the <8 x i32> cases (shuffle_v8i32_00000000 .. shuffle_v8i32_03004474).
; Each function is a single shufflevector of %a and %b returned unchanged; the suffix of the function name spells the mask one hex digit per lane ('u' = undef lane, digits 8-f are mask indices 8-15).
; CHECK prefixes come from the RUN lines at the top of the file: ALL is passed on every RUN line, AVX1 / AVX2 / AVX512VL (with -SLOW / -FAST variants) are per-configuration, AVX1OR2 and AVX2OR512VL are shared groups.
; The assertions are autogenerated by utils/update_llc_test_checks.py (see the NOTE on the first line of the file); regenerate them with that script instead of editing them by hand.
1032define <8 x float> @shuffle_v8f32_1188uuuu(<8 x float> %a, <8 x float> %b) { 1033; ALL-LABEL: shuffle_v8f32_1188uuuu: 1034; ALL: # %bb.0: 1035; ALL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[0,0] 1036; ALL-NEXT: retq 1037 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 8, i32 8, i32 undef, i32 undef, i32 undef, i32 undef> 1038 ret <8 x float> %shuffle 1039} 1040 1041define <8 x float> @shuffle_v8f32_uuuu3210(<8 x float> %a, <8 x float> %b) { 1042; ALL-LABEL: shuffle_v8f32_uuuu3210: 1043; ALL: # %bb.0: 1044; ALL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] 1045; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 1046; ALL-NEXT: retq 1047 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 3, i32 2, i32 1, i32 0> 1048 ret <8 x float> %shuffle 1049} 1050 1051define <8 x float> @shuffle_v8f32_uuuu1188(<8 x float> %a, <8 x float> %b) { 1052; ALL-LABEL: shuffle_v8f32_uuuu1188: 1053; ALL: # %bb.0: 1054; ALL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[0,0] 1055; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 1056; ALL-NEXT: retq 1057 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 1, i32 8, i32 8> 1058 ret <8 x float> %shuffle 1059} 1060 1061define <8 x float> @shuffle_v8f32_1111uuuu(<8 x float> %a, <8 x float> %b) { 1062; ALL-LABEL: shuffle_v8f32_1111uuuu: 1063; ALL: # %bb.0: 1064; ALL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1] 1065; ALL-NEXT: retq 1066 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef> 1067 ret <8 x float> %shuffle 1068} 1069 1070define <8 x float> @shuffle_v8f32_5555uuuu(<8 x float> %a, <8 x float> %b) { 1071; ALL-LABEL: shuffle_v8f32_5555uuuu: 1072; ALL: # %bb.0: 1073; ALL-NEXT: vextractf128 $1, %ymm0, %xmm0 1074; ALL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1] 1075; ALL-NEXT: retq 
1076 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 undef, i32 undef, i32 undef, i32 undef> 1077 ret <8 x float> %shuffle 1078} 1079 1080define <8 x i32> @shuffle_v8i32_00000000(<8 x i32> %a, <8 x i32> %b) { 1081; AVX1-LABEL: shuffle_v8i32_00000000: 1082; AVX1: # %bb.0: 1083; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0] 1084; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 1085; AVX1-NEXT: retq 1086; 1087; AVX2OR512VL-LABEL: shuffle_v8i32_00000000: 1088; AVX2OR512VL: # %bb.0: 1089; AVX2OR512VL-NEXT: vbroadcastss %xmm0, %ymm0 1090; AVX2OR512VL-NEXT: retq 1091 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1092 ret <8 x i32> %shuffle 1093} 1094 1095define <8 x i32> @shuffle_v8i32_00000010(<8 x i32> %a, <8 x i32> %b) { 1096; AVX1-LABEL: shuffle_v8i32_00000010: 1097; AVX1: # %bb.0: 1098; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0] 1099; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,0] 1100; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1101; AVX1-NEXT: retq 1102; 1103; AVX2OR512VL-LABEL: shuffle_v8i32_00000010: 1104; AVX2OR512VL: # %bb.0: 1105; AVX2OR512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,0] 1106; AVX2OR512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,1] 1107; AVX2OR512VL-NEXT: retq 1108 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0> 1109 ret <8 x i32> %shuffle 1110} 1111 1112define <8 x i32> @shuffle_v8i32_00000200(<8 x i32> %a, <8 x i32> %b) { 1113; AVX1-LABEL: shuffle_v8i32_00000200: 1114; AVX1: # %bb.0: 1115; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0] 1116; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,0] 1117; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1118; AVX1-NEXT: retq 1119; 1120; AVX2OR512VL-LABEL: shuffle_v8i32_00000200: 1121; AVX2OR512VL: # %bb.0: 1122; AVX2OR512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,2] 1123; 
AVX2OR512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,0] 1124; AVX2OR512VL-NEXT: retq 1125 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0> 1126 ret <8 x i32> %shuffle 1127} 1128 1129define <8 x i32> @shuffle_v8i32_00003000(<8 x i32> %a, <8 x i32> %b) { 1130; AVX1-LABEL: shuffle_v8i32_00003000: 1131; AVX1: # %bb.0: 1132; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0] 1133; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,0,0,0] 1134; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1135; AVX1-NEXT: retq 1136; 1137; AVX2OR512VL-LABEL: shuffle_v8i32_00003000: 1138; AVX2OR512VL: # %bb.0: 1139; AVX2OR512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,3,0] 1140; AVX2OR512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,0] 1141; AVX2OR512VL-NEXT: retq 1142 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0> 1143 ret <8 x i32> %shuffle 1144} 1145 1146define <8 x i32> @shuffle_v8i32_00040000(<8 x i32> %a, <8 x i32> %b) { 1147; AVX1-LABEL: shuffle_v8i32_00040000: 1148; AVX1: # %bb.0: 1149; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,3] 1150; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1] 1151; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4] 1152; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7] 1153; AVX1-NEXT: retq 1154; 1155; AVX2OR512VL-LABEL: shuffle_v8i32_00040000: 1156; AVX2OR512VL: # %bb.0: 1157; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,4,0,0,0,0] 1158; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0 1159; AVX2OR512VL-NEXT: retq 1160 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0> 1161 ret <8 x i32> %shuffle 1162} 1163 1164define <8 x i32> @shuffle_v8i32_00500000(<8 x i32> %a, <8 x i32> %b) { 1165; AVX1-LABEL: shuffle_v8i32_00500000: 1166; AVX1: # %bb.0: 1167; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1] 1168; AVX1-NEXT: 
vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7] 1169; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,4,4] 1170; AVX1-NEXT: retq 1171; 1172; AVX2OR512VL-LABEL: shuffle_v8i32_00500000: 1173; AVX2OR512VL: # %bb.0: 1174; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,5,0,0,0,0,0] 1175; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0 1176; AVX2OR512VL-NEXT: retq 1177 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0> 1178 ret <8 x i32> %shuffle 1179} 1180 1181define <8 x i32> @shuffle_v8i32_06000000(<8 x i32> %a, <8 x i32> %b) { 1182; AVX1-LABEL: shuffle_v8i32_06000000: 1183; AVX1: # %bb.0: 1184; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1] 1185; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7] 1186; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,0,0,4,4,4,4] 1187; AVX1-NEXT: retq 1188; 1189; AVX2OR512VL-LABEL: shuffle_v8i32_06000000: 1190; AVX2OR512VL: # %bb.0: 1191; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [0,6,0,0,0,0,0,0] 1192; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0 1193; AVX2OR512VL-NEXT: retq 1194 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1195 ret <8 x i32> %shuffle 1196} 1197 1198define <8 x i32> @shuffle_v8i32_70000000(<8 x i32> %a, <8 x i32> %b) { 1199; AVX1-LABEL: shuffle_v8i32_70000000: 1200; AVX1: # %bb.0: 1201; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1] 1202; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7] 1203; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,0,0,0,4,4,4,4] 1204; AVX1-NEXT: retq 1205; 1206; AVX2OR512VL-LABEL: shuffle_v8i32_70000000: 1207; AVX2OR512VL: # %bb.0: 1208; AVX2OR512VL-NEXT: movl $7, %eax 1209; AVX2OR512VL-NEXT: vmovd %eax, %xmm1 1210; AVX2OR512VL-NEXT: vpermd %ymm0, %ymm1, %ymm0 1211; AVX2OR512VL-NEXT: retq 1212 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 
0> 1213 ret <8 x i32> %shuffle 1214} 1215 1216define <8 x i32> @shuffle_v8i32_01014545(<8 x i32> %a, <8 x i32> %b) { 1217; AVX1-LABEL: shuffle_v8i32_01014545: 1218; AVX1: # %bb.0: 1219; AVX1-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] 1220; AVX1-NEXT: retq 1221; 1222; AVX2OR512VL-LABEL: shuffle_v8i32_01014545: 1223; AVX2OR512VL: # %bb.0: 1224; AVX2OR512VL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5] 1225; AVX2OR512VL-NEXT: retq 1226 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5> 1227 ret <8 x i32> %shuffle 1228} 1229 1230define <8 x i32> @shuffle_v8i32_00112233(<8 x i32> %a, <8 x i32> %b) { 1231; AVX1-LABEL: shuffle_v8i32_00112233: 1232; AVX1: # %bb.0: 1233; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,1,1] 1234; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3] 1235; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1236; AVX1-NEXT: retq 1237; 1238; AVX2OR512VL-LABEL: shuffle_v8i32_00112233: 1239; AVX2OR512VL: # %bb.0: 1240; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,1,1,2,2,3,3] 1241; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0 1242; AVX2OR512VL-NEXT: retq 1243 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3> 1244 ret <8 x i32> %shuffle 1245} 1246 1247define <8 x i32> @shuffle_v8i32_00001111(<8 x i32> %a, <8 x i32> %b) { 1248; AVX1-LABEL: shuffle_v8i32_00001111: 1249; AVX1: # %bb.0: 1250; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0] 1251; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1] 1252; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1253; AVX1-NEXT: retq 1254; 1255; AVX2OR512VL-LABEL: shuffle_v8i32_00001111: 1256; AVX2OR512VL: # %bb.0: 1257; AVX2OR512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,1] 1258; AVX2OR512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,1] 1259; AVX2OR512VL-NEXT: retq 1260 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 
1, i32 1, i32 1> 1261 ret <8 x i32> %shuffle 1262} 1263 1264define <8 x i32> @shuffle_v8i32_81a3c5e7(<8 x i32> %a, <8 x i32> %b) { 1265; ALL-LABEL: shuffle_v8i32_81a3c5e7: 1266; ALL: # %bb.0: 1267; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7] 1268; ALL-NEXT: retq 1269 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7> 1270 ret <8 x i32> %shuffle 1271} 1272 1273define <8 x i32> @shuffle_v8i32_08080808(<8 x i32> %a, <8 x i32> %b) { 1274; AVX1-LABEL: shuffle_v8i32_08080808: 1275; AVX1: # %bb.0: 1276; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] 1277; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3] 1278; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 1279; AVX1-NEXT: retq 1280; 1281; AVX2OR512VL-LABEL: shuffle_v8i32_08080808: 1282; AVX2OR512VL: # %bb.0: 1283; AVX2OR512VL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1284; AVX2OR512VL-NEXT: vbroadcastsd %xmm0, %ymm0 1285; AVX2OR512VL-NEXT: retq 1286 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8> 1287 ret <8 x i32> %shuffle 1288} 1289 1290define <8 x i32> @shuffle_v8i32_08084c4c(<8 x i32> %a, <8 x i32> %b) { 1291; AVX1-LABEL: shuffle_v8i32_08084c4c: 1292; AVX1: # %bb.0: 1293; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] 1294; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7] 1295; AVX1-NEXT: retq 1296; 1297; AVX2OR512VL-LABEL: shuffle_v8i32_08084c4c: 1298; AVX2OR512VL: # %bb.0: 1299; AVX2OR512VL-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,0,2,0,4,4,6,4] 1300; AVX2OR512VL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5] 1301; AVX2OR512VL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 1302; AVX2OR512VL-NEXT: retq 1303 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 
4, i32 12, i32 4, i32 12> 1304 ret <8 x i32> %shuffle 1305} 1306 1307define <8 x i32> @shuffle_v8i32_8823cc67(<8 x i32> %a, <8 x i32> %b) { 1308; ALL-LABEL: shuffle_v8i32_8823cc67: 1309; ALL: # %bb.0: 1310; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,0],ymm0[2,3],ymm1[4,4],ymm0[6,7] 1311; ALL-NEXT: retq 1312 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7> 1313 ret <8 x i32> %shuffle 1314} 1315 1316define <8 x i32> @shuffle_v8i32_9832dc76(<8 x i32> %a, <8 x i32> %b) { 1317; ALL-LABEL: shuffle_v8i32_9832dc76: 1318; ALL: # %bb.0: 1319; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[3,2],ymm1[5,4],ymm0[7,6] 1320; ALL-NEXT: retq 1321 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6> 1322 ret <8 x i32> %shuffle 1323} 1324 1325define <8 x i32> @shuffle_v8i32_9810dc54(<8 x i32> %a, <8 x i32> %b) { 1326; ALL-LABEL: shuffle_v8i32_9810dc54: 1327; ALL: # %bb.0: 1328; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[1,0],ymm1[5,4],ymm0[5,4] 1329; ALL-NEXT: retq 1330 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4> 1331 ret <8 x i32> %shuffle 1332} 1333 1334define <8 x i32> @shuffle_v8i32_08194c5d(<8 x i32> %a, <8 x i32> %b) { 1335; ALL-LABEL: shuffle_v8i32_08194c5d: 1336; ALL: # %bb.0: 1337; ALL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] 1338; ALL-NEXT: retq 1339 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13> 1340 ret <8 x i32> %shuffle 1341} 1342 1343define <8 x i32> @shuffle_v8i32_2a3b6e7f(<8 x i32> %a, <8 x i32> %b) { 1344; ALL-LABEL: shuffle_v8i32_2a3b6e7f: 1345; ALL: # %bb.0: 1346; ALL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] 1347; ALL-NEXT: retq 1348 %shuffle = shufflevector <8 
x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> 1349 ret <8 x i32> %shuffle 1350} 1351 1352define <8 x i32> @shuffle_v8i32_08192a3b(<8 x i32> %a, <8 x i32> %b) { 1353; AVX1OR2-LABEL: shuffle_v8i32_08192a3b: 1354; AVX1OR2: # %bb.0: 1355; AVX1OR2-NEXT: vunpckhps {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1356; AVX1OR2-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1357; AVX1OR2-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1358; AVX1OR2-NEXT: retq 1359; 1360; AVX512VL-LABEL: shuffle_v8i32_08192a3b: 1361; AVX512VL: # %bb.0: 1362; AVX512VL-NEXT: vpmovzxdq {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1363; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm0 = [0,8,2,9,4,10,6,11] 1364; AVX512VL-NEXT: vpermi2d %ymm1, %ymm2, %ymm0 1365; AVX512VL-NEXT: retq 1366 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> 1367 ret <8 x i32> %shuffle 1368} 1369 1370define <8 x i32> @shuffle_v8i32_08991abb(<8 x i32> %a, <8 x i32> %b) { 1371; AVX1-LABEL: shuffle_v8i32_08991abb: 1372; AVX1: # %bb.0: 1373; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm0[0,0],xmm1[0,0] 1374; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[1,1] 1375; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] 1376; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,2,3,3] 1377; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 1378; AVX1-NEXT: retq 1379; 1380; AVX2-LABEL: shuffle_v8i32_08991abb: 1381; AVX2: # %bb.0: 1382; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <u,0,1,1,u,2,3,3> 1383; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1 1384; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 1385; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,3] 1386; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7] 1387; AVX2-NEXT: retq 1388; 1389; AVX512VL-LABEL: shuffle_v8i32_08991abb: 1390; AVX512VL: # %bb.0: 1391; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm2 = 
xmm0[0],zero,xmm0[1],zero 1392; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm0 = [8,0,1,1,10,2,3,3] 1393; AVX512VL-NEXT: vpermi2d %ymm2, %ymm1, %ymm0 1394; AVX512VL-NEXT: retq 1395 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11> 1396 ret <8 x i32> %shuffle 1397} 1398 1399define <8 x i32> @shuffle_v8i32_091b2d3f(<8 x i32> %a, <8 x i32> %b) { 1400; AVX1-LABEL: shuffle_v8i32_091b2d3f: 1401; AVX1: # %bb.0: 1402; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3] 1403; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,1,3,3] 1404; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 1405; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 1406; AVX1-NEXT: retq 1407; 1408; AVX2OR512VL-LABEL: shuffle_v8i32_091b2d3f: 1409; AVX2OR512VL: # %bb.0: 1410; AVX2OR512VL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1411; AVX2OR512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 1412; AVX2OR512VL-NEXT: retq 1413 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15> 1414 ret <8 x i32> %shuffle 1415} 1416 1417define <8 x i32> @shuffle_v8i32_09ab1def(<8 x i32> %a, <8 x i32> %b) { 1418; AVX1-LABEL: shuffle_v8i32_09ab1def: 1419; AVX1: # %bb.0: 1420; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[1,1,3,3] 1421; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1422; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7] 1423; AVX1-NEXT: retq 1424; 1425; AVX2-LABEL: shuffle_v8i32_09ab1def: 1426; AVX2: # %bb.0: 1427; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 1428; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,3] 1429; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7] 1430; AVX2-NEXT: retq 1431; 1432; AVX512VL-SLOW-LABEL: shuffle_v8i32_09ab1def: 1433; AVX512VL-SLOW: # %bb.0: 
1434; AVX512VL-SLOW-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 1435; AVX512VL-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,3] 1436; AVX512VL-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7] 1437; AVX512VL-SLOW-NEXT: retq 1438; 1439; AVX512VL-FAST-LABEL: shuffle_v8i32_09ab1def: 1440; AVX512VL-FAST: # %bb.0: 1441; AVX512VL-FAST-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero 1442; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} ymm0 = [8,1,2,3,10,5,6,7] 1443; AVX512VL-FAST-NEXT: vpermi2d %ymm2, %ymm1, %ymm0 1444; AVX512VL-FAST-NEXT: retq 1445 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15> 1446 ret <8 x i32> %shuffle 1447} 1448 1449define <8 x i32> @shuffle_v8i32_00014445(<8 x i32> %a, <8 x i32> %b) { 1450; ALL-LABEL: shuffle_v8i32_00014445: 1451; ALL: # %bb.0: 1452; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5] 1453; ALL-NEXT: retq 1454 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5> 1455 ret <8 x i32> %shuffle 1456} 1457 1458define <8 x i32> @shuffle_v8i32_00204464(<8 x i32> %a, <8 x i32> %b) { 1459; ALL-LABEL: shuffle_v8i32_00204464: 1460; ALL: # %bb.0: 1461; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4] 1462; ALL-NEXT: retq 1463 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4> 1464 ret <8 x i32> %shuffle 1465} 1466 1467define <8 x i32> @shuffle_v8i32_03004744(<8 x i32> %a, <8 x i32> %b) { 1468; ALL-LABEL: shuffle_v8i32_03004744: 1469; ALL: # %bb.0: 1470; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4] 1471; ALL-NEXT: retq 1472 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4> 1473 ret <8 x i32> %shuffle 1474} 1475 1476define <8 x i32> @shuffle_v8i32_10005444(<8 x i32> %a, <8 x i32> %b) { 1477; ALL-LABEL: 
shuffle_v8i32_10005444: 1478; ALL: # %bb.0: 1479; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4] 1480; ALL-NEXT: retq 1481 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4> 1482 ret <8 x i32> %shuffle 1483} 1484 1485define <8 x i32> @shuffle_v8i32_22006644(<8 x i32> %a, <8 x i32> %b) { 1486; ALL-LABEL: shuffle_v8i32_22006644: 1487; ALL: # %bb.0: 1488; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4] 1489; ALL-NEXT: retq 1490 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4> 1491 ret <8 x i32> %shuffle 1492} 1493 1494define <8 x i32> @shuffle_v8i32_33307774(<8 x i32> %a, <8 x i32> %b) { 1495; ALL-LABEL: shuffle_v8i32_33307774: 1496; ALL: # %bb.0: 1497; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4] 1498; ALL-NEXT: retq 1499 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4> 1500 ret <8 x i32> %shuffle 1501} 1502 1503define <8 x i32> @shuffle_v8i32_32107654(<8 x i32> %a, <8 x i32> %b) { 1504; ALL-LABEL: shuffle_v8i32_32107654: 1505; ALL: # %bb.0: 1506; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 1507; ALL-NEXT: retq 1508 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> 1509 ret <8 x i32> %shuffle 1510} 1511 1512define <8 x i32> @shuffle_v8i32_00234467(<8 x i32> %a, <8 x i32> %b) { 1513; ALL-LABEL: shuffle_v8i32_00234467: 1514; ALL: # %bb.0: 1515; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7] 1516; ALL-NEXT: retq 1517 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7> 1518 ret <8 x i32> %shuffle 1519} 1520 1521define <8 x i32> @shuffle_v8i32_00224466(<8 x i32> %a, <8 x i32> %b) { 1522; AVX1-LABEL: shuffle_v8i32_00224466: 1523; AVX1: # %bb.0: 1524; AVX1-NEXT: vmovsldup 
{{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] 1525; AVX1-NEXT: retq 1526; 1527; AVX2OR512VL-LABEL: shuffle_v8i32_00224466: 1528; AVX2OR512VL: # %bb.0: 1529; AVX2OR512VL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] 1530; AVX2OR512VL-NEXT: retq 1531 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6> 1532 ret <8 x i32> %shuffle 1533} 1534 1535define <8 x i32> @shuffle_v8i32_10325476(<8 x i32> %a, <8 x i32> %b) { 1536; ALL-LABEL: shuffle_v8i32_10325476: 1537; ALL: # %bb.0: 1538; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6] 1539; ALL-NEXT: retq 1540 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6> 1541 ret <8 x i32> %shuffle 1542} 1543 1544define <8 x i32> @shuffle_v8i32_11335577(<8 x i32> %a, <8 x i32> %b) { 1545; AVX1-LABEL: shuffle_v8i32_11335577: 1546; AVX1: # %bb.0: 1547; AVX1-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] 1548; AVX1-NEXT: retq 1549; 1550; AVX2OR512VL-LABEL: shuffle_v8i32_11335577: 1551; AVX2OR512VL: # %bb.0: 1552; AVX2OR512VL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] 1553; AVX2OR512VL-NEXT: retq 1554 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7> 1555 ret <8 x i32> %shuffle 1556} 1557 1558define <8 x i32> @shuffle_v8i32_10235467(<8 x i32> %a, <8 x i32> %b) { 1559; ALL-LABEL: shuffle_v8i32_10235467: 1560; ALL: # %bb.0: 1561; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7] 1562; ALL-NEXT: retq 1563 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7> 1564 ret <8 x i32> %shuffle 1565} 1566 1567define <8 x i32> @shuffle_v8i32_10225466(<8 x i32> %a, <8 x i32> %b) { 1568; ALL-LABEL: shuffle_v8i32_10225466: 1569; ALL: # %bb.0: 1570; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6] 1571; ALL-NEXT: retq 1572 %shuffle = 
shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6> 1573 ret <8 x i32> %shuffle 1574} 1575 1576define <8 x i32> @shuffle_v8i32_00015444(<8 x i32> %a, <8 x i32> %b) { 1577; AVX1-LABEL: shuffle_v8i32_00015444: 1578; AVX1: # %bb.0: 1579; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,5,4,4,4] 1580; AVX1-NEXT: retq 1581; 1582; AVX2OR512VL-LABEL: shuffle_v8i32_00015444: 1583; AVX2OR512VL: # %bb.0: 1584; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,1,5,4,4,4] 1585; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0 1586; AVX2OR512VL-NEXT: retq 1587 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4> 1588 ret <8 x i32> %shuffle 1589} 1590 1591define <8 x i32> @shuffle_v8i32_00204644(<8 x i32> %a, <8 x i32> %b) { 1592; AVX1-LABEL: shuffle_v8i32_00204644: 1593; AVX1: # %bb.0: 1594; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,6,4,4] 1595; AVX1-NEXT: retq 1596; 1597; AVX2OR512VL-LABEL: shuffle_v8i32_00204644: 1598; AVX2OR512VL: # %bb.0: 1599; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,2,0,4,6,4,4] 1600; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0 1601; AVX2OR512VL-NEXT: retq 1602 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4> 1603 ret <8 x i32> %shuffle 1604} 1605 1606define <8 x i32> @shuffle_v8i32_03004474(<8 x i32> %a, <8 x i32> %b) { 1607; AVX1-LABEL: shuffle_v8i32_03004474: 1608; AVX1: # %bb.0: 1609; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,4,7,4] 1610; AVX1-NEXT: retq 1611; 1612; AVX2OR512VL-LABEL: shuffle_v8i32_03004474: 1613; AVX2OR512VL: # %bb.0: 1614; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [0,3,0,0,4,4,7,4] 1615; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0 1616; AVX2OR512VL-NEXT: retq 1617 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4> 1618 ret <8 x i32> %shuffle 1619} 

; Single-source (%a) shuffles that stay within each 128-bit lane while using a
; different element pattern in the low and the high lane.

define <8 x i32> @shuffle_v8i32_10004444(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_10004444:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,4,4,4,4]
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: shuffle_v8i32_10004444:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vmovaps {{.*#+}} ymm1 = [1,0,0,0,4,4,4,4]
; AVX2OR512VL-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; AVX2OR512VL-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_22006446(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_22006446:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,4,4,6]
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: shuffle_v8i32_22006446:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vmovaps {{.*#+}} ymm1 = [2,2,0,0,6,4,4,6]
; AVX2OR512VL-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; AVX2OR512VL-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6>
  ret <8 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_33307474(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_33307474:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,4,7,4]
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: shuffle_v8i32_33307474:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vmovaps {{.*#+}} ymm1 = [3,3,3,0,7,4,7,4]
; AVX2OR512VL-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; AVX2OR512VL-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4>
  ret <8 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_32104567(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_32104567:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7]
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: shuffle_v8i32_32104567:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vmovaps {{.*#+}} ymm1 = [3,2,1,0,4,5,6,7]
; AVX2OR512VL-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; AVX2OR512VL-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_00236744(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_00236744:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,6,7,4,4]
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: shuffle_v8i32_00236744:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vmovaps {{.*#+}} ymm1 = [0,0,2,3,6,7,4,4]
; AVX2OR512VL-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; AVX2OR512VL-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4>
  ret <8 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_00226644(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_00226644:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,2,6,6,4,4]
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: shuffle_v8i32_00226644:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vmovaps {{.*#+}} ymm1 = [0,0,2,2,6,6,4,4]
; AVX2OR512VL-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; AVX2OR512VL-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4>
  ret <8 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_10324567(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_10324567:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7]
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: shuffle_v8i32_10324567:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vmovaps {{.*#+}} ymm1 = [1,0,3,2,4,5,6,7]
; AVX2OR512VL-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; AVX2OR512VL-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_11334567(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_11334567:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7]
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: shuffle_v8i32_11334567:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vmovaps {{.*#+}} ymm1 = [1,1,3,3,4,5,6,7]
; AVX2OR512VL-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; AVX2OR512VL-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_01235467(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_01235467:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,7]
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: shuffle_v8i32_01235467:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vmovaps {{.*#+}} ymm1 = [0,1,2,3,5,4,6,7]
; AVX2OR512VL-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; AVX2OR512VL-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
  ret <8 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_01235466(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_01235466:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,6]
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: shuffle_v8i32_01235466:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vmovaps {{.*#+}} ymm1 = [0,1,2,3,5,4,6,6]
; AVX2OR512VL-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; AVX2OR512VL-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6>
  ret <8 x i32> %shuffle
}

; Same kind of in-lane, per-lane-different shuffles of %a, now with some mask
; elements left undef ('u' in the function names).

define <8 x i32> @shuffle_v8i32_002u6u44(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_002u6u44:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,u,6,u,4,4]
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: shuffle_v8i32_002u6u44:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vmovaps {{.*#+}} ymm1 = <0,0,2,u,6,u,4,4>
; AVX2OR512VL-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; AVX2OR512VL-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4>
  ret <8 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_00uu66uu(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_00uu66uu:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,u,u,6,6,u,u]
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: shuffle_v8i32_00uu66uu:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vmovaps {{.*#+}} ymm1 = <0,0,u,u,6,6,u,u>
; AVX2OR512VL-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; AVX2OR512VL-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef>
  ret <8 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_103245uu(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_103245uu:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,u,u]
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: shuffle_v8i32_103245uu:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vmovaps {{.*#+}} ymm1 = <1,0,3,2,4,5,u,u>
; AVX2OR512VL-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; AVX2OR512VL-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef>
  ret <8 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_1133uu67(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_1133uu67:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,u,u,6,7]
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: shuffle_v8i32_1133uu67:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vmovaps {{.*#+}} ymm1 = <1,1,3,3,u,u,6,7>
; AVX2OR512VL-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; AVX2OR512VL-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7>
  ret <8 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_0uu354uu(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_0uu354uu:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,u,u,3,5,4,u,u]
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: shuffle_v8i32_0uu354uu:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vmovaps {{.*#+}} ymm1 = <0,u,u,3,5,4,u,u>
; AVX2OR512VL-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; AVX2OR512VL-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef>
  ret <8 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_uuu3uu66(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_uuu3uu66:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[u,u,u,3,u,u,6,6]
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: shuffle_v8i32_uuu3uu66:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vmovaps {{.*#+}} ymm1 = <u,u,u,3,u,u,6,6>
; AVX2OR512VL-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; AVX2OR512VL-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6>
  ret <8 x i32> %shuffle
}

; Two-source shuffle mixing elements of %a and %b across both 128-bit lanes.
define <8 x i32> @shuffle_v8i32_6caa87e5(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_6caa87e5:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,0,1]
; AVX1-NEXT:    vshufps {{.*#+}} ymm1 = ymm2[0,0],ymm1[2,2],ymm2[4,4],ymm1[6,6]
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4],ymm0[5],ymm1[6],ymm0[7]
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: shuffle_v8i32_6caa87e5:
; AVX2-SLOW:       # %bb.0:
; AVX2-SLOW-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,1,3,2]
; AVX2-SLOW-NEXT:    vpermilps {{.*#+}} ymm1 = ymm1[0,0,2,2,4,4,6,6]
; AVX2-SLOW-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[2,1,0,3]
; AVX2-SLOW-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4],ymm0[5],ymm1[6],ymm0[7]
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-LABEL: shuffle_v8i32_6caa87e5:
; AVX2-FAST:       # %bb.0:
; AVX2-FAST-NEXT:    vmovaps {{.*#+}} ymm2 = [4,4,2,2,0,0,6,6]
; AVX2-FAST-NEXT:    vpermps %ymm1, %ymm2, %ymm1
; AVX2-FAST-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,1,3,2]
; AVX2-FAST-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4],ymm0[5],ymm1[6],ymm0[7]
; AVX2-FAST-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v8i32_6caa87e5:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm2 = [14,4,2,2,0,15,6,13]
; AVX512VL-NEXT:    vpermi2d %ymm0, %ymm1, %ymm2
; AVX512VL-NEXT:    vmovdqa %ymm2, %ymm0
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 6, i32 12, i32 10, i32 10, i32 8, i32 7, i32 14, i32 5>
  ret <8 x i32> %shuffle
}

; The remaining tests in this group reverse 128-bit halves of %a and/or %b.

define <8 x i32> @shuffle_v8i32_32103210(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_32103210:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: shuffle_v8i32_32103210:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
; AVX2OR512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,0,1]
; AVX2OR512VL-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>
  ret <8 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_76547654(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_76547654:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: shuffle_v8i32_76547654:
; AVX2-SLOW:       # %bb.0:
; AVX2-SLOW-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX2-SLOW-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,3,2,3]
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-LABEL: shuffle_v8i32_76547654:
; AVX2-FAST:       # %bb.0:
; AVX2-FAST-NEXT:    vmovaps {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4]
; AVX2-FAST-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; AVX2-FAST-NEXT:    retq
;
; AVX512VL-SLOW-LABEL: shuffle_v8i32_76547654:
; AVX512VL-SLOW:       # %bb.0:
; AVX512VL-SLOW-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX512VL-SLOW-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,3,2,3]
; AVX512VL-SLOW-NEXT:    retq
;
; AVX512VL-FAST-LABEL: shuffle_v8i32_76547654:
; AVX512VL-FAST:       # %bb.0:
; AVX512VL-FAST-NEXT:    vmovaps {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4]
; AVX512VL-FAST-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; AVX512VL-FAST-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 7, i32 6, i32 5, i32 4>
  ret <8 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_76543210(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_76543210:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: shuffle_v8i32_76543210:
; AVX2-SLOW:       # %bb.0:
; AVX2-SLOW-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX2-SLOW-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-LABEL: shuffle_v8i32_76543210:
; AVX2-FAST:       # %bb.0:
; AVX2-FAST-NEXT:    vmovaps {{.*#+}} ymm1 = [7,6,5,4,3,2,1,0]
; AVX2-FAST-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; AVX2-FAST-NEXT:    retq
;
; AVX512VL-SLOW-LABEL: shuffle_v8i32_76543210:
; AVX512VL-SLOW:       # %bb.0:
; AVX512VL-SLOW-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX512VL-SLOW-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX512VL-SLOW-NEXT:    retq
;
; AVX512VL-FAST-LABEL: shuffle_v8i32_76543210:
; AVX512VL-FAST:       # %bb.0:
; AVX512VL-FAST-NEXT:    vmovaps {{.*#+}} ymm1 = [7,6,5,4,3,2,1,0]
; AVX512VL-FAST-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; AVX512VL-FAST-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_3210ba98(<8 x i32> %a, <8 x i32> %b) {
; ALL-LABEL: shuffle_v8i32_3210ba98:
; ALL:       # %bb.0:
; ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 11, i32 10, i32 9, i32 8>
  ret <8 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_3210fedc(<8 x i32> %a, <8 x i32> %b) {
; ALL-LABEL: shuffle_v8i32_3210fedc:
; ALL:       # %bb.0:
; ALL-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12>
  ret <8 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_7654fedc(<8 x i32> %a, <8 x i32> %b) {
; AVX1OR2-LABEL: shuffle_v8i32_7654fedc:
; AVX1OR2:       # %bb.0:
; AVX1OR2-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; AVX1OR2-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX1OR2-NEXT:    retq
;
; AVX512VL-SLOW-LABEL: shuffle_v8i32_7654fedc:
; AVX512VL-SLOW:       # %bb.0:
; AVX512VL-SLOW-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; AVX512VL-SLOW-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX512VL-SLOW-NEXT:    retq
;
; AVX512VL-FAST-LABEL: shuffle_v8i32_7654fedc:
; AVX512VL-FAST:       # %bb.0:
; AVX512VL-FAST-NEXT:    vmovdqa {{.*#+}} ymm2 = [7,6,5,4,15,14,13,12]
; AVX512VL-FAST-NEXT:    vpermt2d %ymm1, %ymm2, %ymm0
; AVX512VL-FAST-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 15, i32 14, i32 13, i32 12>
  ret <8 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_fedc7654(<8 x i32> %a, <8 x i32> %b) {
; AVX1OR2-LABEL: shuffle_v8i32_fedc7654:
; AVX1OR2:       # %bb.0:
; AVX1OR2-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
; AVX1OR2-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX1OR2-NEXT:    retq
;
; AVX512VL-SLOW-LABEL: shuffle_v8i32_fedc7654:
; AVX512VL-SLOW:       # %bb.0:
; AVX512VL-SLOW-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
; AVX512VL-SLOW-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX512VL-SLOW-NEXT:    retq
;
; AVX512VL-FAST-LABEL: shuffle_v8i32_fedc7654:
; AVX512VL-FAST:       # %bb.0:
; AVX512VL-FAST-NEXT:    vmovdqa {{.*#+}} ymm2 = [7,6,5,4,15,14,13,12]
; AVX512VL-FAST-NEXT:    vpermi2d %ymm0, %ymm1, %ymm2
; AVX512VL-FAST-NEXT:    vmovdqa %ymm2, %ymm0
; AVX512VL-FAST-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 15, i32
14, i32 13, i32 12, i32 7, i32 6, i32 5, i32 4>
  ret <8 x i32> %shuffle
}

; Reversed low half of %b followed by the reversed high half of %a.
define <8 x i32> @shuffle_v8i32_ba987654(<8 x i32> %a, <8 x i32> %b) {
; ALL-LABEL: shuffle_v8i32_ba987654:
; ALL:       # %bb.0:
; ALL-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4>
  ret <8 x i32> %shuffle
}

; Reversed low half of %b followed by the reversed low half of %a, i.e. the
; mask b,a,9,8,3,2,1,0 that the function name spells out.
; NOTE(review): the CHECK lines are the operand-swapped mirror of
; shuffle_v8i32_3210ba98; re-run utils/update_llc_test_checks.py to confirm.
define <8 x i32> @shuffle_v8i32_ba983210(<8 x i32> %a, <8 x i32> %b) {
; ALL-LABEL: shuffle_v8i32_ba983210:
; ALL:       # %bb.0:
; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 3, i32 2, i32 1, i32 0>
  ret <8 x i32> %shuffle
}

; Zero vector as first source: elements 0 and 4 of %a land in the top element
; of each 128-bit lane, the lane's first element is zero, the rest is undef.
define <8 x i32> @shuffle_v8i32_zuu8zuuc(<8 x i32> %a) {
; AVX1-LABEL: shuffle_v8i32_zuu8zuuc:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,0],ymm1[4,5],ymm0[6,4]
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: shuffle_v8i32_zuu8zuuc:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19]
; AVX2OR512VL-NEXT:    retq
  %shuffle = shufflevector <8 x i32> zeroinitializer, <8 x i32> %a, <8 x i32> <i32 0, i32 undef, i32 undef, i32 8, i32 0, i32 undef, i32 undef, i32 12>
  ret <8 x i32> %shuffle
}

; Zero vector as first source: each 128-bit lane of %a moves down by one
; element and a zero fills the lane's top element.
define <8 x i32> @shuffle_v8i32_9ubzdefz(<8 x i32> %a) {
; AVX1-LABEL: shuffle_v8i32_9ubzdefz:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vshufps {{.*#+}} ymm1 = ymm1[3,0],ymm0[3,0],ymm1[7,4],ymm0[7,4]
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[1,2],ymm1[2,0],ymm0[5,6],ymm1[6,4]
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: shuffle_v8i32_9ubzdefz:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,ymm0[20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero
; AVX2OR512VL-NEXT:    retq
  %shuffle = shufflevector <8 x i32> zeroinitializer, <8 x i32> %a, <8 x i32> <i32 9, i32 undef, i32 11, i32 0, i32 13, i32 14, i32 15, i32 0>
  ret <8 x i32> %shuffle
}

; Interleave of the low elements of each lane of %b and %a.
; NOTE: the name spells 'b' (11) for element 4, but the mask uses index 12;
; the name is left unchanged.
define <8 x i32> @shuffle_v8i32_80u1b4uu(<8 x i32> %a, <8 x i32> %b) {
; ALL-LABEL: shuffle_v8i32_80u1b4uu:
; ALL:       # %bb.0:
; ALL-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 0, i32 undef, i32 1, i32 12, i32 4, i32 undef, i32 undef>
  ret <8 x i32> %shuffle
}

; Splat of element 1 of %a into the high half; the low half is undef.
define <8 x i32> @shuffle_v8i32_uuuu1111(<8 x i32> %a, <8 x i32> %b) {
; ALL-LABEL: shuffle_v8i32_uuuu1111:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i32> %shuffle
}

; Splat of element 2 of %a into the low half; the high half is undef.
define <8 x i32> @shuffle_v8i32_2222uuuu(<8 x i32> %a, <8 x i32> %b) {
; ALL-LABEL: shuffle_v8i32_2222uuuu:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,2,2,2]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_2A3Buuuu(<8 x i32> %a, <8 x i32> %b) {
; ALL-LABEL: shuffle_v8i32_2A3Buuuu:
; ALL:       # %bb.0:
; ALL-NEXT:    vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i32> %shuffle
}

; Splat of element 4 (first element of the high lane) of %a.
define <8 x i32> @shuffle_v8i32_44444444(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_44444444:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: shuffle_v8i32_44444444:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX2OR512VL-NEXT:    vbroadcastss %xmm0, %ymm0
; AVX2OR512VL-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i32> %shuffle
}

; Same splat of element 4, with the sources bitcast from <8 x float>.
define <8 x i32> @shuffle_v8i32_44444444_bc(<8 x float> %a, <8 x float> %b) {
; AVX1-LABEL: shuffle_v8i32_44444444_bc:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: shuffle_v8i32_44444444_bc:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX2OR512VL-NEXT:    vbroadcastss %xmm0, %ymm0
; AVX2OR512VL-NEXT:    retq
  %tmp0 = bitcast <8 x float> %a to <8 x i32>
  %tmp1 = bitcast <8 x float> %b to <8 x i32>
  %shuffle = shufflevector <8 x i32> %tmp0, <8 x i32> %tmp1, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i32> %shuffle
}

; Splat of element 5 of %a into the low half; the high half is undef.
define <8 x i32> @shuffle_v8i32_5555uuuu(<8 x i32> %a, <8 x i32> %b) {
; ALL-LABEL: shuffle_v8i32_5555uuuu:
; ALL:       # %bb.0:
; ALL-NEXT:    vextractf128 $1, %ymm0, %xmm0
; ALL-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i32> %shuffle
}

; PR32453
define <8 x i32> @shuffle_v8i32_uuuuuu7u(<8 x i32> %a, <8 x i32> %b) nounwind {
; AVX1-LABEL: shuffle_v8i32_uuuuuu7u:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: shuffle_v8i32_uuuuuu7u:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,1,3,3,4,5,7,7]
; AVX2OR512VL-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 7, i32 undef>
  ret <8 x i32> %shuffle
}

; Splat of a float loaded from memory.
define <8 x float> @splat_mem_v8f32_2(float* %p) {
; ALL-LABEL: splat_mem_v8f32_2:
; ALL:       # %bb.0:
; ALL-NEXT:    vbroadcastss (%rdi), %ymm0
; ALL-NEXT:    retq
  %1 = load float, float* %p
  %2 = insertelement <4 x float> undef, float %1, i32 0
  %3 = shufflevector <4 x float> %2, <4 x float> undef, <8 x i32> zeroinitializer
  ret <8 x float> %3
}

; Splat of element 0 of a <4 x float> argument into all eight elements.
define <8 x float> @splat_v8f32(<4 x float> %r) {
; AVX1-LABEL: splat_v8f32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: splat_v8f32:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vbroadcastss %xmm0, %ymm0
; AVX2OR512VL-NEXT:    retq
  %1 = shufflevector <4 x float> %r, <4 x float> undef, <8 x i32> zeroinitializer
  ret <8 x float> %1
}

;
; Shuffle to logical bit shifts
;

define <8 x i32> @shuffle_v8i32_z0U2zUz6(<8 x i32> %a) {
; AVX1-LABEL: shuffle_v8i32_z0U2zUz6:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm1[0,2],ymm0[4,6],ymm1[4,6]
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[2,0,3,1,6,4,7,5]
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: shuffle_v8i32_z0U2zUz6:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vpsllq $32, %ymm0, %ymm0
; AVX2OR512VL-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 8, i32 0, i32 undef, i32 2, i32 8, i32 undef, i32 8, i32 6>
  ret <8 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_1U3z5zUU(<8 x i32> %a) {
; AVX1-LABEL: shuffle_v8i32_1U3z5zUU:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: shuffle_v8i32_1U3z5zUU:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vpsrlq $32, %ymm0, %ymm0
; AVX2OR512VL-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 1, i32 undef, i32 3, i32 8, i32 5, i32 8, i32 undef, i32 undef>
  ret <8 x i32> %shuffle
}

; The next four tests rotate elements between %a and %b within each 128-bit
; lane; the AVX2/AVX512VL runs expect a single vpalignr.

define <8 x i32> @shuffle_v8i32_B012F456(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_B012F456:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vshufps {{.*#+}} ymm1 = ymm1[3,0],ymm0[0,0],ymm1[7,4],ymm0[4,4]
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[0,2],ymm0[1,2],ymm1[4,6],ymm0[5,6]
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: shuffle_v8i32_B012F456:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[12,13,14,15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11],ymm1[28,29,30,31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27]
; AVX2OR512VL-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 0, i32 1, i32 2, i32 15, i32 4, i32 5, i32 6>
  ret <8 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_1238567C(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_1238567C:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vshufps {{.*#+}} ymm1 = ymm1[0,0],ymm0[3,0],ymm1[4,4],ymm0[7,4]
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[1,2],ymm1[2,0],ymm0[5,6],ymm1[6,4]
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: shuffle_v8i32_1238567C:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0,1,2,3],ymm0[20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16,17,18,19]
; AVX2OR512VL-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 2, i32 3, i32 8, i32 5, i32 6, i32 7, i32 12>
  ret <8 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_9AB0DEF4(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_9AB0DEF4:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[3,0],ymm0[4,4],ymm1[7,4]
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[1,2],ymm0[2,0],ymm1[5,6],ymm0[6,4]
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: shuffle_v8i32_9AB0DEF4:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1,2,3],ymm1[20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17,18,19]
; AVX2OR512VL-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 10, i32 11, i32 0, i32 13, i32 14, i32 15, i32 4>
  ret <8 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_389A7CDE(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_389A7CDE:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[3,0],ymm1[0,0],ymm0[7,4],ymm1[4,4]
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm1[1,2],ymm0[4,6],ymm1[5,6]
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: shuffle_v8i32_389A7CDE:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[12,13,14,15],ymm1[0,1,2,3,4,5,6,7,8,9,10,11],ymm0[28,29,30,31],ymm1[16,17,18,19,20,21,22,23,24,25,26,27]
; AVX2OR512VL-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 8, i32 9, i32 10, i32 7, i32 12, i32 13, i32 14>
  ret <8 x i32> %shuffle
}

; Single-source rotations within each 128-bit lane.

define <8 x i32> @shuffle_v8i32_30127456(<8 x i32> %a, <8 x i32> %b) {
; ALL-LABEL: shuffle_v8i32_30127456:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,0,1,2,7,4,5,6]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6>
  ret <8 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_12305674(<8 x i32> %a, <8 x i32> %b) {
; ALL-LABEL: shuffle_v8i32_12305674:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,2,3,0,5,6,7,4]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 2, i32 3, i32 0, i32 5, i32 6, i32 7, i32 4>
  ret <8 x i32> %shuffle
}

; The three concat_v2f32 tests build the same result, two loaded <2 x float>
; values placed in the low four elements, using different shuffle sequences.

define <8 x float> @concat_v2f32_1(<2 x float>* %tmp64, <2 x float>* %tmp65) {
; ALL-LABEL: concat_v2f32_1:
; ALL:       # %bb.0: # %entry
; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT:    vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; ALL-NEXT:    retq
entry:
  %tmp74 = load <2 x float>, <2 x float>* %tmp65, align 8
  %tmp72 = load <2 x float>, <2 x float>* %tmp64, align 8
  %tmp73 = shufflevector <2 x float> %tmp72, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %tmp75 = shufflevector <2 x float> %tmp74, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %tmp76 = shufflevector <8 x float> %tmp73, <8 x float> %tmp75, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x float> %tmp76
}

define <8 x float> @concat_v2f32_2(<2 x float>* %tmp64, <2 x float>* %tmp65) {
; ALL-LABEL: concat_v2f32_2:
; ALL:       # %bb.0: # %entry
; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT:    vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; ALL-NEXT:    retq
entry:
  %tmp74 = load <2 x float>, <2 x float>* %tmp65, align 8
  %tmp72 = load <2 x float>, <2 x float>* %tmp64, align 8
  %tmp76 = shufflevector <2 x float> %tmp72, <2 x float> %tmp74, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x float> %tmp76
}

define <8 x float> @concat_v2f32_3(<2 x float>* %tmp64, <2 x float>* %tmp65) {
; ALL-LABEL: concat_v2f32_3:
; ALL:       # %bb.0: # %entry
; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT:    vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; ALL-NEXT:    retq
entry:
  %tmp74 = load <2 x float>, <2 x float>* %tmp65, align 8
  %tmp72 = load <2 x float>, <2 x float>* %tmp64, align 8
  %tmp76 = shufflevector <2 x float> %tmp72, <2 x float> %tmp74, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = shufflevector <4 x float> %tmp76, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x float> %res
}

; Loaded i32 in element 0, zeros in every other element.
define <8 x i32> @insert_mem_and_zero_v8i32(i32* %ptr) {
; ALL-LABEL: insert_mem_and_zero_v8i32:
; ALL:       # %bb.0:
; ALL-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; ALL-NEXT:    retq
  %a = load i32, i32* %ptr
  %v = insertelement <8 x i32> undef, i32 %a, i32 0
  %shuffle = shufflevector <8 x i32> %v, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i32> %shuffle
}

define <8 x i32> @concat_v8i32_0123CDEF(<8 x i32> %a, <8 x i32> %b) {
; ALL-LABEL: concat_v8i32_0123CDEF:
; ALL:       # %bb.0:
; ALL-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; ALL-NEXT:    retq
  %alo =
shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 2392 %bhi = shufflevector <8 x i32> %b, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2393 %shuf = shufflevector <4 x i32> %alo, <4 x i32> %bhi, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 2394 ret <8 x i32> %shuf 2395} 2396 2397define <8 x i32> @concat_v8i32_4567CDEF_bc(<8 x i32> %a0, <8 x i32> %a1) { 2398; AVX1OR2-LABEL: concat_v8i32_4567CDEF_bc: 2399; AVX1OR2: # %bb.0: 2400; AVX1OR2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] 2401; AVX1OR2-NEXT: retq 2402; 2403; AVX512VL-LABEL: concat_v8i32_4567CDEF_bc: 2404; AVX512VL: # %bb.0: 2405; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] 2406; AVX512VL-NEXT: retq 2407 %a0hi = shufflevector <8 x i32> %a0, <8 x i32> %a1, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2408 %a1hi = shufflevector <8 x i32> %a0, <8 x i32> %a1, <4 x i32> <i32 12, i32 13, i32 14, i32 15> 2409 %bc0hi = bitcast <4 x i32> %a0hi to <2 x i64> 2410 %bc1hi = bitcast <4 x i32> %a1hi to <2 x i64> 2411 %shuffle64 = shufflevector <2 x i64> %bc0hi, <2 x i64> %bc1hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 2412 %shuffle32 = bitcast <4 x i64> %shuffle64 to <8 x i32> 2413 ret <8 x i32> %shuffle32 2414} 2415 2416define <8 x float> @concat_v8f32_4567CDEF_bc(<8 x float> %f0, <8 x float> %f1) { 2417; ALL-LABEL: concat_v8f32_4567CDEF_bc: 2418; ALL: # %bb.0: 2419; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] 2420; ALL-NEXT: retq 2421 %a0 = bitcast <8 x float> %f0 to <4 x i64> 2422 %a1 = bitcast <8 x float> %f1 to <8 x i32> 2423 %a0hi = shufflevector <4 x i64> %a0, <4 x i64> undef, <2 x i32> <i32 2, i32 3> 2424 %a1hi = shufflevector <8 x i32> %a1, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2425 %bc0hi = bitcast <2 x i64> %a0hi to <2 x i64> 2426 %bc1hi = bitcast <4 x i32> %a1hi to <2 x i64> 2427 %shuffle64 = shufflevector <2 x i64> %bc0hi, <2 x i64> %bc1hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 2428 %shuffle32 = 
bitcast <4 x i64> %shuffle64 to <8 x float> 2429 ret <8 x float> %shuffle32 2430} 2431 2432define <8 x i32> @insert_dup_mem_v8i32(i32* %ptr) { 2433; ALL-LABEL: insert_dup_mem_v8i32: 2434; ALL: # %bb.0: 2435; ALL-NEXT: vbroadcastss (%rdi), %ymm0 2436; ALL-NEXT: retq 2437 %tmp = load i32, i32* %ptr, align 4 2438 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0 2439 %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <8 x i32> zeroinitializer 2440 ret <8 x i32> %tmp2 2441} 2442 2443define <8 x i32> @shuffle_v8i32_12345678(<8 x i32> %a, <8 x i32> %b) { 2444; AVX1-LABEL: shuffle_v8i32_12345678: 2445; AVX1: # %bb.0: 2446; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6,7] 2447; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1] 2448; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,0],ymm0[3,0],ymm1[4,4],ymm0[7,4] 2449; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,2],ymm1[2,0],ymm0[5,6],ymm1[6,4] 2450; AVX1-NEXT: retq 2451; 2452; AVX2-LABEL: shuffle_v8i32_12345678: 2453; AVX2: # %bb.0: 2454; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6,7] 2455; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [1,2,3,4,5,6,7,0] 2456; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 2457; AVX2-NEXT: retq 2458; 2459; AVX512VL-LABEL: shuffle_v8i32_12345678: 2460; AVX512VL: # %bb.0: 2461; AVX512VL-NEXT: valignd {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7],ymm1[0] 2462; AVX512VL-NEXT: retq 2463 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8> 2464 ret <8 x i32> %shuffle 2465} 2466 2467define <8 x i32> @shuffle_v8i32_12345670(<8 x i32> %a) { 2468; AVX1-LABEL: shuffle_v8i32_12345670: 2469; AVX1: # %bb.0: 2470; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1] 2471; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,0],ymm0[3,0],ymm1[4,4],ymm0[7,4] 2472; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,2],ymm1[2,0],ymm0[5,6],ymm1[6,4] 2473; AVX1-NEXT: retq 2474; 2475; AVX2-LABEL: shuffle_v8i32_12345670: 2476; AVX2: # %bb.0: 2477; 
AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [1,2,3,4,5,6,7,0] 2478; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 2479; AVX2-NEXT: retq 2480; 2481; AVX512VL-LABEL: shuffle_v8i32_12345670: 2482; AVX512VL: # %bb.0: 2483; AVX512VL-NEXT: valignd {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,0] 2484; AVX512VL-NEXT: retq 2485 %shuffle = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0> 2486 ret <8 x i32> %shuffle 2487} 2488 2489define <8 x float> @add_v8f32_02468ACE_13579BDF(<8 x float> %a, <8 x float> %b) { 2490; AVX1-LABEL: add_v8f32_02468ACE_13579BDF: 2491; AVX1: # %bb.0: # %entry 2492; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 2493; AVX1-NEXT: vshufps {{.*#+}} xmm3 = xmm1[0,2],xmm2[0,2] 2494; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm3 2495; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 2496; AVX1-NEXT: vshufps {{.*#+}} xmm5 = xmm0[0,2],xmm4[0,2] 2497; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm5[0,1,2,3],ymm3[4,5,6,7] 2498; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[1,3],xmm2[1,3] 2499; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 2500; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm4[1,3] 2501; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] 2502; AVX1-NEXT: vaddps %ymm0, %ymm3, %ymm0 2503; AVX1-NEXT: retq 2504; 2505; AVX2-LABEL: add_v8f32_02468ACE_13579BDF: 2506; AVX2: # %bb.0: # %entry 2507; AVX2-NEXT: vshufps {{.*#+}} ymm2 = ymm0[0,2],ymm1[0,2],ymm0[4,6],ymm1[4,6] 2508; AVX2-NEXT: vpermpd {{.*#+}} ymm2 = ymm2[0,2,1,3] 2509; AVX2-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7] 2510; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3] 2511; AVX2-NEXT: vaddps %ymm0, %ymm2, %ymm0 2512; AVX2-NEXT: retq 2513; 2514; AVX512VL-SLOW-LABEL: add_v8f32_02468ACE_13579BDF: 2515; AVX512VL-SLOW: # %bb.0: # %entry 2516; AVX512VL-SLOW-NEXT: vshufps {{.*#+}} ymm2 = ymm0[0,2],ymm1[0,2],ymm0[4,6],ymm1[4,6] 2517; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm2 = ymm2[0,2,1,3] 2518; AVX512VL-SLOW-NEXT: vshufps {{.*#+}} ymm0 = 
ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7] 2519; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3] 2520; AVX512VL-SLOW-NEXT: vaddps %ymm0, %ymm2, %ymm0 2521; AVX512VL-SLOW-NEXT: retq 2522; 2523; AVX512VL-FAST-LABEL: add_v8f32_02468ACE_13579BDF: 2524; AVX512VL-FAST: # %bb.0: # %entry 2525; AVX512VL-FAST-NEXT: vmovaps {{.*#+}} ymm2 = [0,2,4,6,8,10,12,14] 2526; AVX512VL-FAST-NEXT: vpermi2ps %ymm1, %ymm0, %ymm2 2527; AVX512VL-FAST-NEXT: vmovaps {{.*#+}} ymm3 = [1,3,5,7,9,11,13,15] 2528; AVX512VL-FAST-NEXT: vpermi2ps %ymm1, %ymm0, %ymm3 2529; AVX512VL-FAST-NEXT: vaddps %ymm3, %ymm2, %ymm0 2530; AVX512VL-FAST-NEXT: retq 2531entry: 2532 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> 2533 %shuffle1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> 2534 %add = fadd <8 x float> %shuffle, %shuffle1 2535 ret <8 x float> %add 2536} 2537 2538define <8 x float> @add_v8f32_8ACE0246_9BDF1357(<8 x float> %a, <8 x float> %b) { 2539; AVX1-LABEL: add_v8f32_8ACE0246_9BDF1357: 2540; AVX1: # %bb.0: # %entry 2541; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 2542; AVX1-NEXT: vshufps {{.*#+}} xmm3 = xmm0[0,2],xmm2[0,2] 2543; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm3 2544; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4 2545; AVX1-NEXT: vshufps {{.*#+}} xmm5 = xmm1[0,2],xmm4[0,2] 2546; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm5[0,1,2,3],ymm3[4,5,6,7] 2547; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm2[1,3] 2548; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 2549; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[1,3],xmm4[1,3] 2550; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] 2551; AVX1-NEXT: vaddps %ymm0, %ymm3, %ymm0 2552; AVX1-NEXT: retq 2553; 2554; AVX2-LABEL: add_v8f32_8ACE0246_9BDF1357: 2555; AVX2: # %bb.0: # %entry 2556; AVX2-NEXT: vshufps {{.*#+}} ymm2 = ymm1[0,2],ymm0[0,2],ymm1[4,6],ymm0[4,6] 2557; AVX2-NEXT: vpermpd {{.*#+}} 
ymm2 = ymm2[0,2,1,3] 2558; AVX2-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,3],ymm0[1,3],ymm1[5,7],ymm0[5,7] 2559; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3] 2560; AVX2-NEXT: vaddps %ymm0, %ymm2, %ymm0 2561; AVX2-NEXT: retq 2562; 2563; AVX512VL-SLOW-LABEL: add_v8f32_8ACE0246_9BDF1357: 2564; AVX512VL-SLOW: # %bb.0: # %entry 2565; AVX512VL-SLOW-NEXT: vshufps {{.*#+}} ymm2 = ymm1[0,2],ymm0[0,2],ymm1[4,6],ymm0[4,6] 2566; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm2 = ymm2[0,2,1,3] 2567; AVX512VL-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,3],ymm0[1,3],ymm1[5,7],ymm0[5,7] 2568; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3] 2569; AVX512VL-SLOW-NEXT: vaddps %ymm0, %ymm2, %ymm0 2570; AVX512VL-SLOW-NEXT: retq 2571; 2572; AVX512VL-FAST-LABEL: add_v8f32_8ACE0246_9BDF1357: 2573; AVX512VL-FAST: # %bb.0: # %entry 2574; AVX512VL-FAST-NEXT: vmovaps {{.*#+}} ymm2 = [0,2,4,6,8,10,12,14] 2575; AVX512VL-FAST-NEXT: vpermi2ps %ymm0, %ymm1, %ymm2 2576; AVX512VL-FAST-NEXT: vmovaps {{.*#+}} ymm3 = [1,3,5,7,9,11,13,15] 2577; AVX512VL-FAST-NEXT: vpermi2ps %ymm0, %ymm1, %ymm3 2578; AVX512VL-FAST-NEXT: vaddps %ymm3, %ymm2, %ymm0 2579; AVX512VL-FAST-NEXT: retq 2580entry: 2581 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 10, i32 12, i32 14, i32 0, i32 2, i32 4, i32 6> 2582 %shuffle1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 9, i32 11, i32 13, i32 15, i32 1, i32 3, i32 5, i32 7> 2583 %add = fadd <8 x float> %shuffle, %shuffle1 2584 ret <8 x float> %add 2585} 2586 2587define <8 x i32> @add_v8i32_02468ACE_13579BDF(<8 x i32> %a, <8 x i32> %b) { 2588; AVX1-LABEL: add_v8i32_02468ACE_13579BDF: 2589; AVX1: # %bb.0: # %entry 2590; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 2591; AVX1-NEXT: vshufps {{.*#+}} xmm3 = xmm1[0,2],xmm2[0,2] 2592; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm3 2593; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 2594; AVX1-NEXT: vshufps {{.*#+}} xmm5 = xmm0[0,2],xmm4[0,2] 2595; AVX1-NEXT: vblendps {{.*#+}} ymm3 = 
ymm5[0,1,2,3],ymm3[4,5,6,7] 2596; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[1,3],xmm2[1,3] 2597; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 2598; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm4[1,3] 2599; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] 2600; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2601; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2 2602; AVX1-NEXT: vpaddd %xmm1, %xmm2, %xmm1 2603; AVX1-NEXT: vpaddd %xmm0, %xmm3, %xmm0 2604; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2605; AVX1-NEXT: retq 2606; 2607; AVX2-LABEL: add_v8i32_02468ACE_13579BDF: 2608; AVX2: # %bb.0: # %entry 2609; AVX2-NEXT: vshufps {{.*#+}} ymm2 = ymm0[0,2],ymm1[0,2],ymm0[4,6],ymm1[4,6] 2610; AVX2-NEXT: vpermpd {{.*#+}} ymm2 = ymm2[0,2,1,3] 2611; AVX2-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7] 2612; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3] 2613; AVX2-NEXT: vpaddd %ymm0, %ymm2, %ymm0 2614; AVX2-NEXT: retq 2615; 2616; AVX512VL-SLOW-LABEL: add_v8i32_02468ACE_13579BDF: 2617; AVX512VL-SLOW: # %bb.0: # %entry 2618; AVX512VL-SLOW-NEXT: vshufps {{.*#+}} ymm2 = ymm0[0,2],ymm1[0,2],ymm0[4,6],ymm1[4,6] 2619; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm2 = ymm2[0,2,1,3] 2620; AVX512VL-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7] 2621; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3] 2622; AVX512VL-SLOW-NEXT: vpaddd %ymm0, %ymm2, %ymm0 2623; AVX512VL-SLOW-NEXT: retq 2624; 2625; AVX512VL-FAST-LABEL: add_v8i32_02468ACE_13579BDF: 2626; AVX512VL-FAST: # %bb.0: # %entry 2627; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2,4,6,8,10,12,14] 2628; AVX512VL-FAST-NEXT: vpermi2d %ymm1, %ymm0, %ymm2 2629; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} ymm3 = [1,3,5,7,9,11,13,15] 2630; AVX512VL-FAST-NEXT: vpermi2d %ymm1, %ymm0, %ymm3 2631; AVX512VL-FAST-NEXT: vpaddd %ymm3, %ymm2, %ymm0 2632; AVX512VL-FAST-NEXT: retq 2633entry: 2634 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 
10, i32 12, i32 14> 2635 %shuffle1 = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> 2636 %add = add <8 x i32> %shuffle, %shuffle1 2637 ret <8 x i32> %add 2638} 2639 2640define <8 x i32> @add_v8i32_8ACE0246_9BDF1357(<8 x i32> %a, <8 x i32> %b) { 2641; AVX1-LABEL: add_v8i32_8ACE0246_9BDF1357: 2642; AVX1: # %bb.0: # %entry 2643; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 2644; AVX1-NEXT: vshufps {{.*#+}} xmm3 = xmm0[0,2],xmm2[0,2] 2645; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm3 2646; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4 2647; AVX1-NEXT: vshufps {{.*#+}} xmm5 = xmm1[0,2],xmm4[0,2] 2648; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm5[0,1,2,3],ymm3[4,5,6,7] 2649; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm2[1,3] 2650; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 2651; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[1,3],xmm4[1,3] 2652; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] 2653; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2654; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2 2655; AVX1-NEXT: vpaddd %xmm1, %xmm2, %xmm1 2656; AVX1-NEXT: vpaddd %xmm0, %xmm3, %xmm0 2657; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2658; AVX1-NEXT: retq 2659; 2660; AVX2-LABEL: add_v8i32_8ACE0246_9BDF1357: 2661; AVX2: # %bb.0: # %entry 2662; AVX2-NEXT: vshufps {{.*#+}} ymm2 = ymm1[0,2],ymm0[0,2],ymm1[4,6],ymm0[4,6] 2663; AVX2-NEXT: vpermpd {{.*#+}} ymm2 = ymm2[0,2,1,3] 2664; AVX2-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,3],ymm0[1,3],ymm1[5,7],ymm0[5,7] 2665; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3] 2666; AVX2-NEXT: vpaddd %ymm0, %ymm2, %ymm0 2667; AVX2-NEXT: retq 2668; 2669; AVX512VL-SLOW-LABEL: add_v8i32_8ACE0246_9BDF1357: 2670; AVX512VL-SLOW: # %bb.0: # %entry 2671; AVX512VL-SLOW-NEXT: vshufps {{.*#+}} ymm2 = ymm1[0,2],ymm0[0,2],ymm1[4,6],ymm0[4,6] 2672; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm2 = ymm2[0,2,1,3] 2673; AVX512VL-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,3],ymm0[1,3],ymm1[5,7],ymm0[5,7] 2674; 
AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3] 2675; AVX512VL-SLOW-NEXT: vpaddd %ymm0, %ymm2, %ymm0 2676; AVX512VL-SLOW-NEXT: retq 2677; 2678; AVX512VL-FAST-LABEL: add_v8i32_8ACE0246_9BDF1357: 2679; AVX512VL-FAST: # %bb.0: # %entry 2680; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2,4,6,8,10,12,14] 2681; AVX512VL-FAST-NEXT: vpermi2d %ymm0, %ymm1, %ymm2 2682; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} ymm3 = [1,3,5,7,9,11,13,15] 2683; AVX512VL-FAST-NEXT: vpermi2d %ymm0, %ymm1, %ymm3 2684; AVX512VL-FAST-NEXT: vpaddd %ymm3, %ymm2, %ymm0 2685; AVX512VL-FAST-NEXT: retq 2686entry: 2687 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 10, i32 12, i32 14, i32 0, i32 2, i32 4, i32 6> 2688 %shuffle1 = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 11, i32 13, i32 15, i32 1, i32 3, i32 5, i32 7> 2689 %add = add <8 x i32> %shuffle, %shuffle1 2690 ret <8 x i32> %add 2691} 2692