; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=ALL,AVX512F
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx512bw,+avx512dq | FileCheck %s --check-prefixes=ALL,AVX512BW

; Lowering tests for 512-bit <16 x float>/<16 x i32> shufflevector patterns.
; CHECK lines are autogenerated -- regenerate with update_llc_test_checks.py
; rather than editing them by hand.
target triple = "x86_64-unknown-unknown"

define <16 x float> @shuffle_v16f32_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x float> %a, <16 x float> %b) {
; ALL-LABEL: shuffle_v16f32_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; ALL: # %bb.0:
; ALL-NEXT: vbroadcastss %xmm0, %zmm0
; ALL-NEXT: retq
  %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32><i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x float> %shuffle
}

define <16 x float> @shuffle_v16f32_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08(<16 x float> %a, <16 x float> %b) {
; ALL-LABEL: shuffle_v16f32_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08:
; ALL: # %bb.0:
; ALL-NEXT: vextractf32x4 $2, %zmm0, %xmm0
; ALL-NEXT: vbroadcastss %xmm0, %zmm0
; ALL-NEXT: retq
  %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32><i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
  ret <16 x float> %shuffle
}

define <16 x float> @shuffle_v16f32_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_bc(<16 x i32> %a, <16 x i32> %b) {
; ALL-LABEL: shuffle_v16f32_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_bc:
; ALL: # %bb.0:
; ALL-NEXT: vextractf32x4 $2, %zmm0, %xmm0
; ALL-NEXT: vbroadcastss %xmm0, %zmm0
; ALL-NEXT: retq
  %tmp0 = bitcast <16 x i32> %a to <16 x float>
  %tmp1 = bitcast <16 x i32> %b to <16 x float>
  %shuffle = shufflevector <16 x float> %tmp0, <16 x float> %tmp1, <16 x i32><i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
  ret <16 x float> %shuffle
}

define <16 x float> @shuffle_v16f32_00_10_01_11_04_14_05_15_08_18_09_19_0c_1c_0d_1d(<16 x float> %a, <16 x float> %b) {
; ALL-LABEL: shuffle_v16f32_00_10_01_11_04_14_05_15_08_18_09_19_0c_1c_0d_1d:
; ALL: # %bb.0:
; ALL-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; ALL-NEXT: retq
  %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32><i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
  ret <16 x float> %shuffle
}

define <16 x float> @shuffle_v16f32_00_zz_01_zz_04_zz_05_zz_08_zz_09_zz_0c_zz_0d_zz(<16 x float> %a, <16 x float> %b) {
; ALL-LABEL: shuffle_v16f32_00_zz_01_zz_04_zz_05_zz_08_zz_09_zz_0c_zz_0d_zz:
; ALL: # %bb.0:
; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1
; ALL-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; ALL-NEXT: retq
  %shuffle = shufflevector <16 x float> %a, <16 x float> zeroinitializer, <16 x i32><i32 0, i32 16, i32 1, i32 16, i32 4, i32 16, i32 5, i32 16, i32 8, i32 16, i32 9, i32 16, i32 12, i32 16, i32 13, i32 16>
  ret <16 x float> %shuffle
}

define <16 x float> @shuffle_v16f32_vunpcklps_swap(<16 x float> %a, <16 x float> %b) {
; ALL-LABEL: shuffle_v16f32_vunpcklps_swap:
; ALL: # %bb.0:
; ALL-NEXT: vunpcklps {{.*#+}} zmm0 = zmm1[0],zmm0[0],zmm1[1],zmm0[1],zmm1[4],zmm0[4],zmm1[5],zmm0[5],zmm1[8],zmm0[8],zmm1[9],zmm0[9],zmm1[12],zmm0[12],zmm1[13],zmm0[13]
; ALL-NEXT: retq
  %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> <i32 16, i32 0, i32 17, i32 1, i32 20, i32 4, i32 21, i32 5, i32 24, i32 8, i32 25, i32 9, i32 28, i32 12, i32 29, i32 13>
  ret <16 x float> %shuffle
}

; PR34382
define <16 x float> @shuffle_v16f32_01_01_03_00_06_04_05_07_08_08_09_09_15_14_14_12(<16 x float> %a0) {
; ALL-LABEL: shuffle_v16f32_01_01_03_00_06_04_05_07_08_08_09_09_15_14_14_12:
; ALL: # %bb.0:
; ALL-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[1,1,3,0,6,4,5,7,8,8,9,9,15,14,14,12]
; ALL-NEXT: retq
  %shuffle = shufflevector <16 x float> %a0, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 0, i32 6, i32 4, i32 5, i32 7, i32 8, i32 8, i32 9, i32 9, i32 15, i32 14, i32 14, i32 12>
  ret <16 x float> %shuffle
}

define <16 x i32> @shuffle_v16i32_00_10_01_11_04_14_05_15_08_18_09_19_0c_1c_0d_1d(<16 x i32> %a, <16 x i32> %b) {
; ALL-LABEL: shuffle_v16i32_00_10_01_11_04_14_05_15_08_18_09_19_0c_1c_0d_1d:
; ALL: # %bb.0:
; ALL-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; ALL-NEXT: retq
  %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32><i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
  ret <16 x i32> %shuffle
}

define <16 x i32> @shuffle_v16i32_zz_10_zz_11_zz_14_zz_15_zz_18_zz_19_zz_1c_zz_1d(<16 x i32> %a, <16 x i32> %b) {
; ALL-LABEL: shuffle_v16i32_zz_10_zz_11_zz_14_zz_15_zz_18_zz_19_zz_1c_zz_1d:
; ALL: # %bb.0:
; ALL-NEXT: vxorps %xmm0, %xmm0, %xmm0
; ALL-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; ALL-NEXT: retq
  %shuffle = shufflevector <16 x i32> zeroinitializer, <16 x i32> %b, <16 x i32><i32 15, i32 16, i32 13, i32 17, i32 11, i32 20, i32 9, i32 21, i32 7, i32 24, i32 5, i32 25, i32 3, i32 28, i32 1, i32 29>
  ret <16 x i32> %shuffle
}

define <16 x float> @shuffle_v16f32_02_12_03_13_06_16_07_17_0a_1a_0b_1b_0e_1e_0f_1f(<16 x float> %a, <16 x float> %b) {
; ALL-LABEL: shuffle_v16f32_02_12_03_13_06_16_07_17_0a_1a_0b_1b_0e_1e_0f_1f:
; ALL: # %bb.0:
; ALL-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; ALL-NEXT: retq
  %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32><i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
  ret <16 x float> %shuffle
}

define <16 x float> @shuffle_v16f32_zz_12_zz_13_zz_16_zz_17_zz_1a_zz_1b_zz_1e_zz_1f(<16 x float> %a, <16 x float> %b) {
; ALL-LABEL: shuffle_v16f32_zz_12_zz_13_zz_16_zz_17_zz_1a_zz_1b_zz_1e_zz_1f:
; ALL: # %bb.0:
; ALL-NEXT: vxorps %xmm0, %xmm0, %xmm0
; ALL-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; ALL-NEXT: retq
  %shuffle = shufflevector <16 x float> zeroinitializer, <16 x float> %b, <16 x i32><i32 0, i32 18, i32 0, i32 19, i32 4, i32 22, i32 4, i32 23, i32 6, i32 26, i32 6, i32 27, i32 8, i32 30, i32 8, i32 31>
  ret <16 x float> %shuffle
}

define <16 x float> @shuffle_v16f32_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14(<16 x float> %a, <16 x float> %b) {
; ALL-LABEL: shuffle_v16f32_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14:
; ALL: # %bb.0:
; ALL-NEXT: vmovsldup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; ALL-NEXT: retq
  %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32><i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
  ret <16 x float> %shuffle
}

define <16 x float> @shuffle_v16f32_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15(<16 x float> %a, <16 x float> %b) {
; ALL-LABEL: shuffle_v16f32_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15:
; ALL: # %bb.0:
; ALL-NEXT: vmovshdup {{.*#+}} zmm0 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; ALL-NEXT: retq
  %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32><i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
  ret <16 x float> %shuffle
}

define <16 x float> @shuffle_v16f32_00_01_00_01_06_07_06_07_08_09_10_11_12_13_12_13(<16 x float> %a, <16 x float> %b) {
; ALL-LABEL: shuffle_v16f32_00_01_00_01_06_07_06_07_08_09_10_11_12_13_12_13:
; ALL: # %bb.0:
; ALL-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[0,0,3,3,4,5,6,6]
; ALL-NEXT: retq
  %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 6, i32 7, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 12, i32 13>
  ret <16 x float> %shuffle
}

define <16 x float> @shuffle_v16f32_00_00_02_00_04_04_06_04_08_08_10_08_12_12_14_12(<16 x float> %a, <16 x float> %b) {
; ALL-LABEL: shuffle_v16f32_00_00_02_00_04_04_06_04_08_08_10_08_12_12_14_12:
; ALL: # %bb.0:
; ALL-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[0,0,2,0,4,4,6,4,8,8,10,8,12,12,14,12]
; ALL-NEXT: retq
  %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4, i32 8, i32 8, i32 10, i32 8, i32 12, i32 12, i32 14, i32 12>
  ret <16 x float> %shuffle
}

define <16 x float> @shuffle_v16f32_03_uu_uu_uu_uu_04_uu_uu_uu_uu_11_uu_uu_uu_uu_12(<16 x float> %a, <16 x float> %b) {
; ALL-LABEL: shuffle_v16f32_03_uu_uu_uu_uu_04_uu_uu_uu_uu_11_uu_uu_uu_uu_12:
; ALL: # %bb.0:
; ALL-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[3,0,3,0,7,4,7,4,11,8,11,8,15,12,15,12]
; ALL-NEXT: retq
  %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> <i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 4, i32 undef, i32 undef, i32 undef, i32 undef, i32 11, i32 undef, i32 undef, i32 undef, i32 undef, i32 12>
  ret <16 x float> %shuffle
}

; PR41203
define <16 x float> @shuffle_v16f32_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31(<16 x float> %a) {
; ALL-LABEL: shuffle_v16f32_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31:
; ALL: # %bb.0:
; ALL-NEXT: vandps {{.*}}(%rip), %zmm0, %zmm0
; ALL-NEXT: retq
  %tmp1 = shufflevector <16 x float> %a, <16 x float> undef, <16 x i32> <i32 undef, i32 17, i32 undef, i32 19, i32 undef, i32 5, i32 undef, i32 7, i32 undef, i32 9, i32 undef, i32 11, i32 undef, i32 13, i32 undef, i32 15>
  %tmp2 = shufflevector <16 x float> %tmp1, <16 x float> <float 0.000000e+00, float undef, float 0.000000e+00, float undef, float 0.000000e+00, float undef, float 0.000000e+00, float undef, float 0.000000e+00, float undef, float 0.000000e+00, float undef, float 0.000000e+00, float undef, float 0.000000e+00, float undef>, <16 x i32> <i32 16, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 13, i32 30, i32 15>
  ret <16 x float> %tmp2
}

; PR48322
define <16 x float> @shuffle_v16f32_02_03_16_17_06_07_20_21_10_11_24_25_14_15_28_29(<16 x float> %a, <16 x float> %b) {
; ALL-LABEL: shuffle_v16f32_02_03_16_17_06_07_20_21_10_11_24_25_14_15_28_29:
; ALL: # %bb.0:
; ALL-NEXT: vshufpd {{.*#+}} zmm0 = zmm0[1],zmm1[0],zmm0[3],zmm1[2],zmm0[5],zmm1[4],zmm0[7],zmm1[6]
; ALL-NEXT: retq
  %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> <i32 2, i32 3, i32 16, i32 17, i32 6, i32 7, i32 20, i32 21, i32 10, i32 11, i32 24, i32 25, i32 14, i32 15, i32 28, i32 29>
  ret <16 x float> %shuffle
}

define <16 x i32> @shuffle_v16i32_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i32> %a, <16 x i32> %b) {
; ALL-LABEL: shuffle_v16i32_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; ALL: # %bb.0:
; ALL-NEXT: vbroadcastss %xmm0, %zmm0
; ALL-NEXT: retq
  %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32><i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i32> %shuffle
}

define <16 x i32> @shuffle_v16i32_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04(<16 x i32> %a, <16 x i32> %b) {
; ALL-LABEL: shuffle_v16i32_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04:
; ALL: # %bb.0:
; ALL-NEXT: vextractf128 $1, %ymm0, %xmm0
; ALL-NEXT: vbroadcastss %xmm0, %zmm0
; ALL-NEXT: retq
  %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32><i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
  ret <16 x i32> %shuffle
}

define <16 x i32> @shuffle_v16i32_02_12_03_13_06_16_07_17_0a_1a_0b_1b_0e_1e_0f_1f(<16 x i32> %a, <16 x i32> %b) {
; ALL-LABEL: shuffle_v16i32_02_12_03_13_06_16_07_17_0a_1a_0b_1b_0e_1e_0f_1f:
; ALL: # %bb.0:
; ALL-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; ALL-NEXT: retq
  %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32><i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
  ret <16 x i32> %shuffle
}

define <16 x i32> @shuffle_v16i32_02_zz_03_zz_06_zz_07_zz_0a_zz_0b_zz_0e_zz_0f_zz(<16 x i32> %a, <16 x i32> %b) {
; ALL-LABEL: shuffle_v16i32_02_zz_03_zz_06_zz_07_zz_0a_zz_0b_zz_0e_zz_0f_zz:
; ALL: # %bb.0:
; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1
; ALL-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; ALL-NEXT: retq
  %shuffle = shufflevector <16 x i32> %a, <16 x i32> zeroinitializer, <16 x i32><i32 2, i32 30, i32 3, i32 28, i32 6, i32 26, i32 7, i32 24, i32 10, i32 22, i32 11, i32 20, i32 14, i32 18, i32 15, i32 16>
  ret <16 x i32> %shuffle
}

define <16 x i32> @shuffle_v16i32_01_02_03_16_05_06_07_20_09_10_11_24_13_14_15_28(<16 x i32> %a, <16 x i32> %b) {
; AVX512F-LABEL: shuffle_v16i32_01_02_03_16_05_06_07_20_09_10_11_24_13_14_15_28:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,2,3,16,5,6,7,20,9,10,11,24,13,14,15,28]
; AVX512F-NEXT: vpermt2d %zmm1, %zmm2, %zmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: shuffle_v16i32_01_02_03_16_05_06_07_20_09_10_11_24_13_14_15_28:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpalignr {{.*#+}} zmm0 = zmm0[4,5,6,7,8,9,10,11,12,13,14,15],zmm1[0,1,2,3],zmm0[20,21,22,23,24,25,26,27,28,29,30,31],zmm1[16,17,18,19],zmm0[36,37,38,39,40,41,42,43,44,45,46,47],zmm1[32,33,34,35],zmm0[52,53,54,55,56,57,58,59,60,61,62,63],zmm1[48,49,50,51]
; AVX512BW-NEXT: retq
  %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32><i32 1, i32 2, i32 3, i32 16, i32 5, i32 6, i32 7, i32 20, i32 9, i32 10, i32 11, i32 24, i32 13, i32 14, i32 15, i32 28>
  ret <16 x i32> %shuffle
}

define <16 x float> @shuffle_v16f32_02_05_u_u_07_u_0a_01_00_05_u_04_07_u_0a_01(<16 x float> %a) {
; ALL-LABEL: shuffle_v16f32_02_05_u_u_07_u_0a_01_00_05_u_04_07_u_0a_01:
; ALL: # %bb.0:
; ALL-NEXT: vmovaps {{.*#+}} zmm1 = <2,5,u,u,7,u,10,1,0,5,u,4,7,u,10,1>
; ALL-NEXT: vpermps %zmm0, %zmm1, %zmm0
; ALL-NEXT: retq
  %c = shufflevector <16 x float> %a, <16 x float> undef, <16 x i32> <i32 2, i32 5, i32 undef, i32 undef, i32 7, i32 undef, i32 10, i32 1, i32 0, i32 5, i32 undef, i32 4, i32 7, i32 undef, i32 10, i32 1>
  ret <16 x float> %c
}

define <16 x i32> @shuffle_v16i32_02_05_u_u_07_u_0a_01_00_05_u_04_07_u_0a_01(<16 x i32> %a) {
; ALL-LABEL: shuffle_v16i32_02_05_u_u_07_u_0a_01_00_05_u_04_07_u_0a_01:
; ALL: # %bb.0:
; ALL-NEXT: vmovaps {{.*#+}} zmm1 = <2,5,u,u,7,u,10,1,0,5,u,4,7,u,10,1>
; ALL-NEXT: vpermps %zmm0, %zmm1, %zmm0
; ALL-NEXT: retq
  %c = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32> <i32 2, i32 5, i32 undef, i32 undef, i32 7, i32 undef, i32 10, i32 1, i32 0, i32 5, i32 undef, i32 4, i32 7, i32 undef, i32 10, i32 1>
  ret <16 x i32> %c
}

define <16 x i32> @shuffle_v16i32_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_18(<16 x i32> %a, <16 x i32> %b) {
; ALL-LABEL: shuffle_v16i32_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_18:
; ALL: # %bb.0:
; ALL-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,31,14,22,13,29,4,28,11,27,10,26,9,25,8,24]
; ALL-NEXT: vpermt2d %zmm1, %zmm2, %zmm0
; ALL-NEXT: retq
  %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
  ret <16 x i32> %c
}

define <16 x float> @shuffle_v16f32_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_18(<16 x float> %a, <16 x float> %b) {
; ALL-LABEL: shuffle_v16f32_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_18:
; ALL: # %bb.0:
; ALL-NEXT: vmovaps {{.*#+}} zmm2 = [15,31,14,22,13,29,4,28,11,27,10,26,9,25,8,24]
; ALL-NEXT: vpermt2ps %zmm1, %zmm2, %zmm0
; ALL-NEXT: retq
  %c = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
  ret <16 x float> %c
}

; PR46249
define <16 x i32> @shuffle_v16i32_0b_0a_09_08_0f_0e_0d_0c_03_02_01_00_07_06_05_04(<16 x i32> %a) {
; ALL-LABEL: shuffle_v16i32_0b_0a_09_08_0f_0e_0d_0c_03_02_01_00_07_06_05_04:
; ALL: # %bb.0:
; ALL-NEXT: vpshufd {{.*#+}} zmm0 = zmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
; ALL-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[4,5,6,7,0,1,2,3]
; ALL-NEXT: retq
  %1 = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32> <i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
  ret <16 x i32> %1
}

define <16 x float> @shuffle_v16f32_0b_0a_09_08_0f_0e_0d_0c_03_02_01_00_07_06_05_04(<16 x float> %a) {
; ALL-LABEL: shuffle_v16f32_0b_0a_09_08_0f_0e_0d_0c_03_02_01_00_07_06_05_04:
; ALL: # %bb.0:
; ALL-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
; ALL-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[4,5,6,7,0,1,2,3]
; ALL-NEXT: retq
  %1 = shufflevector <16 x float> %a, <16 x float> undef, <16 x i32> <i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
  ret <16 x float> %1
}

define <16 x float> @shuffle_v16f32_load_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_18(<16 x float> %a, <16 x float>* %b) {
; ALL-LABEL: shuffle_v16f32_load_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_18:
; ALL: # %bb.0:
; ALL-NEXT: vmovaps {{.*#+}} zmm1 = [15,31,14,22,13,29,4,28,11,27,10,26,9,25,8,24]
; ALL-NEXT: vpermt2ps (%rdi), %zmm1, %zmm0
; ALL-NEXT: retq
  %c = load <16 x float>, <16 x float>* %b
  %d = shufflevector <16 x float> %a, <16 x float> %c, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
  ret <16 x float> %d
}

define <16 x float> @shuffle_v16f32_load_08_11_10_00_12_15_14_04(<16 x float> %a0, <16 x float>* %a1) {
; ALL-LABEL: shuffle_v16f32_load_08_11_10_00_12_15_14_04:
; ALL: # %bb.0:
; ALL-NEXT: vshufps {{.*#+}} zmm1 = zmm0[2,0],mem[0,0],zmm0[6,4],mem[4,4],zmm0[10,8],mem[8,8],zmm0[14,12],mem[12,12]
; ALL-NEXT: vshufps {{.*#+}} zmm0 = zmm0[0,3],zmm1[0,2],zmm0[4,7],zmm1[4,6],zmm0[8,11],zmm1[8,10],zmm0[12,15],zmm1[12,14]
; ALL-NEXT: retq
  %1 = load <16 x float>, <16 x float>* %a1
  %2 = shufflevector <16 x float> %1, <16 x float> %a0, <16 x i32> <i32 16, i32 19, i32 18, i32 0, i32 20, i32 23, i32 22, i32 4, i32 24, i32 27, i32 26, i32 8, i32 28, i32 31, i32 30, i32 12>
  ret <16 x float> %2
}

define <16 x i32> @shuffle_v16i32_load_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_18(<16 x i32> %a, <16 x i32>* %b) {
; ALL-LABEL: shuffle_v16i32_load_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_18:
; ALL: # %bb.0:
; ALL-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,31,14,22,13,29,4,28,11,27,10,26,9,25,8,24]
; ALL-NEXT: vpermt2d (%rdi), %zmm1, %zmm0
; ALL-NEXT: retq
  %c = load <16 x i32>, <16 x i32>* %b
  %d = shufflevector <16 x i32> %a, <16 x i32> %c, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
  ret <16 x i32> %d
}

define <16 x i32> @shuffle_v16i32_0_1_2_19_u_u_u_u_u_u_u_u_u_u_u_u(<16 x i32> %a, <16 x i32> %b) {
; ALL-LABEL: shuffle_v16i32_0_1_2_19_u_u_u_u_u_u_u_u_u_u_u_u:
; ALL: # %bb.0:
; ALL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
; ALL-NEXT: retq
  %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i32> %c
}

;FIXME: can do better with vpcompress
define <8 x i32> @test_v16i32_1_3_5_7_9_11_13_15(<16 x i32> %v) {
; ALL-LABEL: test_v16i32_1_3_5_7_9_11_13_15:
; ALL: # %bb.0:
; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm1
; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
; ALL-NEXT: retq
  %res = shufflevector <16 x i32> %v, <16 x i32> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  ret <8 x i32> %res
}

;FIXME: can do better with vpcompress
define <4 x i32> @test_v16i32_0_1_2_12 (<16 x i32> %v) {
; ALL-LABEL: test_v16i32_0_1_2_12:
; ALL: # %bb.0:
; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm1
; ALL-NEXT: vextractf128 $1, %ymm1, %xmm1
; ALL-NEXT: vbroadcastss %xmm1, %xmm1
; ALL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
; ALL-NEXT: vzeroupper
; ALL-NEXT: retq
  %res = shufflevector <16 x i32> %v, <16 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 12>
  ret <4 x i32> %res
}

;PR31451
;FIXME: can do better with vpcompress
define <4 x i32> @test_v16i32_0_4_8_12(<16 x i32> %v) {
; ALL-LABEL: test_v16i32_0_4_8_12:
; ALL: # %bb.0:
; ALL-NEXT: vmovaps {{.*#+}} xmm1 = [0,4,8,12]
; ALL-NEXT: vpermps %zmm0, %zmm1, %zmm0
; ALL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; ALL-NEXT: vzeroupper
; ALL-NEXT: retq
  %res = shufflevector <16 x i32> %v, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
  ret <4 x i32> %res
}

define <8 x float> @shuffle_v16f32_extract_256(float* %RET, float* %a) {
; ALL-LABEL: shuffle_v16f32_extract_256:
; ALL: # %bb.0:
; ALL-NEXT: vmovups 32(%rsi), %ymm0
; ALL-NEXT: retq
  %ptr_a = bitcast float* %a to <16 x float>*
  %v_a = load <16 x float>, <16 x float>* %ptr_a, align 4
  %v2 = shufflevector <16 x float> %v_a, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <8 x float> %v2
}

;FIXME: can do better with vcompressp
define <8 x float> @test_v16f32_0_1_2_3_4_6_7_10 (<16 x float> %v) {
; ALL-LABEL: test_v16f32_0_1_2_3_4_6_7_10:
; ALL: # %bb.0:
; ALL-NEXT: vbroadcasti64x4 {{.*#+}} zmm1 = [0,1,2,3,4,6,7,10,0,1,2,3,4,6,7,10]
; ALL-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3]
; ALL-NEXT: vpermd %zmm0, %zmm1, %zmm0
; ALL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; ALL-NEXT: retq
  %res = shufflevector <16 x float> %v, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 6, i32 7, i32 10>
  ret <8 x float> %res
}

;FIXME: can do better with vcompressp
define <4 x float> @test_v16f32_0_1_3_6 (<16 x float> %v) {
; ALL-LABEL: test_v16f32_0_1_3_6:
; ALL: # %bb.0:
; ALL-NEXT: vbroadcasti32x4 {{.*#+}} zmm1 = [0,1,3,6,0,1,3,6,0,1,3,6,0,1,3,6]
; ALL-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; ALL-NEXT: vpermd %zmm0, %zmm1, %zmm0
; ALL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; ALL-NEXT: vzeroupper
; ALL-NEXT: retq
  %res = shufflevector <16 x float> %v, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 3, i32 6>
  ret <4 x float> %res
}

; NOTE(review): despite the "v16i16" prefix, the next two tests operate on
; <16 x i32>; the LABEL checks depend on the names, so renaming requires
; regenerating the CHECK lines.
define <16 x i32> @shuffle_v16i16_1_0_0_0_5_4_4_4_9_8_8_8_13_12_12_12(<16 x i32> %a, <16 x i32> %b) {
; ALL-LABEL: shuffle_v16i16_1_0_0_0_5_4_4_4_9_8_8_8_13_12_12_12:
; ALL: # %bb.0:
; ALL-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[1,0,0,0,5,4,4,4,9,8,8,8,13,12,12,12]
; ALL-NEXT: retq
  %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4, i32 9, i32 8, i32 8, i32 8, i32 13, i32 12, i32 12, i32 12>
  ret <16 x i32> %c
}

; NOTE(review): name says indices 3,3,0,0,... but the mask below is
; <2,3,0,1,...> -- the name appears stale relative to the mask.
define <16 x i32> @shuffle_v16i16_3_3_0_0_7_7_4_4_11_11_8_8_15_15_12_12(<16 x i32> %a, <16 x i32> %b) {
; ALL-LABEL: shuffle_v16i16_3_3_0_0_7_7_4_4_11_11_8_8_15_15_12_12:
; ALL: # %bb.0:
; ALL-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13]
; ALL-NEXT: retq
  %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5, i32 10, i32 11, i32 8, i32 9, i32 14, i32 15, i32 12, i32 13>
  ret <16 x i32> %c
}

define <16 x float> @shuffle_v16f32_00_01_10_10_04_05_14_14_08_09_18_18_0c_0d_1c_1c(<16 x float> %a, <16 x float> %b) {
; ALL-LABEL: shuffle_v16f32_00_01_10_10_04_05_14_14_08_09_18_18_0c_0d_1c_1c:
; ALL: # %bb.0:
; ALL-NEXT: vshufps {{.*#+}} zmm0 = zmm0[0,1],zmm1[0,0],zmm0[4,5],zmm1[4,4],zmm0[8,9],zmm1[8,8],zmm0[12,13],zmm1[12,12]
; ALL-NEXT: retq
  %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> <i32 0, i32 1, i32 16, i32 16, i32 4, i32 5, i32 20, i32 20, i32 8, i32 9, i32 24, i32 24, i32 12, i32 13, i32 28, i32 28>
  ret <16 x float> %shuffle
}

define <16 x i32> @insert_mem_and_zero_v16i32(i32* %ptr) {
; ALL-LABEL: insert_mem_and_zero_v16i32:
; ALL: # %bb.0:
; ALL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; ALL-NEXT: retq
  %a = load i32, i32* %ptr
  %v = insertelement <16 x i32> undef, i32 %a, i32 0
  %shuffle = shufflevector <16 x i32> %v, <16 x i32> zeroinitializer, <16 x i32> <i32 0, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i32> %shuffle
}


define <16 x i32> @shuffle_v16i32_0zzzzzzzzzzzzzzz(<16 x i32> %a) {
; ALL-LABEL: shuffle_v16i32_0zzzzzzzzzzzzzzz:
; ALL: # %bb.0:
; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1
; ALL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; ALL-NEXT: retq
  %shuffle = shufflevector <16 x i32> %a, <16 x i32> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
  ret <16 x i32> %shuffle
}

define <16 x float> @shuffle_v16f32_0zzzzzzzzzzzzzzz(<16 x float> %a) {
; ALL-LABEL: shuffle_v16f32_0zzzzzzzzzzzzzzz:
; ALL: # %bb.0:
; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1
; ALL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; ALL-NEXT: retq
  %shuffle = shufflevector <16 x float> %a, <16 x float> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
  ret <16 x float> %shuffle
}

define <16 x i32> @shuffle_v16i32_16_zz_17_zz_18_zz_19_zz_20_zz_21_zz_22_zz_23_zz(<16 x i32> %a) {
; ALL-LABEL: shuffle_v16i32_16_zz_17_zz_18_zz_19_zz_20_zz_21_zz_22_zz_23_zz:
; ALL: # %bb.0:
; ALL-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
; ALL-NEXT: retq
  %shuffle = shufflevector <16 x i32> zeroinitializer, <16 x i32> %a, <16 x i32> <i32 16, i32 0, i32 17, i32 0, i32 18, i32 0, i32 19, i32 0, i32 20, i32 0, i32 21, i32 0, i32 22, i32 0, i32 23, i32 0>
  ret <16 x i32> %shuffle
}

define <16 x i32> @shuffle_v16i32_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16(<16 x i32> %a, <16 x i32> %b) {
; ALL-LABEL: shuffle_v16i32_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16:
; ALL: # %bb.0:
; ALL-NEXT: valignd {{.*#+}} zmm0 = zmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm1[0]
; ALL-NEXT: retq
  %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32><i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>
  ret <16 x i32> %shuffle
}

define <16 x i32> @shuffle_v16i32_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00(<16 x i32> %a) {
; ALL-LABEL: shuffle_v16i32_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00:
; ALL: # %bb.0:
; ALL-NEXT: valignd {{.*#+}} zmm0 = zmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0]
; ALL-NEXT: retq
  %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32><i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0>
  ret <16 x i32> %shuffle
}

define <16 x i32> @shuffle_v16i32_00_03_16_19_04_07_20_23_08_11_24_27_12_15_28_31(<16 x i32> %a, <16 x i32> %b) {
; ALL-LABEL: shuffle_v16i32_00_03_16_19_04_07_20_23_08_11_24_27_12_15_28_31:
; ALL: # %bb.0:
; ALL-NEXT: vshufps {{.*#+}} zmm0 = zmm0[0,3],zmm1[0,3],zmm0[4,7],zmm1[4,7],zmm0[8,11],zmm1[8,11],zmm0[12,15],zmm1[12,15]
; ALL-NEXT: retq
  %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 0, i32 3, i32 16, i32 19, i32 4, i32 7, i32 20, i32 23, i32 8, i32 11, i32 24, i32 27, i32 12, i32 15, i32 28, i32 31>
  ret <16 x i32> %shuffle
}

define <16 x i32> @shuffle_v16i32_16_16_02_03_20_20_06_07_24_24_10_11_28_28_uu_uu(<16 x i32> %a, <16 x i32> %b) {
; ALL-LABEL: shuffle_v16i32_16_16_02_03_20_20_06_07_24_24_10_11_28_28_uu_uu:
; ALL: # %bb.0:
; ALL-NEXT: vshufps {{.*#+}} zmm0 = zmm1[0,0],zmm0[2,3],zmm1[4,4],zmm0[6,7],zmm1[8,8],zmm0[10,11],zmm1[12,12],zmm0[14,15]
; ALL-NEXT: retq
  %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 16, i32 16, i32 02, i32 03, i32 20, i32 20, i32 06, i32 07, i32 24, i32 24, i32 10, i32 11, i32 28, i32 28, i32 undef, i32 undef>
  ret <16 x i32> %shuffle
}

; PR48322
define <16 x i32> @shuffle_v16i32_02_03_16_17_06_07_20_21_10_11_24_25_14_15_28_29(<16 x i32> %a, <16 x i32> %b) {
; AVX512F-LABEL: shuffle_v16i32_02_03_16_17_06_07_20_21_10_11_24_25_14_15_28_29:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,8,3,10,5,12,7,14]
; AVX512F-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: shuffle_v16i32_02_03_16_17_06_07_20_21_10_11_24_25_14_15_28_29:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpalignr {{.*#+}} zmm0 = zmm0[8,9,10,11,12,13,14,15],zmm1[0,1,2,3,4,5,6,7],zmm0[24,25,26,27,28,29,30,31],zmm1[16,17,18,19,20,21,22,23],zmm0[40,41,42,43,44,45,46,47],zmm1[32,33,34,35,36,37,38,39],zmm0[56,57,58,59,60,61,62,63],zmm1[48,49,50,51,52,53,54,55]
; AVX512BW-NEXT: retq
  %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 2, i32 3, i32 16, i32 17, i32 6, i32 7, i32 20, i32 21, i32 10, i32 11, i32 24, i32 25, i32 14, i32 15, i32 28, i32 29>
  ret <16 x i32> %shuffle
}

define <16 x i32> @shuffle_v8i32_17_16_01_00_21_20_05_04_25_24_09_08_29_28_13_12(<16 x i32> %a, <16 x i32> %b) {
; ALL-LABEL: shuffle_v8i32_17_16_01_00_21_20_05_04_25_24_09_08_29_28_13_12:
; ALL: # %bb.0:
; ALL-NEXT: vshufps {{.*#+}} zmm0 = zmm1[1,0],zmm0[1,0],zmm1[5,4],zmm0[5,4],zmm1[9,8],zmm0[9,8],zmm1[13,12],zmm0[13,12]
; ALL-NEXT: retq
  %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 17, i32 16, i32 01, i32 00, i32 21, i32 20, i32 05, i32 04, i32 25, i32 24, i32 09, i32 08, i32 29, i32 28, i32 13, i32 12>
  ret <16 x i32> %shuffle
}

define <16 x float> @shuffle_v8f32_v16f32_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04(<8 x float> %a) {
; ALL-LABEL: shuffle_v8f32_v16f32_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04:
; ALL: # %bb.0:
; ALL-NEXT: vextractf128 $1, %ymm0, %xmm0
; ALL-NEXT: vbroadcastss %xmm0, %zmm0
; ALL-NEXT: retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
  ret <16 x float> %shuffle
}

define <16 x i32> @mask_shuffle_v16i32_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00_01(<16 x i32> %a, <16 x i32> %passthru, i16 %mask) {
; AVX512F-LABEL: mask_shuffle_v16i32_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00_01:
; AVX512F: # %bb.0:
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: valignd {{.*#+}} zmm1 {%k1} = zmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1]
; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: mask_shuffle_v16i32_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00_01:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: valignd {{.*#+}} zmm1 {%k1} = zmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1]
; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512BW-NEXT: retq
  %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32><i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1>
  %mask.cast = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %mask.cast, <16 x i32> %shuffle, <16 x i32> %passthru
  ret <16 x i32> %res
}

define <16 x i32> @mask_shuffle_v16i32_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passthru, i16 %mask) {
; AVX512F-LABEL: mask_shuffle_v16i32_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17:
; AVX512F: # %bb.0:
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: valignd {{.*#+}} zmm2 {%k1} = zmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm1[0,1]
; AVX512F-NEXT: vmovdqa64 %zmm2, %zmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: mask_shuffle_v16i32_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: valignd {{.*#+}} zmm2 {%k1} = zmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm1[0,1]
; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0
; AVX512BW-NEXT: retq
  %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32><i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17>
  %mask.cast = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %mask.cast, <16 x i32> %shuffle, <16 x i32> %passthru
  ret <16 x i32> %res
}

define <16 x i32> @maskz_shuffle_v16i32_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00_01(<16 x i32> %a, i16 %mask) {
; AVX512F-LABEL: maskz_shuffle_v16i32_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00_01:
; AVX512F: # %bb.0:
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: valignd {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1]
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: maskz_shuffle_v16i32_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00_01:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: valignd {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1]
; AVX512BW-NEXT: retq
  %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32><i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1>
  %mask.cast = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %mask.cast, <16 x i32> %shuffle, <16 x i32> zeroinitializer
  ret <16 x i32> %res
}

define <16 x i32> @maskz_shuffle_v16i32_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
; AVX512F-LABEL: maskz_shuffle_v16i32_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17:
; AVX512F: # %bb.0:
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: valignd {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm1[0,1]
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: maskz_shuffle_v16i32_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: valignd {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm1[0,1]
; AVX512BW-NEXT: retq
  %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32><i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17>
  %mask.cast = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %mask.cast, <16 x i32> %shuffle, <16 x i32> zeroinitializer
  ret <16 x i32> %res
}

define <16 x float> @test_vshuff32x4_512(<16 x float> %x, <16 x float> %x1) nounwind {
; ALL-LABEL: test_vshuff32x4_512:
; ALL: # %bb.0:
; ALL-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[2,3,0,1]
; ALL-NEXT: retq
  %res = shufflevector <16 x float> %x, <16 x float> %x1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 16, i32 17, i32 18, i32 19>
  ret <16 x float> %res
}

define <16 x i32> @test_vshufi32x4_512(<16 x i32> %x, <16 x i32> %x1) nounwind {
; ALL-LABEL: test_vshufi32x4_512:
; ALL: # %bb.0:
; ALL-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[2,3,0,1]
; ALL-NEXT: retq
  %res = shufflevector <16 x i32> %x, <16 x i32> %x1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 16, i32 17, i32 18, i32 19>
  ret <16 x i32> %res
}

define <16 x float> @test_vshuff32x4_512_mask(<16 
x float> %x, <16 x float> %x1, <16 x float> %y, <16 x i1> %mask) nounwind { 648; AVX512F-LABEL: test_vshuff32x4_512_mask: 649; AVX512F: # %bb.0: 650; AVX512F-NEXT: vpmovsxbd %xmm3, %zmm3 651; AVX512F-NEXT: vpslld $31, %zmm3, %zmm3 652; AVX512F-NEXT: vpmovd2m %zmm3, %k1 653; AVX512F-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[0,1,2,3,4,5,6,7],zmm1[4,5,6,7,0,1,2,3] 654; AVX512F-NEXT: vmovaps %zmm2, %zmm0 655; AVX512F-NEXT: retq 656; 657; AVX512BW-LABEL: test_vshuff32x4_512_mask: 658; AVX512BW: # %bb.0: 659; AVX512BW-NEXT: vpsllw $7, %xmm3, %xmm3 660; AVX512BW-NEXT: vpmovb2m %zmm3, %k1 661; AVX512BW-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[0,1,2,3,4,5,6,7],zmm1[4,5,6,7,0,1,2,3] 662; AVX512BW-NEXT: vmovaps %zmm2, %zmm0 663; AVX512BW-NEXT: retq 664 %x2 = shufflevector <16 x float> %x, <16 x float> %x1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 16, i32 17, i32 18, i32 19> 665 %res = select <16 x i1> %mask, <16 x float> %x2, <16 x float> %y 666 ret <16 x float> %res 667} 668 669define <16 x i32> @test_vshufi32x4_512_mask(<16 x i32> %x, <16 x i32> %x1, <16 x i32> %y, <16 x i1> %mask) nounwind { 670; AVX512F-LABEL: test_vshufi32x4_512_mask: 671; AVX512F: # %bb.0: 672; AVX512F-NEXT: vpmovsxbd %xmm3, %zmm3 673; AVX512F-NEXT: vpslld $31, %zmm3, %zmm3 674; AVX512F-NEXT: vpmovd2m %zmm3, %k1 675; AVX512F-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[0,1,2,3,4,5,6,7],zmm1[4,5,6,7,0,1,2,3] 676; AVX512F-NEXT: vmovdqa64 %zmm2, %zmm0 677; AVX512F-NEXT: retq 678; 679; AVX512BW-LABEL: test_vshufi32x4_512_mask: 680; AVX512BW: # %bb.0: 681; AVX512BW-NEXT: vpsllw $7, %xmm3, %xmm3 682; AVX512BW-NEXT: vpmovb2m %zmm3, %k1 683; AVX512BW-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[0,1,2,3,4,5,6,7],zmm1[4,5,6,7,0,1,2,3] 684; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0 685; AVX512BW-NEXT: retq 686 %x2 = shufflevector <16 x i32> %x, <16 x i32> %x1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, 
i32 23, i32 16, i32 17, i32 18, i32 19> 687 %res = select <16 x i1> %mask, <16 x i32> %x2, <16 x i32> %y 688 ret <16 x i32> %res 689} 690 691define <16 x float> @mask_shuffle_v16f32_00_01_02_03_04_05_06_07_16_17_18_19_20_21_22_23(<16 x float> %a, <16 x float> %b, <16 x float> %passthru, i16 %mask) { 692; AVX512F-LABEL: mask_shuffle_v16f32_00_01_02_03_04_05_06_07_16_17_18_19_20_21_22_23: 693; AVX512F: # %bb.0: 694; AVX512F-NEXT: kmovw %edi, %k1 695; AVX512F-NEXT: vinsertf32x8 $1, %ymm1, %zmm0, %zmm2 {%k1} 696; AVX512F-NEXT: vmovaps %zmm2, %zmm0 697; AVX512F-NEXT: retq 698; 699; AVX512BW-LABEL: mask_shuffle_v16f32_00_01_02_03_04_05_06_07_16_17_18_19_20_21_22_23: 700; AVX512BW: # %bb.0: 701; AVX512BW-NEXT: kmovd %edi, %k1 702; AVX512BW-NEXT: vinsertf32x8 $1, %ymm1, %zmm0, %zmm2 {%k1} 703; AVX512BW-NEXT: vmovaps %zmm2, %zmm0 704; AVX512BW-NEXT: retq 705 %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> 706 %mask.cast = bitcast i16 %mask to <16 x i1> 707 %res = select <16 x i1> %mask.cast, <16 x float> %shuffle, <16 x float> %passthru 708 ret <16 x float> %res 709} 710 711define <16 x float> @mask_shuffle_v16f32_00_01_02_03_16_17_18_19_08_09_10_11_12_13_14_15(<16 x float> %a, <16 x float> %b, <16 x float> %passthru, i16 %mask) { 712; AVX512F-LABEL: mask_shuffle_v16f32_00_01_02_03_16_17_18_19_08_09_10_11_12_13_14_15: 713; AVX512F: # %bb.0: 714; AVX512F-NEXT: kmovw %edi, %k1 715; AVX512F-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm2 {%k1} 716; AVX512F-NEXT: vmovaps %zmm2, %zmm0 717; AVX512F-NEXT: retq 718; 719; AVX512BW-LABEL: mask_shuffle_v16f32_00_01_02_03_16_17_18_19_08_09_10_11_12_13_14_15: 720; AVX512BW: # %bb.0: 721; AVX512BW-NEXT: kmovd %edi, %k1 722; AVX512BW-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm2 {%k1} 723; AVX512BW-NEXT: vmovaps %zmm2, %zmm0 724; AVX512BW-NEXT: retq 725 %shuffle = shufflevector <16 x float> %a, <16 x 
float> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 726 %mask.cast = bitcast i16 %mask to <16 x i1> 727 %res = select <16 x i1> %mask.cast, <16 x float> %shuffle, <16 x float> %passthru 728 ret <16 x float> %res 729} 730 731define <16 x i32> @mask_shuffle_v16i32_00_01_02_03_04_05_06_07_16_17_18_19_20_21_22_23(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passthru, i16 %mask) { 732; AVX512F-LABEL: mask_shuffle_v16i32_00_01_02_03_04_05_06_07_16_17_18_19_20_21_22_23: 733; AVX512F: # %bb.0: 734; AVX512F-NEXT: kmovw %edi, %k1 735; AVX512F-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm2 {%k1} 736; AVX512F-NEXT: vmovdqa64 %zmm2, %zmm0 737; AVX512F-NEXT: retq 738; 739; AVX512BW-LABEL: mask_shuffle_v16i32_00_01_02_03_04_05_06_07_16_17_18_19_20_21_22_23: 740; AVX512BW: # %bb.0: 741; AVX512BW-NEXT: kmovd %edi, %k1 742; AVX512BW-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm2 {%k1} 743; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0 744; AVX512BW-NEXT: retq 745 %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> 746 %mask.cast = bitcast i16 %mask to <16 x i1> 747 %res = select <16 x i1> %mask.cast, <16 x i32> %shuffle, <16 x i32> %passthru 748 ret <16 x i32> %res 749} 750 751define <16 x i32> @mask_shuffle_v16i32_00_01_02_03_16_17_18_19_08_09_10_11_12_13_14_15(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passthru, i16 %mask) { 752; AVX512F-LABEL: mask_shuffle_v16i32_00_01_02_03_16_17_18_19_08_09_10_11_12_13_14_15: 753; AVX512F: # %bb.0: 754; AVX512F-NEXT: kmovw %edi, %k1 755; AVX512F-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm2 {%k1} 756; AVX512F-NEXT: vmovdqa64 %zmm2, %zmm0 757; AVX512F-NEXT: retq 758; 759; AVX512BW-LABEL: mask_shuffle_v16i32_00_01_02_03_16_17_18_19_08_09_10_11_12_13_14_15: 760; AVX512BW: # %bb.0: 761; AVX512BW-NEXT: kmovd %edi, %k1 762; AVX512BW-NEXT: 
vinserti32x4 $1, %xmm1, %zmm0, %zmm2 {%k1} 763; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0 764; AVX512BW-NEXT: retq 765 %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 766 %mask.cast = bitcast i16 %mask to <16 x i1> 767 %res = select <16 x i1> %mask.cast, <16 x i32> %shuffle, <16 x i32> %passthru 768 ret <16 x i32> %res 769} 770 771define <16 x i32> @mask_shuffle_v4i32_v16i32_00_01_02_03_00_01_02_03_00_01_02_03_00_01_02_03(<4 x i32> %a) { 772; ALL-LABEL: mask_shuffle_v4i32_v16i32_00_01_02_03_00_01_02_03_00_01_02_03_00_01_02_03: 773; ALL: # %bb.0: 774; ALL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 775; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 776; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0 777; ALL-NEXT: retq 778 %res = shufflevector <4 x i32> %a, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 779 ret <16 x i32> %res 780} 781 782define <16 x float> @mask_shuffle_v4f32_v16f32_00_01_02_03_00_01_02_03_00_01_02_03_00_01_02_03(<4 x float> %a) { 783; ALL-LABEL: mask_shuffle_v4f32_v16f32_00_01_02_03_00_01_02_03_00_01_02_03_00_01_02_03: 784; ALL: # %bb.0: 785; ALL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 786; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 787; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0 788; ALL-NEXT: retq 789 %res = shufflevector <4 x float> %a, <4 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 790 ret <16 x float> %res 791} 792 793%struct.foo = type { [4 x double], [3 x [4 x double]], [4 x double] } 794 795; This test previously hung in shuffle combining. 
https://github.com/ispc/ispc/issues/1864 796define void @ispc_1864(<16 x float>* %arg) { 797; ALL-LABEL: ispc_1864: 798; ALL: # %bb.0: # %bb 799; ALL-NEXT: pushq %rbp 800; ALL-NEXT: .cfi_def_cfa_offset 16 801; ALL-NEXT: .cfi_offset %rbp, -16 802; ALL-NEXT: movq %rsp, %rbp 803; ALL-NEXT: .cfi_def_cfa_register %rbp 804; ALL-NEXT: andq $-64, %rsp 805; ALL-NEXT: subq $4864, %rsp # imm = 0x1300 806; ALL-NEXT: vbroadcastss {{.*#+}} ymm0 = [-5.0E+0,-5.0E+0,-5.0E+0,-5.0E+0,-5.0E+0,-5.0E+0,-5.0E+0,-5.0E+0] 807; ALL-NEXT: vmulps 32(%rdi), %ymm0, %ymm0 808; ALL-NEXT: vcvtps2pd %ymm0, %zmm0 809; ALL-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[2,3,4,5,0,1,0,1] 810; ALL-NEXT: vmovapd %ymm0, {{[0-9]+}}(%rsp) 811; ALL-NEXT: movq %rbp, %rsp 812; ALL-NEXT: popq %rbp 813; ALL-NEXT: .cfi_def_cfa %rsp, 8 814; ALL-NEXT: vzeroupper 815; ALL-NEXT: retq 816bb: 817 %tmp = alloca [30 x %struct.foo], align 64 818 %tmp1 = load <16 x float>, <16 x float>* %arg, align 4 819 %tmp2 = fmul <16 x float> %tmp1, <float -5.000000e+00, float -5.000000e+00, float -5.000000e+00, float -5.000000e+00, float -5.000000e+00, float -5.000000e+00, float -5.000000e+00, float -5.000000e+00, float -5.000000e+00, float -5.000000e+00, float -5.000000e+00, float -5.000000e+00, float -5.000000e+00, float -5.000000e+00, float -5.000000e+00, float -5.000000e+00> 820 %tmp3 = fpext <16 x float> %tmp2 to <16 x double> 821 %tmp4 = getelementptr inbounds [30 x %struct.foo], [30 x %struct.foo]* %tmp, i64 0, i64 3, i32 2, i64 0 822 %tmp5 = extractelement <16 x double> %tmp3, i32 10 823 store double %tmp5, double* %tmp4, align 32 824 %tmp6 = getelementptr inbounds [30 x %struct.foo], [30 x %struct.foo]* %tmp, i64 0, i64 3, i32 2, i64 1 825 %tmp7 = extractelement <16 x double> %tmp3, i32 11 826 store double %tmp7, double* %tmp6, align 8 827 %tmp8 = getelementptr inbounds [30 x %struct.foo], [30 x %struct.foo]* %tmp, i64 0, i64 3, i32 2, i64 2 828 %tmp9 = extractelement <16 x double> %tmp3, i32 12 829 store double %tmp9, double* %tmp8, 
align 16 830 %tmp10 = getelementptr inbounds [30 x %struct.foo], [30 x %struct.foo]* %tmp, i64 0, i64 3, i32 2, i64 3 831 %tmp11 = extractelement <16 x double> %tmp3, i32 13 832 store double %tmp11, double* %tmp10, align 8 833 ret void 834} 835 836