; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512BW
;
; Just one 32-bit run to make sure we do reasonable things.
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=X32-AVX512F

; Overview: every function below assembles a 512-bit vector from loads at
; nearby offsets of %ptr, and the autogenerated checks pin the x86 assembly
; expected for it under each of the three llc invocations above.
;
; Review note: the function names appear to follow
; merge_<result type>_<loaded type>_<lanes>, where <lanes> lists, per result
; lane (or sub-vector), the source element index (hex digits for 10-15),
; 'u' for undef, or 'z' for zero. This matches the IR in every test here.

; Concatenates <2 x double> elements 1, 2 and 4 of %ptr; the third 128-bit
; quarter of the result is undef. Expected: one 256-bit load at byte offset 16
; plus a 128-bit insert from offset 64.
define <8 x double> @merge_8f64_2f64_12u4(<2 x double>* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8f64_2f64_12u4:
; ALL: # %bb.0:
; ALL-NEXT: vmovups 16(%rdi), %ymm0
; ALL-NEXT: vinsertf128 $1, 64(%rdi), %ymm0, %ymm1
; ALL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; ALL-NEXT: retq
;
; X32-AVX512F-LABEL: merge_8f64_2f64_12u4:
; X32-AVX512F: # %bb.0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT: vmovups 16(%eax), %ymm0
; X32-AVX512F-NEXT: vinsertf128 $1, 64(%eax), %ymm0, %ymm1
; X32-AVX512F-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; X32-AVX512F-NEXT: retl
  %ptr0 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 1
  %ptr1 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 2
  %ptr3 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 4
  %val0 = load <2 x double>, <2 x double>* %ptr0
  %val1 = load <2 x double>, <2 x double>* %ptr1
  %val3 = load <2 x double>, <2 x double>* %ptr3
  %res01 = shufflevector <2 x double> %val0, <2 x double> %val1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res23 = shufflevector <2 x double> undef, <2 x double> %val3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = shufflevector <4 x double> %res01, <4 x double> %res23, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x double> %res
}

; Same shape as the previous test, but with elements 2, 3 and 5 and a zero
; (not undef) third quarter. Expected: a 256-bit load at offset 32 and a
; 128-bit insert from offset 80 into a zeroed register.
define <8 x double> @merge_8f64_2f64_23z5(<2 x double>* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8f64_2f64_23z5:
; ALL: # %bb.0:
; ALL-NEXT: vmovups 32(%rdi), %ymm0
; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1
; ALL-NEXT: vinsertf128 $1, 80(%rdi), %ymm1, %ymm1
; ALL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; ALL-NEXT: retq
;
; X32-AVX512F-LABEL: merge_8f64_2f64_23z5:
; X32-AVX512F: # %bb.0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT: vmovups 32(%eax), %ymm0
; X32-AVX512F-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X32-AVX512F-NEXT: vinsertf128 $1, 80(%eax), %ymm1, %ymm1
; X32-AVX512F-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; X32-AVX512F-NEXT: retl
  %ptr0 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 2
  %ptr1 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 3
  %ptr3 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 5
  %val0 = load <2 x double>, <2 x double>* %ptr0
  %val1 = load <2 x double>, <2 x double>* %ptr1
  %val3 = load <2 x double>, <2 x double>* %ptr3
  %res01 = shufflevector <2 x double> %val0, <2 x double> %val1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res23 = shufflevector <2 x double> zeroinitializer, <2 x double> %val3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = shufflevector <4 x double> %res01, <4 x double> %res23, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x double> %res
}

; Zero low half, <4 x double> element 2 of %ptr (byte offset 64) as high half.
; Expected: a zeroed register with a 256-bit insert straight from memory.
define <8 x double> @merge_8f64_4f64_z2(<4 x double>* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8f64_4f64_z2:
; ALL: # %bb.0:
; ALL-NEXT: vxorps %xmm0, %xmm0, %xmm0
; ALL-NEXT: vinsertf64x4 $1, 64(%rdi), %zmm0, %zmm0
; ALL-NEXT: retq
;
; X32-AVX512F-LABEL: merge_8f64_4f64_z2:
; X32-AVX512F: # %bb.0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-AVX512F-NEXT: vinsertf64x4 $1, 64(%eax), %zmm0, %zmm0
; X32-AVX512F-NEXT: retl
  %ptr1 = getelementptr inbounds <4 x double>, <4 x double>* %ptr, i64 2
  %val1 = load <4 x double>, <4 x double>* %ptr1
  %res = shufflevector <4 x double> zeroinitializer, <4 x double> %val1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x double> %res
}

; Scalar doubles 2, 3 and 9 go to lanes 0, 1 and 7; all other lanes are undef.
; Expected: a single 512-bit load at byte offset 16.
define <8 x double> @merge_8f64_f64_23uuuuu9(double* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8f64_f64_23uuuuu9:
; ALL: # %bb.0:
; ALL-NEXT: vmovups 16(%rdi), %zmm0
; ALL-NEXT: retq
;
; X32-AVX512F-LABEL: merge_8f64_f64_23uuuuu9:
; X32-AVX512F: # %bb.0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT: vmovups 16(%eax), %zmm0
; X32-AVX512F-NEXT: retl
  %ptr0 = getelementptr inbounds double, double* %ptr, i64 2
  %ptr1 = getelementptr inbounds double, double* %ptr, i64 3
  %ptr7 = getelementptr inbounds double, double* %ptr, i64 9
  %val0 = load double, double* %ptr0
  %val1 = load double, double* %ptr1
  %val7 = load double, double* %ptr7
  %res0 = insertelement <8 x double> undef, double %val0, i32 0
  %res1 = insertelement <8 x double> %res0, double %val1, i32 1
  %res7 = insertelement <8 x double> %res1, double %val7, i32 7
  ret <8 x double> %res7
}

; Doubles 1 and 2 go to lanes 0 and 1; lanes 2, 3, 6 and 7 are zero and lanes
; 4 and 5 stay undef. Expected: a single xmm load at byte offset 8.
define <8 x double> @merge_8f64_f64_12zzuuzz(double* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8f64_f64_12zzuuzz:
; ALL: # %bb.0:
; ALL-NEXT: vmovups 8(%rdi), %xmm0
; ALL-NEXT: retq
;
; X32-AVX512F-LABEL: merge_8f64_f64_12zzuuzz:
; X32-AVX512F: # %bb.0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT: vmovups 8(%eax), %xmm0
; X32-AVX512F-NEXT: retl
  %ptr0 = getelementptr inbounds double, double* %ptr, i64 1
  %ptr1 = getelementptr inbounds double, double* %ptr, i64 2
  %val0 = load double, double* %ptr0
  %val1 = load double, double* %ptr1
  %res0 = insertelement <8 x double> undef, double %val0, i32 0
  %res1 = insertelement <8 x double> %res0, double %val1, i32 1
  %res2 = insertelement <8 x double> %res1, double 0.0, i32 2
  %res3 = insertelement <8 x double> %res2, double 0.0, i32 3
  %res6 = insertelement <8 x double> %res3, double 0.0, i32 6
  %res7 = insertelement <8 x double> %res6, double 0.0, i32 7
  ret <8 x double> %res7
}

; Doubles 1, 3, 5 and 8 go to lanes 0, 2, 4 and 7; lane 5 is zero, the rest
; undef. Expected: a zero-masked 512-bit load at byte offset 8 whose mask is
; the complement of 32 (bit 5, i.e. the zero lane). The two 64-bit runs differ
; only in the mask move (kmovw vs. kmovd).
define <8 x double> @merge_8f64_f64_1u3u5zu8(double* %ptr) nounwind uwtable noinline ssp {
; AVX512F-LABEL: merge_8f64_f64_1u3u5zu8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: movb $32, %al
; AVX512F-NEXT: kmovw %eax, %k0
; AVX512F-NEXT: knotw %k0, %k1
; AVX512F-NEXT: vmovupd 8(%rdi), %zmm0 {%k1} {z}
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: merge_8f64_f64_1u3u5zu8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: movb $32, %al
; AVX512BW-NEXT: kmovd %eax, %k0
; AVX512BW-NEXT: knotw %k0, %k1
; AVX512BW-NEXT: vmovupd 8(%rdi), %zmm0 {%k1} {z}
; AVX512BW-NEXT: retq
;
; X32-AVX512F-LABEL: merge_8f64_f64_1u3u5zu8:
; X32-AVX512F: # %bb.0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT: movb $32, %cl
; X32-AVX512F-NEXT: kmovw %ecx, %k0
; X32-AVX512F-NEXT: knotw %k0, %k1
; X32-AVX512F-NEXT: vmovupd 8(%eax), %zmm0 {%k1} {z}
; X32-AVX512F-NEXT: retl
  %ptr0 = getelementptr inbounds double, double* %ptr, i64 1
  %ptr2 = getelementptr inbounds double, double* %ptr, i64 3
  %ptr4 = getelementptr inbounds double, double* %ptr, i64 5
  %ptr7 = getelementptr inbounds double, double* %ptr, i64 8
  %val0 = load double, double* %ptr0
  %val2 = load double, double* %ptr2
  %val4 = load double, double* %ptr4
  %val7 = load double, double* %ptr7
  %res0 = insertelement <8 x double> undef, double %val0, i32 0
  %res2 = insertelement <8 x double> %res0, double %val2, i32 2
  %res4 = insertelement <8 x double> %res2, double %val4, i32 4
  %res5 = insertelement <8 x double> %res4, double 0.0, i32 5
  %res7 = insertelement <8 x double> %res5, double %val7, i32 7
  ret <8 x double> %res7
}

; Integer counterpart of the zero-low-half test: <4 x i64> element 3 of %ptr
; (byte offset 96) becomes the high half of an otherwise zero vector.
define <8 x i64> @merge_8i64_4i64_z3(<4 x i64>* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8i64_4i64_z3:
; ALL: # %bb.0:
; ALL-NEXT: vxorps %xmm0, %xmm0, %xmm0
; ALL-NEXT: vinsertf64x4 $1, 96(%rdi), %zmm0, %zmm0
; ALL-NEXT: retq
;
; X32-AVX512F-LABEL: merge_8i64_4i64_z3:
; X32-AVX512F: # %bb.0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-AVX512F-NEXT: vinsertf64x4 $1, 96(%eax), %zmm0, %zmm0
; X32-AVX512F-NEXT: retl
  %ptr1 = getelementptr inbounds <4 x i64>, <4 x i64>* %ptr, i64 3
  %val1 = load <4 x i64>, <4 x i64>* %ptr1
  %res = shufflevector <4 x i64> zeroinitializer, <4 x i64> %val1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i64> %res
}

; i64 elements 5 and 6 go to lanes 0 and 1, element 9 to lane 4; lanes 2, 3, 6
; and 7 are zero and lane 5 is undef. Expected: an xmm load at byte offset 40
; and a separate scalar load (its address is wildcarded in the pattern),
; joined by a 256-bit insert.
define <8 x i64> @merge_8i64_i64_56zz9uzz(i64* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8i64_i64_56zz9uzz:
; ALL: # %bb.0:
; ALL-NEXT: vmovups 40(%rdi), %xmm0
; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; ALL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; ALL-NEXT: retq
;
; X32-AVX512F-LABEL: merge_8i64_i64_56zz9uzz:
; X32-AVX512F: # %bb.0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT: vmovups 40(%eax), %xmm0
; X32-AVX512F-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; X32-AVX512F-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; X32-AVX512F-NEXT: retl
  %ptr0 = getelementptr inbounds i64, i64* %ptr, i64 5
  %ptr1 = getelementptr inbounds i64, i64* %ptr, i64 6
  %ptr4 = getelementptr inbounds i64, i64* %ptr, i64 9
  %val0 = load i64, i64* %ptr0
  %val1 = load i64, i64* %ptr1
  %val4 = load i64, i64* %ptr4
  %res0 = insertelement <8 x i64> undef, i64 %val0, i32 0
  %res1 = insertelement <8 x i64> %res0, i64 %val1, i32 1
  %res2 = insertelement <8 x i64> %res1, i64 0, i32 2
  %res3 = insertelement <8 x i64> %res2, i64 0, i32 3
  %res4 = insertelement <8 x i64> %res3, i64 %val4, i32 4
  %res6 = insertelement <8 x i64> %res4, i64 0, i32 6
  %res7 = insertelement <8 x i64> %res6, i64 0, i32 7
  ret <8 x i64> %res7
}

; i64 counterpart of the masked-load double test: elements 1, 3, 5 and 8 go to
; lanes 0, 2, 4 and 7 with lane 5 zero. Expected: a zero-masked vmovdqu64 at
; byte offset 8 with the complement of 32 as mask.
define <8 x i64> @merge_8i64_i64_1u3u5zu8(i64* %ptr) nounwind uwtable noinline ssp {
; AVX512F-LABEL: merge_8i64_i64_1u3u5zu8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: movb $32, %al
; AVX512F-NEXT: kmovw %eax, %k0
; AVX512F-NEXT: knotw %k0, %k1
; AVX512F-NEXT: vmovdqu64 8(%rdi), %zmm0 {%k1} {z}
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: merge_8i64_i64_1u3u5zu8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: movb $32, %al
; AVX512BW-NEXT: kmovd %eax, %k0
; AVX512BW-NEXT: knotw %k0, %k1
; AVX512BW-NEXT: vmovdqu64 8(%rdi), %zmm0 {%k1} {z}
; AVX512BW-NEXT: retq
;
; X32-AVX512F-LABEL: merge_8i64_i64_1u3u5zu8:
; X32-AVX512F: # %bb.0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT: movb $32, %cl
; X32-AVX512F-NEXT: kmovw %ecx, %k0
; X32-AVX512F-NEXT: knotw %k0, %k1
; X32-AVX512F-NEXT: vmovdqu64 8(%eax), %zmm0 {%k1} {z}
; X32-AVX512F-NEXT: retl
  %ptr0 = getelementptr inbounds i64, i64* %ptr, i64 1
  %ptr2 = getelementptr inbounds i64, i64* %ptr, i64 3
  %ptr4 = getelementptr inbounds i64, i64* %ptr, i64 5
  %ptr7 = getelementptr inbounds i64, i64* %ptr, i64 8
  %val0 = load i64, i64* %ptr0
  %val2 = load i64, i64* %ptr2
  %val4 = load i64, i64* %ptr4
  %val7 = load i64, i64* %ptr7
  %res0 = insertelement <8 x i64> undef, i64 %val0, i32 0
  %res2 = insertelement <8 x i64> %res0, i64 %val2, i32 2
  %res4 = insertelement <8 x i64> %res2, i64 %val4, i32 4
  %res5 = insertelement <8 x i64> %res4, i64 0, i32 5
  %res7 = insertelement <8 x i64> %res5, i64 %val7, i32 7
  ret <8 x i64> %res7
}

; Floats 8 and 9 go to lanes 0 and 1; lanes 2, 3, 4 and 15 are zero, the rest
; undef. Expected: a single 64-bit scalar load that zero-fills the remainder
; of xmm0.
define <16 x float> @merge_16f32_f32_89zzzuuuuuuuuuuuz(float* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_16f32_f32_89zzzuuuuuuuuuuuz:
; ALL: # %bb.0:
; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT: retq
;
; X32-AVX512F-LABEL: merge_16f32_f32_89zzzuuuuuuuuuuuz:
; X32-AVX512F: # %bb.0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32-AVX512F-NEXT: retl
  %ptr0 = getelementptr inbounds float, float* %ptr, i64 8
  %ptr1 = getelementptr inbounds float, float* %ptr, i64 9
  %val0 = load float, float* %ptr0
  %val1 = load float, float* %ptr1
  %res0 = insertelement <16 x float> undef, float %val0, i32 0
  %res1 = insertelement <16 x float> %res0, float %val1, i32 1
  %res2 = insertelement <16 x float> %res1, float 0.0, i32 2
  %res3 = insertelement <16 x float> %res2, float 0.0, i32 3
  %res4 = insertelement <16 x float> %res3, float 0.0, i32 4
  %resF = insertelement <16 x float> %res4, float 0.0, i32 15
  ret <16 x float> %resF
}

; Floats 4, 5 and 7 go to lanes 0, 1 and 3; every other lane is undef.
; Expected: a single xmm load at byte offset 16.
define <16 x float> @merge_16f32_f32_45u7uuuuuuuuuuuu(float* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_16f32_f32_45u7uuuuuuuuuuuu:
; ALL: # %bb.0:
; ALL-NEXT: vmovups 16(%rdi), %xmm0
; ALL-NEXT: retq
;
; X32-AVX512F-LABEL: merge_16f32_f32_45u7uuuuuuuuuuuu:
; X32-AVX512F: # %bb.0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT: vmovups 16(%eax), %xmm0
; X32-AVX512F-NEXT: retl
  %ptr0 = getelementptr inbounds float, float* %ptr, i64 4
  %ptr1 = getelementptr inbounds float, float* %ptr, i64 5
  %ptr3 = getelementptr inbounds float, float* %ptr, i64 7
  %val0 = load float, float* %ptr0
  %val1 = load float, float* %ptr1
  %val3 = load float, float* %ptr3
  %res0 = insertelement <16 x float> undef, float %val0, i32 0
  %res1 = insertelement <16 x float> %res0, float %val1, i32 1
  %res3 = insertelement <16 x float> %res1, float %val3, i32 3
  ret <16 x float> %res3
}

; Floats 0, 3, 12, 14 and 15 go to the same-numbered lanes; the rest is undef.
; Expected: a single 512-bit load from %ptr itself.
define <16 x float> @merge_16f32_f32_0uu3uuuuuuuuCuEF(float* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_16f32_f32_0uu3uuuuuuuuCuEF:
; ALL: # %bb.0:
; ALL-NEXT: vmovups (%rdi), %zmm0
; ALL-NEXT: retq
;
; X32-AVX512F-LABEL: merge_16f32_f32_0uu3uuuuuuuuCuEF:
; X32-AVX512F: # %bb.0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT: vmovups (%eax), %zmm0
; X32-AVX512F-NEXT: retl
  %ptr0 = getelementptr inbounds float, float* %ptr, i64 0
  %ptr3 = getelementptr inbounds float, float* %ptr, i64 3
  %ptrC = getelementptr inbounds float, float* %ptr, i64 12
  %ptrE = getelementptr inbounds float, float* %ptr, i64 14
  %ptrF = getelementptr inbounds float, float* %ptr, i64 15
  %val0 = load float, float* %ptr0
  %val3 = load float, float* %ptr3
  %valC = load float, float* %ptrC
  %valE = load float, float* %ptrE
  %valF = load float, float* %ptrF
  %res0 = insertelement <16 x float> undef, float %val0, i32 0
  %res3 = insertelement <16 x float> %res0, float %val3, i32 3
  %resC = insertelement <16 x float> %res3, float %valC, i32 12
  %resE = insertelement <16 x float> %resC, float %valE, i32 14
  %resF = insertelement <16 x float> %resE, float %valF, i32 15
  ret <16 x float> %resF
}

; As the previous test, plus explicit zeros in lanes 4, 5 and 13. Expected: a
; full 512-bit load followed by a two-source permute against a zeroed
; register; the permute constant carries indices 20, 21 and 29 in exactly the
; zero lanes. Note the i32 variant of this pattern further down expects a
; zero-masked load instead.
define <16 x float> @merge_16f32_f32_0uu3zzuuuuuzCuEF(float* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_16f32_f32_0uu3zzuuuuuzCuEF:
; ALL: # %bb.0:
; ALL-NEXT: vmovups (%rdi), %zmm1
; ALL-NEXT: vxorps %xmm2, %xmm2, %xmm2
; ALL-NEXT: vmovaps {{.*#+}} zmm0 = <0,u,u,3,20,21,u,u,u,u,u,u,12,29,14,15>
; ALL-NEXT: vpermi2ps %zmm2, %zmm1, %zmm0
; ALL-NEXT: retq
;
; X32-AVX512F-LABEL: merge_16f32_f32_0uu3zzuuuuuzCuEF:
; X32-AVX512F: # %bb.0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT: vmovups (%eax), %zmm1
; X32-AVX512F-NEXT: vxorps %xmm2, %xmm2, %xmm2
; X32-AVX512F-NEXT: vmovaps {{.*#+}} zmm0 = <0,u,u,3,20,21,u,u,u,u,u,u,12,29,14,15>
; X32-AVX512F-NEXT: vpermi2ps %zmm2, %zmm1, %zmm0
; X32-AVX512F-NEXT: retl
  %ptr0 = getelementptr inbounds float, float* %ptr, i64 0
  %ptr3 = getelementptr inbounds float, float* %ptr, i64 3
  %ptrC = getelementptr inbounds float, float* %ptr, i64 12
  %ptrE = getelementptr inbounds float, float* %ptr, i64 14
  %ptrF = getelementptr inbounds float, float* %ptr, i64 15
  %val0 = load float, float* %ptr0
  %val3 = load float, float* %ptr3
  %valC = load float, float* %ptrC
  %valE = load float, float* %ptrE
  %valF = load float, float* %ptrF
  %res0 = insertelement <16 x float> undef, float %val0, i32 0
  %res3 = insertelement <16 x float> %res0, float %val3, i32 3
  %res4 = insertelement <16 x float> %res3, float 0.0, i32 4
  %res5 = insertelement <16 x float> %res4, float 0.0, i32 5
  %resC = insertelement <16 x float> %res5, float %valC, i32 12
  %resD = insertelement <16 x float> %resC, float 0.0, i32 13
  %resE = insertelement <16 x float> %resD, float %valE, i32 14
  %resF = insertelement <16 x float> %resE, float %valF, i32 15
  ret <16 x float> %resF
}

; i32 elements 1 and 2 go to lanes 0 and 1; lanes 2, 3, 4 and 15 are zero, the
; rest undef. Expected: a single 64-bit scalar load that zero-fills the
; remainder of xmm0.
define <16 x i32> @merge_16i32_i32_12zzzuuuuuuuuuuuz(i32* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_16i32_i32_12zzzuuuuuuuuuuuz:
; ALL: # %bb.0:
; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT: retq
;
; X32-AVX512F-LABEL: merge_16i32_i32_12zzzuuuuuuuuuuuz:
; X32-AVX512F: # %bb.0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32-AVX512F-NEXT: retl
  %ptr0 = getelementptr inbounds i32, i32* %ptr, i64 1
  %ptr1 = getelementptr inbounds i32, i32* %ptr, i64 2
  %val0 = load i32, i32* %ptr0
  %val1 = load i32, i32* %ptr1
  %res0 = insertelement <16 x i32> undef, i32 %val0, i32 0
  %res1 = insertelement <16 x i32> %res0, i32 %val1, i32 1
  %res2 = insertelement <16 x i32> %res1, i32 0, i32 2
  %res3 = insertelement <16 x i32> %res2, i32 0, i32 3
  %res4 = insertelement <16 x i32> %res3, i32 0, i32 4
  %resF = insertelement <16 x i32> %res4, i32 0, i32 15
  ret <16 x i32> %resF
}

; i32 elements 2, 3 and 5 go to lanes 0, 1 and 3; every other lane is undef.
; Expected: a single xmm load at byte offset 8.
define <16 x i32> @merge_16i32_i32_23u5uuuuuuuuuuuu(i32* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_16i32_i32_23u5uuuuuuuuuuuu:
; ALL: # %bb.0:
; ALL-NEXT: vmovups 8(%rdi), %xmm0
; ALL-NEXT: retq
;
; X32-AVX512F-LABEL: merge_16i32_i32_23u5uuuuuuuuuuuu:
; X32-AVX512F: # %bb.0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT: vmovups 8(%eax), %xmm0
; X32-AVX512F-NEXT: retl
  %ptr0 = getelementptr inbounds i32, i32* %ptr, i64 2
  %ptr1 = getelementptr inbounds i32, i32* %ptr, i64 3
  %ptr3 = getelementptr inbounds i32, i32* %ptr, i64 5
  %val0 = load i32, i32* %ptr0
  %val1 = load i32, i32* %ptr1
  %val3 = load i32, i32* %ptr3
  %res0 = insertelement <16 x i32> undef, i32 %val0, i32 0
  %res1 = insertelement <16 x i32> %res0, i32 %val1, i32 1
  %res3 = insertelement <16 x i32> %res1, i32 %val3, i32 3
  ret <16 x i32> %res3
}

; i32 elements 0, 3, 12, 14 and 15 go to the same-numbered lanes; the rest is
; undef. Expected: a single 512-bit load from %ptr itself.
define <16 x i32> @merge_16i32_i32_0uu3uuuuuuuuCuEF(i32* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_16i32_i32_0uu3uuuuuuuuCuEF:
; ALL: # %bb.0:
; ALL-NEXT: vmovups (%rdi), %zmm0
; ALL-NEXT: retq
;
; X32-AVX512F-LABEL: merge_16i32_i32_0uu3uuuuuuuuCuEF:
; X32-AVX512F: # %bb.0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT: vmovups (%eax), %zmm0
; X32-AVX512F-NEXT: retl
  %ptr0 = getelementptr inbounds i32, i32* %ptr, i64 0
  %ptr3 = getelementptr inbounds i32, i32* %ptr, i64 3
  %ptrC = getelementptr inbounds i32, i32* %ptr, i64 12
  %ptrE = getelementptr inbounds i32, i32* %ptr, i64 14
  %ptrF = getelementptr inbounds i32, i32* %ptr, i64 15
  %val0 = load i32, i32* %ptr0
  %val3 = load i32, i32* %ptr3
  %valC = load i32, i32* %ptrC
  %valE = load i32, i32* %ptrE
  %valF = load i32, i32* %ptrF
  %res0 = insertelement <16 x i32> undef, i32 %val0, i32 0
  %res3 = insertelement <16 x i32> %res0, i32 %val3, i32 3
  %resC = insertelement <16 x i32> %res3, i32 %valC, i32 12
  %resE = insertelement <16 x i32> %resC, i32 %valE, i32 14
  %resF = insertelement <16 x i32> %resE, i32 %valF, i32 15
  ret <16 x i32> %resF
}

; As the previous test, plus explicit zeros in lanes 4, 5 and 13. Expected: a
; zero-masked 512-bit load whose mask is the complement of 0x2030 (bits 4, 5
; and 13, i.e. the zero lanes).
define <16 x i32> @merge_16i32_i32_0uu3zzuuuuuzCuEF(i32* %ptr) nounwind uwtable noinline ssp {
; AVX512F-LABEL: merge_16i32_i32_0uu3zzuuuuuzCuEF:
; AVX512F: # %bb.0:
; AVX512F-NEXT: movw $8240, %ax # imm = 0x2030
; AVX512F-NEXT: kmovw %eax, %k0
; AVX512F-NEXT: knotw %k0, %k1
; AVX512F-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} {z}
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: merge_16i32_i32_0uu3zzuuuuuzCuEF:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: movw $8240, %ax # imm = 0x2030
; AVX512BW-NEXT: kmovd %eax, %k0
; AVX512BW-NEXT: knotw %k0, %k1
; AVX512BW-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} {z}
; AVX512BW-NEXT: retq
;
; X32-AVX512F-LABEL: merge_16i32_i32_0uu3zzuuuuuzCuEF:
; X32-AVX512F: # %bb.0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT: movw $8240, %cx # imm = 0x2030
; X32-AVX512F-NEXT: kmovw %ecx, %k0
; X32-AVX512F-NEXT: knotw %k0, %k1
; X32-AVX512F-NEXT: vmovdqu32 (%eax), %zmm0 {%k1} {z}
; X32-AVX512F-NEXT: retl
  %ptr0 = getelementptr inbounds i32, i32* %ptr, i64 0
  %ptr3 = getelementptr inbounds i32, i32* %ptr, i64 3
  %ptrC = getelementptr inbounds i32, i32* %ptr, i64 12
  %ptrE = getelementptr inbounds i32, i32* %ptr, i64 14
  %ptrF = getelementptr inbounds i32, i32* %ptr, i64 15
  %val0 = load i32, i32* %ptr0
  %val3 = load i32, i32* %ptr3
  %valC = load i32, i32* %ptrC
  %valE = load i32, i32* %ptrE
  %valF = load i32, i32* %ptrF
  %res0 = insertelement <16 x i32> undef, i32 %val0, i32 0
  %res3 = insertelement <16 x i32> %res0, i32 %val3, i32 3
  %res4 = insertelement <16 x i32> %res3, i32 0, i32 4
  %res5 = insertelement <16 x i32> %res4, i32 0, i32 5
  %resC = insertelement <16 x i32> %res5, i32 %valC, i32 12
  %resD = insertelement <16 x i32> %resC, i32 0, i32 13
  %resE = insertelement <16 x i32> %resD, i32 %valE, i32 14
  %resF = insertelement <16 x i32> %resE, i32 %valF, i32 15
  ret <16 x i32> %resF
}

; i16 elements 1, 2 and 4 go to lanes 0, 1 and 3; lanes 30 and 31 are zero,
; the rest undef. Expected: one 64-bit scalar load. The two runs without
; +avx512bw additionally expect %xmm1 to be zeroed (presumably because the
; <32 x i16> result is split across two registers there -- confirm).
define <32 x i16> @merge_32i16_i16_12u4uuuuuuuuuuuuuuuuuuuuuuuuuuzz(i16* %ptr) nounwind uwtable noinline ssp {
; AVX512F-LABEL: merge_32i16_i16_12u4uuuuuuuuuuuuuuuuuuuuuuuuuuzz:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX512F-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: merge_32i16_i16_12u4uuuuuuuuuuuuuuuuuuuuuuuuuuzz:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX512BW-NEXT: retq
;
; X32-AVX512F-LABEL: merge_32i16_i16_12u4uuuuuuuuuuuuuuuuuuuuuuuuuuzz:
; X32-AVX512F: # %bb.0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32-AVX512F-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X32-AVX512F-NEXT: retl
  %ptr0 = getelementptr inbounds i16, i16* %ptr, i64 1
  %ptr1 = getelementptr inbounds i16, i16* %ptr, i64 2
  %ptr3 = getelementptr inbounds i16, i16* %ptr, i64 4
  %val0 = load i16, i16* %ptr0
  %val1 = load i16, i16* %ptr1
  %val3 = load i16, i16* %ptr3
  %res0 = insertelement <32 x i16> undef, i16 %val0, i16 0
  %res1 = insertelement <32 x i16> %res0, i16 %val1, i16 1
  %res3 = insertelement <32 x i16> %res1, i16 %val3, i16 3
  %res30 = insertelement <32 x i16> %res3, i16 0, i16 30
  %res31 = insertelement <32 x i16> %res30, i16 0, i16 31
  ret <32 x i16> %res31
}

; i16 elements 4, 5 and 7 go to lanes 0, 1 and 3; every other lane is undef.
; Expected: one 64-bit scalar load on all three runs.
define <32 x i16> @merge_32i16_i16_45u7uuuuuuuuuuuuuuuuuuuuuuuuuuuu(i16* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_32i16_i16_45u7uuuuuuuuuuuuuuuuuuuuuuuuuuuu:
; ALL: # %bb.0:
; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT: retq
;
; X32-AVX512F-LABEL: merge_32i16_i16_45u7uuuuuuuuuuuuuuuuuuuuuuuuuuuu:
; X32-AVX512F: # %bb.0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32-AVX512F-NEXT: retl
  %ptr0 = getelementptr inbounds i16, i16* %ptr, i64 4
  %ptr1 = getelementptr inbounds i16, i16* %ptr, i64 5
  %ptr3 = getelementptr inbounds i16, i16* %ptr, i64 7
  %val0 = load i16, i16* %ptr0
  %val1 = load i16, i16* %ptr1
  %val3 = load i16, i16* %ptr3
  %res0 = insertelement <32 x i16> undef, i16 %val0, i16 0
  %res1 = insertelement <32 x i16> %res0, i16 %val1, i16 1
  %res3 = insertelement <32 x i16> %res1, i16 %val3, i16 3
  ret <32 x i16> %res3
}

; i16 elements 2 and 3 go to lanes 0 and 1; lanes 3 and 14-17 are zero, the
; rest undef. Expected: one 32-bit scalar load; the runs without +avx512bw
; additionally expect %xmm1 to be zeroed.
define <32 x i16> @merge_32i16_i16_23uzuuuuuuuuuuzzzzuuuuuuuuuuuuuu(i16* %ptr) nounwind uwtable noinline ssp {
; AVX512F-LABEL: merge_32i16_i16_23uzuuuuuuuuuuzzzzuuuuuuuuuuuuuu:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512F-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: merge_32i16_i16_23uzuuuuuuuuuuzzzzuuuuuuuuuuuuuu:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512BW-NEXT: retq
;
; X32-AVX512F-LABEL: merge_32i16_i16_23uzuuuuuuuuuuzzzzuuuuuuuuuuuuuu:
; X32-AVX512F: # %bb.0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-AVX512F-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X32-AVX512F-NEXT: retl
  %ptr0 = getelementptr inbounds i16, i16* %ptr, i64 2
  %ptr1 = getelementptr inbounds i16, i16* %ptr, i64 3
  %val0 = load i16, i16* %ptr0
  %val1 = load i16, i16* %ptr1
  %res0 = insertelement <32 x i16> undef, i16 %val0, i16 0
  %res1 = insertelement <32 x i16> %res0, i16 %val1, i16 1
  %res3 = insertelement <32 x i16> %res1, i16 0, i16 3
  %resE = insertelement <32 x i16> %res3, i16 0, i16 14
  %resF = insertelement <32 x i16> %resE, i16 0, i16 15
  %resG = insertelement <32 x i16> %resF, i16 0, i16 16
  %resH = insertelement <32 x i16> %resG, i16 0, i16 17
  ret <32 x i16> %resH
}

; i8 elements 1, 2, 4 and 8 go to lanes 0, 1, 3 and 7; lanes 14-17 and 63 are
; zero, the rest undef. Expected: one 64-bit scalar load; the runs without
; +avx512bw additionally expect %xmm1 to be zeroed.
define <64 x i8> @merge_64i8_i8_12u4uuu8uuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz(i8* %ptr) nounwind uwtable noinline ssp {
; AVX512F-LABEL: merge_64i8_i8_12u4uuu8uuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX512F-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: merge_64i8_i8_12u4uuu8uuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX512BW-NEXT: retq
;
; X32-AVX512F-LABEL: merge_64i8_i8_12u4uuu8uuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
; X32-AVX512F: # %bb.0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32-AVX512F-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X32-AVX512F-NEXT: retl
  %ptr0 = getelementptr inbounds i8, i8* %ptr, i64 1
  %ptr1 = getelementptr inbounds i8, i8* %ptr, i64 2
  %ptr3 = getelementptr inbounds i8, i8* %ptr, i64 4
  %ptr7 = getelementptr inbounds i8, i8* %ptr, i64 8
  %val0 = load i8, i8* %ptr0
  %val1 = load i8, i8* %ptr1
  %val3 = load i8, i8* %ptr3
  %val7 = load i8, i8* %ptr7
  %res0 = insertelement <64 x i8> undef, i8 %val0, i8 0
  %res1 = insertelement <64 x i8> %res0, i8 %val1, i8 1
  %res3 = insertelement <64 x i8> %res1, i8 %val3, i8 3
  %res7 = insertelement <64 x i8> %res3, i8 %val7, i8 7
  %res14 = insertelement <64 x i8> %res7, i8 0, i8 14
  %res15 = insertelement <64 x i8> %res14, i8 0, i8 15
  %res16 = insertelement <64 x i8> %res15, i8 0, i8 16
  %res17 = insertelement <64 x i8> %res16, i8 0, i8 17
  %res63 = insertelement <64 x i8> %res17, i8 0, i8 63
  ret <64 x i8> %res63
}

; As the previous test but without the element-8 load into lane 7, so only a
; 32-bit scalar load is expected (plus the %xmm1 zeroing on the runs without
; +avx512bw).
define <64 x i8> @merge_64i8_i8_12u4uuuuuuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz(i8* %ptr) nounwind uwtable noinline ssp {
; AVX512F-LABEL: merge_64i8_i8_12u4uuuuuuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512F-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: merge_64i8_i8_12u4uuuuuuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512BW-NEXT: retq
;
; X32-AVX512F-LABEL: merge_64i8_i8_12u4uuuuuuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
; X32-AVX512F: # %bb.0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-AVX512F-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X32-AVX512F-NEXT: retl
  %ptr0 = getelementptr inbounds i8, i8* %ptr, i64 1
  %ptr1 = getelementptr inbounds i8, i8* %ptr, i64 2
  %ptr3 = getelementptr inbounds i8, i8* %ptr, i64 4
  %val0 = load i8, i8* %ptr0
  %val1 = load i8, i8* %ptr1
  %val3 = load i8, i8* %ptr3
  %res0 = insertelement <64 x i8> undef, i8 %val0, i8 0
  %res1 = insertelement <64 x i8> %res0, i8 %val1, i8 1
  %res3 = insertelement <64 x i8> %res1, i8 %val3, i8 3
  %res14 = insertelement <64 x i8> %res3, i8 0, i8 14
  %res15 = insertelement <64 x i8> %res14, i8 0, i8 15
  %res16 = insertelement <64 x i8> %res15, i8 0, i8 16
  %res17 = insertelement <64 x i8> %res16, i8 0, i8 17
  %res63 = insertelement <64 x i8> %res17, i8 0, i8 63
  ret <64 x i8> %res63
}

;
; Consecutive loads must not be combined when any or all of them are volatile.
;

; Same lanes as the non-volatile double test with elements 2, 3 and 9, but the
; load of element 2 is volatile. Expected: separate scalar loads (vmovsd,
; vmovhpd, and a broadcast from byte offset 72) rather than one wide load.
define <8 x double> @merge_8f64_f64_23uuuuu9_volatile(double* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8f64_f64_23uuuuu9_volatile:
; ALL: # %bb.0:
; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; ALL-NEXT: vbroadcastsd 72(%rdi), %ymm1
; ALL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; ALL-NEXT: retq
;
; X32-AVX512F-LABEL: merge_8f64_f64_23uuuuu9_volatile:
; X32-AVX512F: # %bb.0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32-AVX512F-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; X32-AVX512F-NEXT: vbroadcastsd 72(%eax), %ymm1
; X32-AVX512F-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; X32-AVX512F-NEXT: retl
  %ptr0 = getelementptr inbounds double, double* %ptr, i64 2
  %ptr1 = getelementptr inbounds double, double* %ptr, i64 3
  %ptr7 = getelementptr inbounds double, double* %ptr, i64 9
  %val0 = load volatile double, double* %ptr0
  %val1 = load double, double* %ptr1
  %val7 = load double, double* %ptr7
  %res0 = insertelement <8 x double> undef, double %val0, i32 0
  %res1 = insertelement <8 x double> %res0, double %val1, i32 1
  %res7 = insertelement <8 x double> %res1, double %val7, i32 7
  ret <8 x double> %res7
}

; Same lanes as the non-volatile i32 test with elements 0, 3, 12, 14 and 15,
; but all five loads are volatile. Expected: one vmovd/vpinsrd per element,
; assembled with 128-bit and 256-bit inserts, rather than one wide load.
define <16 x i32> @merge_16i32_i32_0uu3uuuuuuuuCuEF_volatile(i32* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_16i32_i32_0uu3uuuuuuuuCuEF_volatile:
; ALL: # %bb.0:
; ALL-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; ALL-NEXT: vpinsrd $3, 12(%rdi), %xmm0, %xmm0
; ALL-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; ALL-NEXT: vpinsrd $2, 56(%rdi), %xmm1, %xmm1
; ALL-NEXT: vpinsrd $3, 60(%rdi), %xmm1, %xmm1
; ALL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
; ALL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; ALL-NEXT: retq
;
; X32-AVX512F-LABEL: merge_16i32_i32_0uu3uuuuuuuuCuEF_volatile:
; X32-AVX512F: # %bb.0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-AVX512F-NEXT: vpinsrd $3, 12(%eax), %xmm0, %xmm0
; X32-AVX512F-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-AVX512F-NEXT: vpinsrd $2, 56(%eax), %xmm1, %xmm1
; X32-AVX512F-NEXT: vpinsrd $3, 60(%eax), %xmm1, %xmm1
; X32-AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
; X32-AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; X32-AVX512F-NEXT: retl
  %ptr0 = getelementptr inbounds i32, i32* %ptr, i64 0
  %ptr3 = getelementptr inbounds i32, i32* %ptr, i64 3
  %ptrC = getelementptr inbounds i32, i32* %ptr, i64 12
  %ptrE = getelementptr inbounds i32, i32* %ptr, i64 14
  %ptrF = getelementptr inbounds i32, i32* %ptr, i64 15
  %val0 = load volatile i32, i32* %ptr0
  %val3 = load volatile i32, i32* %ptr3
  %valC = load volatile i32, i32* %ptrC
  %valE = load volatile i32, i32* %ptrE
  %valF = load volatile i32, i32* %ptrF
  %res0 = insertelement <16 x i32> undef, i32 %val0, i32 0
  %res3 = insertelement <16 x i32> %res0, i32 %val3, i32 3
  %resC = insertelement <16 x i32> %res3, i32 %valC, i32 12
  %resE = insertelement <16 x i32> %resC, i32 %valE, i32 14
  %resF = insertelement <16 x i32> %resE, i32 %valF, i32 15
  ret <16 x i32> %resF
}