; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s

; Codegen test for splat (broadcast) patterns with AVX2 enabled. Each function
; builds a vector whose lanes are all equal, and the assertions inside it pin
; the exact instruction sequence expected from llc.
;
; NOTE(review): the load-based tests below are marked readnone although they
; load through their pointer argument -- confirm this is intentional.

; Scalar loaded from memory and inserted into every lane: the expected code is
; a single broadcast that reads straight from (%rdi).

; i8 splat into a 128-bit vector.
define <16 x i8> @BB16(i8* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: BB16:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vpbroadcastb (%rdi), %xmm0
; CHECK-NEXT: retq
entry:
  %q = load i8, i8* %ptr, align 4
  %q0 = insertelement <16 x i8> undef, i8 %q, i32 0
  %q1 = insertelement <16 x i8> %q0, i8 %q, i32 1
  %q2 = insertelement <16 x i8> %q1, i8 %q, i32 2
  %q3 = insertelement <16 x i8> %q2, i8 %q, i32 3
  %q4 = insertelement <16 x i8> %q3, i8 %q, i32 4
  %q5 = insertelement <16 x i8> %q4, i8 %q, i32 5
  %q6 = insertelement <16 x i8> %q5, i8 %q, i32 6
  %q7 = insertelement <16 x i8> %q6, i8 %q, i32 7
  %q8 = insertelement <16 x i8> %q7, i8 %q, i32 8
  %q9 = insertelement <16 x i8> %q8, i8 %q, i32 9
  %qa = insertelement <16 x i8> %q9, i8 %q, i32 10
  %qb = insertelement <16 x i8> %qa, i8 %q, i32 11
  %qc = insertelement <16 x i8> %qb, i8 %q, i32 12
  %qd = insertelement <16 x i8> %qc, i8 %q, i32 13
  %qe = insertelement <16 x i8> %qd, i8 %q, i32 14
  %qf = insertelement <16 x i8> %qe, i8 %q, i32 15
  ret <16 x i8> %qf
}

; i8 splat into a 256-bit vector.
define <32 x i8> @BB32(i8* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: BB32:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vpbroadcastb (%rdi), %ymm0
; CHECK-NEXT: retq
entry:
  %q = load i8, i8* %ptr, align 4
  %q0 = insertelement <32 x i8> undef, i8 %q, i32 0
  %q1 = insertelement <32 x i8> %q0, i8 %q, i32 1
  %q2 = insertelement <32 x i8> %q1, i8 %q, i32 2
  %q3 = insertelement <32 x i8> %q2, i8 %q, i32 3
  %q4 = insertelement <32 x i8> %q3, i8 %q, i32 4
  %q5 = insertelement <32 x i8> %q4, i8 %q, i32 5
  %q6 = insertelement <32 x i8> %q5, i8 %q, i32 6
  %q7 = insertelement <32 x i8> %q6, i8 %q, i32 7
  %q8 = insertelement <32 x i8> %q7, i8 %q, i32 8
  %q9 = insertelement <32 x i8> %q8, i8 %q, i32 9
  %qa = insertelement <32 x i8> %q9, i8 %q, i32 10
  %qb = insertelement <32 x i8> %qa, i8 %q, i32 11
  %qc = insertelement <32 x i8> %qb, i8 %q, i32 12
  %qd = insertelement <32 x i8> %qc, i8 %q, i32 13
  %qe = insertelement <32 x i8> %qd, i8 %q, i32 14
  %qf = insertelement <32 x i8> %qe, i8 %q, i32 15

  %q20 = insertelement <32 x i8> %qf, i8 %q, i32 16
  %q21 = insertelement <32 x i8> %q20, i8 %q, i32 17
  %q22 = insertelement <32 x i8> %q21, i8 %q, i32 18
  %q23 = insertelement <32 x i8> %q22, i8 %q, i32 19
  %q24 = insertelement <32 x i8> %q23, i8 %q, i32 20
  %q25 = insertelement <32 x i8> %q24, i8 %q, i32 21
  %q26 = insertelement <32 x i8> %q25, i8 %q, i32 22
  %q27 = insertelement <32 x i8> %q26, i8 %q, i32 23
  %q28 = insertelement <32 x i8> %q27, i8 %q, i32 24
  %q29 = insertelement <32 x i8> %q28, i8 %q, i32 25
  %q2a = insertelement <32 x i8> %q29, i8 %q, i32 26
  %q2b = insertelement <32 x i8> %q2a, i8 %q, i32 27
  %q2c = insertelement <32 x i8> %q2b, i8 %q, i32 28
  %q2d = insertelement <32 x i8> %q2c, i8 %q, i32 29
  %q2e = insertelement <32 x i8> %q2d, i8 %q, i32 30
  %q2f = insertelement <32 x i8> %q2e, i8 %q, i32 31
  ret <32 x i8> %q2f
}

; i16 splat into a 128-bit vector.
define <8 x i16> @W16(i16* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: W16:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vpbroadcastw (%rdi), %xmm0
; CHECK-NEXT: retq
entry:
  %q = load i16, i16* %ptr, align 4
  %q0 = insertelement <8 x i16> undef, i16 %q, i32 0
  %q1 = insertelement <8 x i16> %q0, i16 %q, i32 1
  %q2 = insertelement <8 x i16> %q1, i16 %q, i32 2
  %q3 = insertelement <8 x i16> %q2, i16 %q, i32 3
  %q4 = insertelement <8 x i16> %q3, i16 %q, i32 4
  %q5 = insertelement <8 x i16> %q4, i16 %q, i32 5
  %q6 = insertelement <8 x i16> %q5, i16 %q, i32 6
  %q7 = insertelement <8 x i16> %q6, i16 %q, i32 7
  ret <8 x i16> %q7
}

; i16 splat into a 256-bit vector.
define <16 x i16> @WW16(i16* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: WW16:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vpbroadcastw (%rdi), %ymm0
; CHECK-NEXT: retq
entry:
  %q = load i16, i16* %ptr, align 4
  %q0 = insertelement <16 x i16> undef, i16 %q, i32 0
  %q1 = insertelement <16 x i16> %q0, i16 %q, i32 1
  %q2 = insertelement <16 x i16> %q1, i16 %q, i32 2
  %q3 = insertelement <16 x i16> %q2, i16 %q, i32 3
  %q4 = insertelement <16 x i16> %q3, i16 %q, i32 4
  %q5 = insertelement <16 x i16> %q4, i16 %q, i32 5
  %q6 = insertelement <16 x i16> %q5, i16 %q, i32 6
  %q7 = insertelement <16 x i16> %q6, i16 %q, i32 7
  %q8 = insertelement <16 x i16> %q7, i16 %q, i32 8
  %q9 = insertelement <16 x i16> %q8, i16 %q, i32 9
  %qa = insertelement <16 x i16> %q9, i16 %q, i32 10
  %qb = insertelement <16 x i16> %qa, i16 %q, i32 11
  %qc = insertelement <16 x i16> %qb, i16 %q, i32 12
  %qd = insertelement <16 x i16> %qc, i16 %q, i32 13
  %qe = insertelement <16 x i16> %qd, i16 %q, i32 14
  %qf = insertelement <16 x i16> %qe, i16 %q, i32 15
  ret <16 x i16> %qf
}

; i32 splat into a 128-bit vector; the expected instruction is vbroadcastss.
define <4 x i32> @D32(i32* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: D32:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vbroadcastss (%rdi), %xmm0
; CHECK-NEXT: retq
entry:
  %q = load i32, i32* %ptr, align 4
  %q0 = insertelement <4 x i32> undef, i32 %q, i32 0
  %q1 = insertelement <4 x i32> %q0, i32 %q, i32 1
  %q2 = insertelement <4 x i32> %q1, i32 %q, i32 2
  %q3 = insertelement <4 x i32> %q2, i32 %q, i32 3
  ret <4 x i32> %q3
}

; i32 splat into a 256-bit vector.
define <8 x i32> @DD32(i32* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: DD32:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vbroadcastss (%rdi), %ymm0
; CHECK-NEXT: retq
entry:
  %q = load i32, i32* %ptr, align 4
  %q0 = insertelement <8 x i32> undef, i32 %q, i32 0
  %q1 = insertelement <8 x i32> %q0, i32 %q, i32 1
  %q2 = insertelement <8 x i32> %q1, i32 %q, i32 2
  %q3 = insertelement <8 x i32> %q2, i32 %q, i32 3
  %q4 = insertelement <8 x i32> %q3, i32 %q, i32 4
  %q5 = insertelement <8 x i32> %q4, i32 %q, i32 5
  %q6 = insertelement <8 x i32> %q5, i32 %q, i32 6
  %q7 = insertelement <8 x i32> %q6, i32 %q, i32 7
  ret <8 x i32> %q7
}

; i64 splat into a 128-bit vector.
define <2 x i64> @Q64(i64* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: Q64:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vpbroadcastq (%rdi), %xmm0
; CHECK-NEXT: retq
entry:
  %q = load i64, i64* %ptr, align 4
  %q0 = insertelement <2 x i64> undef, i64 %q, i32 0
  %q1 = insertelement <2 x i64> %q0, i64 %q, i32 1
  ret <2 x i64> %q1
}

; i64 splat into a 256-bit vector; the expected instruction is vbroadcastsd.
define <4 x i64> @QQ64(i64* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: QQ64:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0
; CHECK-NEXT: retq
entry:
  %q = load i64, i64* %ptr, align 4
  %q0 = insertelement <4 x i64> undef, i64 %q, i32 0
  %q1 = insertelement <4 x i64> %q0, i64 %q, i32 1
  %q2 = insertelement <4 x i64> %q1, i64 %q, i32 2
  %q3 = insertelement <4 x i64> %q2, i64 %q, i32 3
  ret <4 x i64> %q3
}

; FIXME: Pointer adjusted broadcasts
;
; The load_splat_* tests load a whole vector and splat one of its lanes with a
; shufflevector. The function name encodes result type, loaded type and the
; shuffle mask. In the expected output the broadcast reads the selected lane
; directly, at that lane's byte offset from %rdi.

define <16 x i8> @load_splat_16i8_16i8_1111111111111111(<16 x i8>* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: load_splat_16i8_16i8_1111111111111111:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vpbroadcastb 1(%rdi), %xmm0
; CHECK-NEXT: retq
entry:
  %ld = load <16 x i8>, <16 x i8>* %ptr
  %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <16 x i8> %ret
}

define <32 x i8> @load_splat_32i8_16i8_11111111111111111111111111111111(<16 x i8>* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: load_splat_32i8_16i8_11111111111111111111111111111111:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vpbroadcastb 1(%rdi), %ymm0
; CHECK-NEXT: retq
entry:
  %ld = load <16 x i8>, <16 x i8>* %ptr
  %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <32 x i8> %ret
}

define <32 x i8> @load_splat_32i8_32i8_11111111111111111111111111111111(<32 x i8>* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: load_splat_32i8_32i8_11111111111111111111111111111111:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vpbroadcastb 1(%rdi), %ymm0
; CHECK-NEXT: retq
entry:
  %ld = load <32 x i8>, <32 x i8>* %ptr
  %ret = shufflevector <32 x i8> %ld, <32 x i8> undef, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <32 x i8> %ret
}

define <8 x i16> @load_splat_8i16_8i16_11111111(<8 x i16>* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: load_splat_8i16_8i16_11111111:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vpbroadcastw 2(%rdi), %xmm0
; CHECK-NEXT: retq
entry:
  %ld = load <8 x i16>, <8 x i16>* %ptr
  %ret = shufflevector <8 x i16> %ld, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i16> %ret
}

define <16 x i16> @load_splat_16i16_8i16_1111111111111111(<8 x i16>* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: load_splat_16i16_8i16_1111111111111111:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vpbroadcastw 2(%rdi), %ymm0
; CHECK-NEXT: retq
entry:
  %ld = load <8 x i16>, <8 x i16>* %ptr
  %ret = shufflevector <8 x i16> %ld, <8 x i16> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <16 x i16> %ret
}

define <16 x i16> @load_splat_16i16_16i16_1111111111111111(<16 x i16>* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: load_splat_16i16_16i16_1111111111111111:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vpbroadcastw 2(%rdi), %ymm0
; CHECK-NEXT: retq
entry:
  %ld = load <16 x i16>, <16 x i16>* %ptr
  %ret = shufflevector <16 x i16> %ld, <16 x i16> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <16 x i16> %ret
}

define <4 x i32> @load_splat_4i32_4i32_1111(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: load_splat_4i32_4i32_1111:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vbroadcastss 4(%rdi), %xmm0
; CHECK-NEXT: retq
entry:
  %ld = load <4 x i32>, <4 x i32>* %ptr
  %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %ret
}

define <8 x i32> @load_splat_8i32_4i32_33333333(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: load_splat_8i32_4i32_33333333:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vbroadcastss 12(%rdi), %ymm0
; CHECK-NEXT: retq
entry:
  %ld = load <4 x i32>, <4 x i32>* %ptr
  %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <8 x i32> %ret
}

define <8 x i32> @load_splat_8i32_8i32_55555555(<8 x i32>* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: load_splat_8i32_8i32_55555555:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vbroadcastss 20(%rdi), %ymm0
; CHECK-NEXT: retq
entry:
  %ld = load <8 x i32>, <8 x i32>* %ptr
  %ret = shufflevector <8 x i32> %ld, <8 x i32> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <8 x i32> %ret
}

define <4 x float> @load_splat_4f32_4f32_1111(<4 x float>* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: load_splat_4f32_4f32_1111:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vbroadcastss 4(%rdi), %xmm0
; CHECK-NEXT: retq
entry:
  %ld = load <4 x float>, <4 x float>* %ptr
  %ret = shufflevector <4 x float> %ld, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x float> %ret
}

define <8 x float> @load_splat_8f32_4f32_33333333(<4 x float>* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: load_splat_8f32_4f32_33333333:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vbroadcastss 12(%rdi), %ymm0
; CHECK-NEXT: retq
entry:
  %ld = load <4 x float>, <4 x float>* %ptr
  %ret = shufflevector <4 x float> %ld, <4 x float> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <8 x float> %ret
}

define <8 x float> @load_splat_8f32_8f32_55555555(<8 x float>* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: load_splat_8f32_8f32_55555555:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vbroadcastss 20(%rdi), %ymm0
; CHECK-NEXT: retq
entry:
  %ld = load <8 x float>, <8 x float>* %ptr
  %ret = shufflevector <8 x float> %ld, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <8 x float> %ret
}

define <2 x i64> @load_splat_2i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: load_splat_2i64_2i64_1111:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vpbroadcastq 8(%rdi), %xmm0
; CHECK-NEXT: retq
entry:
  %ld = load <2 x i64>, <2 x i64>* %ptr
  %ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x i64> %ret
}

define <4 x i64> @load_splat_4i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: load_splat_4i64_2i64_1111:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vbroadcastsd 8(%rdi), %ymm0
; CHECK-NEXT: retq
entry:
  %ld = load <2 x i64>, <2 x i64>* %ptr
  %ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i64> %ret
}

define <4 x i64> @load_splat_4i64_4i64_2222(<4 x i64>* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: load_splat_4i64_4i64_2222:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vbroadcastsd 16(%rdi), %ymm0
; CHECK-NEXT: retq
entry:
  %ld = load <4 x i64>, <4 x i64>* %ptr
  %ret = shufflevector <4 x i64> %ld, <4 x i64> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i64> %ret
}

; Unlike the other load_splat_* cases, the expected output here is a full
; vector load followed by vmovhlps rather than a broadcast from memory.
define <2 x double> @load_splat_2f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: load_splat_2f64_2f64_1111:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vmovaps (%rdi), %xmm0
; CHECK-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: retq
entry:
  %ld = load <2 x double>, <2 x double>* %ptr
  %ret = shufflevector <2 x double> %ld, <2 x double> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x double> %ret
}

define <4 x double> @load_splat_4f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: load_splat_4f64_2f64_1111:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vbroadcastsd 8(%rdi), %ymm0
; CHECK-NEXT: retq
entry:
  %ld = load <2 x double>, <2 x double>* %ptr
  %ret = shufflevector <2 x double> %ld, <2 x double> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x double> %ret
}

define <4 x double> @load_splat_4f64_4f64_2222(<4 x double>* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: load_splat_4f64_4f64_2222:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vbroadcastsd 16(%rdi), %ymm0
; CHECK-NEXT: retq
entry:
  %ld = load <4 x double>, <4 x double>* %ptr
  %ret = shufflevector <4 x double> %ld, <4 x double> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
  ret <4 x double> %ret
}

; make sure that we still don't support broadcast double into 128-bit vector
; this used to crash
; (The expected instruction is vmovddup from memory, not a vbroadcast.)
define <2 x double> @I(double* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: I:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
; CHECK-NEXT: retq
entry:
  %q = load double, double* %ptr, align 4
  %vecinit.i = insertelement <2 x double> undef, double %q, i32 0
  %vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1
  ret <2 x double> %vecinit2.i
}

; Splat constant operand of an integer add: expected to be materialized with a
; RIP-relative vpbroadcastd.
define <8 x i32> @V111(<8 x i32> %in) nounwind uwtable readnone ssp {
; CHECK-LABEL: V111:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1
; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
entry:
  %g = add <8 x i32> %in, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i32> %g
}

; Splat constant operand of a float add: expected to be materialized with a
; RIP-relative vbroadcastss.
define <8 x float> @V113(<8 x float> %in) nounwind uwtable readnone ssp {
; CHECK-LABEL: V113:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vbroadcastss {{.*}}(%rip), %ymm1
; CHECK-NEXT: vaddps %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
entry:
  %g = fadd <8 x float> %in, <float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000>
  ret <8 x float> %g
}

; Constant float splat returned directly; %ptr is unused.
define <4 x float> @_e2(float* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: _e2:
; CHECK: ## BB#0:
; CHECK-NEXT: vbroadcastss {{.*}}(%rip), %xmm0
; CHECK-NEXT: retq
  %vecinit.i = insertelement <4 x float> undef, float 0xbf80000000000000, i32 0
  %vecinit2.i = insertelement <4 x float> %vecinit.i, float 0xbf80000000000000, i32 1
  %vecinit4.i = insertelement <4 x float> %vecinit2.i, float 0xbf80000000000000, i32 2
  %vecinit6.i = insertelement <4 x float> %vecinit4.i, float 0xbf80000000000000, i32 3
  ret <4 x float> %vecinit6.i
}

; Constant <8 x i8> splat of 52; %ptr is unused. The expected output is a plain
; constant vector load (vmovaps), not a broadcast.
define <8 x i8> @_e4(i8* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: _e4:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovaps {{.*#+}} xmm0 = [52,52,52,52,52,52,52,52]
; CHECK-NEXT: retq
  %vecinit0.i = insertelement <8 x i8> undef, i8 52, i32 0
  %vecinit1.i = insertelement <8 x i8> %vecinit0.i, i8 52, i32 1
  %vecinit2.i = insertelement <8 x i8> %vecinit1.i, i8 52, i32 2
  %vecinit3.i = insertelement <8 x i8> %vecinit2.i, i8 52, i32 3
  %vecinit4.i = insertelement <8 x i8> %vecinit3.i, i8 52, i32 4
  %vecinit5.i = insertelement <8 x i8> %vecinit4.i, i8 52, i32 5
  %vecinit6.i = insertelement <8 x i8> %vecinit5.i, i8 52, i32 6
  %vecinit7.i = insertelement <8 x i8> %vecinit6.i, i8 52, i32 7
  ret <8 x i8> %vecinit7.i
}


; Loop built from undef operands and undef branch conditions. The expected
; output contains no vector instructions at all, only the branch skeleton.
; The label LBB31_1 embeds this function's position in the file, so adding or
; removing functions above it requires regenerating the assertions.
define void @crash() nounwind alwaysinline {
; CHECK-LABEL: crash:
; CHECK: ## BB#0: ## %WGLoopsEntry
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: je LBB31_1
; CHECK-NEXT: ## BB#2: ## %ret
; CHECK-NEXT: retq
; CHECK-NEXT: .align 4, 0x90
; CHECK-NEXT: LBB31_1: ## %footer349VF
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT: jmp LBB31_1
WGLoopsEntry:
  br i1 undef, label %ret, label %footer329VF

footer329VF:
  %A.0.inVF = fmul float undef, 6.553600e+04
  %B.0.in407VF = fmul <8 x float> undef, <float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04>
  %A.0VF = fptosi float %A.0.inVF to i32
  %B.0408VF = fptosi <8 x float> %B.0.in407VF to <8 x i32>
  %0 = and <8 x i32> %B.0408VF, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
  %1 = and i32 %A.0VF, 65535
  %temp1098VF = insertelement <8 x i32> undef, i32 %1, i32 0
  %vector1099VF = shufflevector <8 x i32> %temp1098VF, <8 x i32> undef, <8 x i32> zeroinitializer
  br i1 undef, label %preload1201VF, label %footer349VF

preload1201VF:
  br label %footer349VF

footer349VF:
  %2 = mul nsw <8 x i32> undef, %0
  %3 = mul nsw <8 x i32> undef, %vector1099VF
  br label %footer329VF

ret:
  ret void
}

; The _inreg* tests splat lane 0 of a value that is already in a register
; (a scalar argument or a vector argument), so the expected broadcast takes a
; register source operand rather than a memory operand.

; i32 scalar argument: moved into an xmm register first, then broadcast.
define <8 x i32> @_inreg0(i32 %scalar) nounwind uwtable readnone ssp {
; CHECK-LABEL: _inreg0:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovd %edi, %xmm0
; CHECK-NEXT: vbroadcastss %xmm0, %ymm0
; CHECK-NEXT: retq
  %in = insertelement <8 x i32> undef, i32 %scalar, i32 0
  %wide = shufflevector <8 x i32> %in, <8 x i32> undef, <8 x i32> zeroinitializer
  ret <8 x i32> %wide
}

define <8 x float> @_inreg1(float %scalar) nounwind uwtable readnone ssp {
; CHECK-LABEL: _inreg1:
; CHECK: ## BB#0:
; CHECK-NEXT: vbroadcastss %xmm0, %ymm0
; CHECK-NEXT: retq
  %in = insertelement <8 x float> undef, float %scalar, i32 0
  %wide = shufflevector <8 x float> %in, <8 x float> undef, <8 x i32> zeroinitializer
  ret <8 x float> %wide
}

define <4 x float> @_inreg2(float %scalar) nounwind uwtable readnone ssp {
; CHECK-LABEL: _inreg2:
; CHECK: ## BB#0:
; CHECK-NEXT: vbroadcastss %xmm0, %xmm0
; CHECK-NEXT: retq
  %in = insertelement <4 x float> undef, float %scalar, i32 0
  %wide = shufflevector <4 x float> %in, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %wide
}

define <4 x double> @_inreg3(double %scalar) nounwind uwtable readnone ssp {
; CHECK-LABEL: _inreg3:
; CHECK: ## BB#0:
; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0
; CHECK-NEXT: retq
  %in = insertelement <4 x double> undef, double %scalar, i32 0
  %wide = shufflevector <4 x double> %in, <4 x double> undef, <4 x i32> zeroinitializer
  ret <4 x double> %wide
}

define <8 x float> @_inreg8xfloat(<8 x float> %a) {
; CHECK-LABEL: _inreg8xfloat:
; CHECK: ## BB#0:
; CHECK-NEXT: vbroadcastss %xmm0, %ymm0
; CHECK-NEXT: retq
  %b = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> zeroinitializer
  ret <8 x float> %b
}

define <4 x float> @_inreg4xfloat(<4 x float> %a) {
; CHECK-LABEL: _inreg4xfloat:
; CHECK: ## BB#0:
; CHECK-NEXT: vbroadcastss %xmm0, %xmm0
; CHECK-NEXT: retq
  %b = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %b
}

define <16 x i16> @_inreg16xi16(<16 x i16> %a) {
; CHECK-LABEL: _inreg16xi16:
; CHECK: ## BB#0:
; CHECK-NEXT: vpbroadcastw %xmm0, %ymm0
; CHECK-NEXT: retq
  %b = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> zeroinitializer
  ret <16 x i16> %b
}

define <8 x i16> @_inreg8xi16(<8 x i16> %a) {
; CHECK-LABEL: _inreg8xi16:
; CHECK: ## BB#0:
; CHECK-NEXT: vpbroadcastw %xmm0, %xmm0
; CHECK-NEXT: retq
  %b = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %b
}

define <4 x i64> @_inreg4xi64(<4 x i64> %a) {
; CHECK-LABEL: _inreg4xi64:
; CHECK: ## BB#0:
; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0
; CHECK-NEXT: retq
  %b = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> zeroinitializer
  ret <4 x i64> %b
}

define <2 x i64> @_inreg2xi64(<2 x i64> %a) {
; CHECK-LABEL: _inreg2xi64:
; CHECK: ## BB#0:
; CHECK-NEXT: vpbroadcastq %xmm0, %xmm0
; CHECK-NEXT: retq
  %b = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %b
}

define <4 x double> @_inreg4xdouble(<4 x double> %a) {
; CHECK-LABEL: _inreg4xdouble:
; CHECK: ## BB#0:
; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0
; CHECK-NEXT: retq
  %b = shufflevector <4 x double> %a, <4 x double> undef, <4 x i32> zeroinitializer
  ret <4 x double> %b
}

; As with @I, the 128-bit double splat is expected to use vmovddup.
define <2 x double> @_inreg2xdouble(<2 x double> %a) {
; CHECK-LABEL: _inreg2xdouble:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; CHECK-NEXT: retq
  %b = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %b
}

define <8 x i32> @_inreg8xi32(<8 x i32> %a) {
; CHECK-LABEL: _inreg8xi32:
; CHECK: ## BB#0:
; CHECK-NEXT: vbroadcastss %xmm0, %ymm0
; CHECK-NEXT: retq
  %b = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> zeroinitializer
  ret <8 x i32> %b
}

define <4 x i32> @_inreg4xi32(<4 x i32> %a) {
; CHECK-LABEL: _inreg4xi32:
; CHECK: ## BB#0:
; CHECK-NEXT: vbroadcastss %xmm0, %xmm0
; CHECK-NEXT: retq
  %b = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %b
}

define <32 x i8> @_inreg32xi8(<32 x i8> %a) {
; CHECK-LABEL: _inreg32xi8:
; CHECK: ## BB#0:
; CHECK-NEXT: vpbroadcastb %xmm0, %ymm0
; CHECK-NEXT: retq
  %b = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> zeroinitializer
  ret <32 x i8> %b
}

define <16 x i8> @_inreg16xi8(<16 x i8> %a) {
; CHECK-LABEL: _inreg16xi8:
; CHECK: ## BB#0:
; CHECK-NEXT: vpbroadcastb %xmm0, %xmm0
; CHECK-NEXT: retq
  %b = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %b
}

; These tests check that a vbroadcast instruction is used when we have a splat
; formed from a concat_vectors (via the shufflevector) of two BUILD_VECTORs
; (via the insertelements).
; splat_concat1: one 4-lane splat of %f, widened to 8 lanes by repeating its
; four elements in the shuffle mask.
define <8 x float> @splat_concat1(float %f) {
; CHECK-LABEL: splat_concat1:
; CHECK: ## BB#0:
; CHECK-NEXT: vbroadcastss %xmm0, %ymm0
; CHECK-NEXT: retq
  %1 = insertelement <4 x float> undef, float %f, i32 0
  %2 = insertelement <4 x float> %1, float %f, i32 1
  %3 = insertelement <4 x float> %2, float %f, i32 2
  %4 = insertelement <4 x float> %3, float %f, i32 3
  %5 = shufflevector <4 x float> %4, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <8 x float> %5
}

; splat_concat2: two separately built 4-lane splats of %f, concatenated by an
; identity shuffle over both operands.
define <8 x float> @splat_concat2(float %f) {
; CHECK-LABEL: splat_concat2:
; CHECK: ## BB#0:
; CHECK-NEXT: vbroadcastss %xmm0, %ymm0
; CHECK-NEXT: retq
  %1 = insertelement <4 x float> undef, float %f, i32 0
  %2 = insertelement <4 x float> %1, float %f, i32 1
  %3 = insertelement <4 x float> %2, float %f, i32 2
  %4 = insertelement <4 x float> %3, float %f, i32 3
  %5 = insertelement <4 x float> undef, float %f, i32 0
  %6 = insertelement <4 x float> %5, float %f, i32 1
  %7 = insertelement <4 x float> %6, float %f, i32 2
  %8 = insertelement <4 x float> %7, float %f, i32 3
  %9 = shufflevector <4 x float> %4, <4 x float> %8, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x float> %9
}

; splat_concat3: double-precision counterpart of splat_concat1.
define <4 x double> @splat_concat3(double %d) {
; CHECK-LABEL: splat_concat3:
; CHECK: ## BB#0:
; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0
; CHECK-NEXT: retq
  %1 = insertelement <2 x double> undef, double %d, i32 0
  %2 = insertelement <2 x double> %1, double %d, i32 1
  %3 = shufflevector <2 x double> %2, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  ret <4 x double> %3
}

; splat_concat4: double-precision counterpart of splat_concat2.
define <4 x double> @splat_concat4(double %d) {
; CHECK-LABEL: splat_concat4:
; CHECK: ## BB#0:
; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0
; CHECK-NEXT: retq
  %1 = insertelement <2 x double> undef, double %d, i32 0
  %2 = insertelement <2 x double> %1, double %d, i32 1
  %3 = insertelement <2 x double> undef, double %d, i32 0
  %4 = insertelement <2 x double> %3, double %d, i32 1
  %5 = shufflevector <2 x double> %2, <2 x double> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x double> %5
}

; Test cases for <rdar://problem/16074331>.
; Instruction selection for the broadcast instruction fails if
; the load cannot be folded into the broadcast.
; This happens if the load initially has one use but other uses are
; created later, or if the selection DAG cannot prove that folding the
; load will not create a cycle in the DAG.
; These test cases exercise the latter.
;
; Every isel_crash_* function has the same shape: store and reload a zero
; vector through a stack slot, load a scalar from %cV_R.addr, splat it, and
; store both vectors to separate stack slots. The assertions only require that
; a broadcast of the right element width and register class is emitted and that
; the function returns; they do not pin the source operand of the broadcast.

; CHECK-LABEL: isel_crash_16b:
; CHECK: vpbroadcastb {{[^,]+}}, %xmm{{[0-9]+}}
; CHECK: ret
define void @isel_crash_16b(i8* %cV_R.addr) {
entry:
  %__a.addr.i = alloca <2 x i64>, align 16
  %__b.addr.i = alloca <2 x i64>, align 16
  %vCr = alloca <2 x i64>, align 16
  store <2 x i64> zeroinitializer, <2 x i64>* %vCr, align 16
  %tmp = load <2 x i64>, <2 x i64>* %vCr, align 16
  %tmp2 = load i8, i8* %cV_R.addr, align 4
  %splat.splatinsert = insertelement <16 x i8> undef, i8 %tmp2, i32 0
  %splat.splat = shufflevector <16 x i8> %splat.splatinsert, <16 x i8> undef, <16 x i32> zeroinitializer
  %tmp3 = bitcast <16 x i8> %splat.splat to <2 x i64>
  store <2 x i64> %tmp, <2 x i64>* %__a.addr.i, align 16
  store <2 x i64> %tmp3, <2 x i64>* %__b.addr.i, align 16
  ret void
}

; CHECK-LABEL: isel_crash_32b:
; CHECK: vpbroadcastb {{[^,]+}}, %ymm{{[0-9]+}}
; CHECK: ret
define void @isel_crash_32b(i8* %cV_R.addr) {
entry:
  %__a.addr.i = alloca <4 x i64>, align 16
  %__b.addr.i = alloca <4 x i64>, align 16
  %vCr = alloca <4 x i64>, align 16
  store <4 x i64> zeroinitializer, <4 x i64>* %vCr, align 16
  %tmp = load <4 x i64>, <4 x i64>* %vCr, align 16
  %tmp2 = load i8, i8* %cV_R.addr, align 4
  %splat.splatinsert = insertelement <32 x i8> undef, i8 %tmp2, i32 0
  %splat.splat = shufflevector <32 x i8> %splat.splatinsert, <32 x i8> undef, <32 x i32> zeroinitializer
  %tmp3 = bitcast <32 x i8> %splat.splat to <4 x i64>
  store <4 x i64> %tmp, <4 x i64>* %__a.addr.i, align 16
  store <4 x i64> %tmp3, <4 x i64>* %__b.addr.i, align 16
  ret void
}

; CHECK-LABEL: isel_crash_8w:
; CHECK: vpbroadcastw {{[^,]+}}, %xmm{{[0-9]+}}
; CHECK: ret
define void @isel_crash_8w(i16* %cV_R.addr) {
entry:
  %__a.addr.i = alloca <2 x i64>, align 16
  %__b.addr.i = alloca <2 x i64>, align 16
  %vCr = alloca <2 x i64>, align 16
  store <2 x i64> zeroinitializer, <2 x i64>* %vCr, align 16
  %tmp = load <2 x i64>, <2 x i64>* %vCr, align 16
  %tmp2 = load i16, i16* %cV_R.addr, align 4
  %splat.splatinsert = insertelement <8 x i16> undef, i16 %tmp2, i32 0
  %splat.splat = shufflevector <8 x i16> %splat.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
  %tmp3 = bitcast <8 x i16> %splat.splat to <2 x i64>
  store <2 x i64> %tmp, <2 x i64>* %__a.addr.i, align 16
  store <2 x i64> %tmp3, <2 x i64>* %__b.addr.i, align 16
  ret void
}

; CHECK-LABEL: isel_crash_16w:
; CHECK: vpbroadcastw {{[^,]+}}, %ymm{{[0-9]+}}
; CHECK: ret
define void @isel_crash_16w(i16* %cV_R.addr) {
entry:
  %__a.addr.i = alloca <4 x i64>, align 16
  %__b.addr.i = alloca <4 x i64>, align 16
  %vCr = alloca <4 x i64>, align 16
  store <4 x i64> zeroinitializer, <4 x i64>* %vCr, align 16
  %tmp = load <4 x i64>, <4 x i64>* %vCr, align 16
  %tmp2 = load i16, i16* %cV_R.addr, align 4
  %splat.splatinsert = insertelement <16 x i16> undef, i16 %tmp2, i32 0
  %splat.splat = shufflevector <16 x i16> %splat.splatinsert, <16 x i16> undef, <16 x i32> zeroinitializer
  %tmp3 = bitcast <16 x i16> %splat.splat to <4 x i64>
  store <4 x i64> %tmp, <4 x i64>* %__a.addr.i, align 16
  store <4 x i64> %tmp3, <4 x i64>* %__b.addr.i, align 16
  ret void
}

; CHECK-LABEL: isel_crash_4d:
; CHECK: vbroadcastss {{[^,]+}}, %xmm{{[0-9]+}}
; CHECK: ret
define void @isel_crash_4d(i32* %cV_R.addr) {
entry:
  %__a.addr.i = alloca <2 x i64>, align 16
  %__b.addr.i = alloca <2 x i64>, align 16
  %vCr = alloca <2 x i64>, align 16
  store <2 x i64> zeroinitializer, <2 x i64>* %vCr, align 16
  %tmp = load <2 x i64>, <2 x i64>* %vCr, align 16
  %tmp2 = load i32, i32* %cV_R.addr, align 4
  %splat.splatinsert = insertelement <4 x i32> undef, i32 %tmp2, i32 0
  %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
  %tmp3 = bitcast <4 x i32> %splat.splat to <2 x i64>
  store <2 x i64> %tmp, <2 x i64>* %__a.addr.i, align 16
  store <2 x i64> %tmp3, <2 x i64>* %__b.addr.i, align 16
  ret void
}

; CHECK-LABEL: isel_crash_8d:
; CHECK: vbroadcastss {{[^,]+}}, %ymm{{[0-9]+}}
; CHECK: ret
define void @isel_crash_8d(i32* %cV_R.addr) {
entry:
  %__a.addr.i = alloca <4 x i64>, align 16
  %__b.addr.i = alloca <4 x i64>, align 16
  %vCr = alloca <4 x i64>, align 16
  store <4 x i64> zeroinitializer, <4 x i64>* %vCr, align 16
  %tmp = load <4 x i64>, <4 x i64>* %vCr, align 16
  %tmp2 = load i32, i32* %cV_R.addr, align 4
  %splat.splatinsert = insertelement <8 x i32> undef, i32 %tmp2, i32 0
  %splat.splat = shufflevector <8 x i32> %splat.splatinsert, <8 x i32> undef, <8 x i32> zeroinitializer
  %tmp3 = bitcast <8 x i32> %splat.splat to <4 x i64>
  store <4 x i64> %tmp, <4 x i64>* %__a.addr.i, align 16
  store <4 x i64> %tmp3, <4 x i64>* %__b.addr.i, align 16
  ret void
}

; The two i64 variants store the splat directly; no bitcast is needed because
; the splat already has the stored vector type.

; CHECK-LABEL: isel_crash_2q:
; CHECK: vpbroadcastq {{[^,]+}}, %xmm{{[0-9]+}}
; CHECK: ret
define void @isel_crash_2q(i64* %cV_R.addr) {
entry:
  %__a.addr.i = alloca <2 x i64>, align 16
  %__b.addr.i = alloca <2 x i64>, align 16
  %vCr = alloca <2 x i64>, align 16
  store <2 x i64> zeroinitializer, <2 x i64>* %vCr, align 16
  %tmp = load <2 x i64>, <2 x i64>* %vCr, align 16
  %tmp2 = load i64, i64* %cV_R.addr, align 4
  %splat.splatinsert = insertelement <2 x i64> undef, i64 %tmp2, i32 0
  %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
  store <2 x i64> %tmp, <2 x i64>* %__a.addr.i, align 16
  store <2 x i64> %splat.splat, <2 x i64>* %__b.addr.i, align 16
  ret void
}

; CHECK-LABEL: isel_crash_4q:
; CHECK: vbroadcastsd {{[^,]+}}, %ymm{{[0-9]+}}
; CHECK: ret
define void @isel_crash_4q(i64* %cV_R.addr) {
entry:
  %__a.addr.i = alloca <4 x i64>, align 16
  %__b.addr.i = alloca <4 x i64>, align 16
  %vCr = alloca <4 x i64>, align 16
  store <4 x i64> zeroinitializer, <4 x i64>* %vCr, align 16
  %tmp = load <4 x i64>, <4 x i64>* %vCr, align 16
  %tmp2 = load i64, i64* %cV_R.addr, align 4
  %splat.splatinsert = insertelement <4 x i64> undef, i64 %tmp2, i32 0
  %splat.splat = shufflevector <4 x i64> %splat.splatinsert, <4 x i64> undef, <4 x i32> zeroinitializer
  store <4 x i64> %tmp, <4 x i64>* %__a.addr.i, align 16
  store <4 x i64> %splat.splat, <4 x i64>* %__b.addr.i, align 16
  ret void
}