; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx < %s | FileCheck %s
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx -addr-sink-using-gep=1 < %s | FileCheck %s

%struct.A = type { i8, i8, i8, i8, i8, i8, i8, i8 }
%struct.B = type { i32, i32, i32, i32, i32, i32, i32, i32 }

; Each loop iteration stores the constant bytes 1..8 into the eight consecutive
; fields of a %struct.A. The test expects them to be emitted as one 64-bit
; immediate (578437695752307201 == 0x0807060504030201).
; CHECK-LABEL: merge_const_store:
; CHECK: movabsq $578437695752307201
; CHECK: ret
define void @merge_const_store(i32 %count, %struct.A* nocapture %p) nounwind uwtable noinline ssp {
entry:
  %nonempty = icmp sgt i32 %count, 0
  br i1 %nonempty, label %loop, label %exit

loop:
  %iv = phi i32 [ %iv.next, %loop ], [ 0, %entry ]
  %cur = phi %struct.A* [ %cur.next, %loop ], [ %p, %entry ]
  %f0 = getelementptr inbounds %struct.A, %struct.A* %cur, i64 0, i32 0
  store i8 1, i8* %f0, align 1
  %f1 = getelementptr inbounds %struct.A, %struct.A* %cur, i64 0, i32 1
  store i8 2, i8* %f1, align 1
  %f2 = getelementptr inbounds %struct.A, %struct.A* %cur, i64 0, i32 2
  store i8 3, i8* %f2, align 1
  %f3 = getelementptr inbounds %struct.A, %struct.A* %cur, i64 0, i32 3
  store i8 4, i8* %f3, align 1
  %f4 = getelementptr inbounds %struct.A, %struct.A* %cur, i64 0, i32 4
  store i8 5, i8* %f4, align 1
  %f5 = getelementptr inbounds %struct.A, %struct.A* %cur, i64 0, i32 5
  store i8 6, i8* %f5, align 1
  %f6 = getelementptr inbounds %struct.A, %struct.A* %cur, i64 0, i32 6
  store i8 7, i8* %f6, align 1
  %f7 = getelementptr inbounds %struct.A, %struct.A* %cur, i64 0, i32 7
  store i8 8, i8* %f7, align 1
  %iv.next = add nsw i32 %iv, 1
  %cur.next = getelementptr inbounds %struct.A, %struct.A* %cur, i64 1
  %done = icmp eq i32 %iv.next, %count
  br i1 %done, label %exit, label %loop

exit:
  ret void
}

; Same shape with eight i32 zero stores into a %struct.B. The function is
; marked noimplicitfloat, so no vector store may be used.
; CHECK-LABEL: merge_const_store_no_vec:
; CHECK-NOT: vmovups
; CHECK: ret
define void @merge_const_store_no_vec(i32 %count, %struct.B* nocapture %p) noimplicitfloat {
entry:
  %nonempty = icmp sgt i32 %count, 0
  br i1 %nonempty, label %loop, label %exit

loop:
  %iv = phi i32 [ %iv.next, %loop ], [ 0, %entry ]
  %cur = phi %struct.B* [ %cur.next, %loop ], [ %p, %entry ]
  %f0 = getelementptr inbounds %struct.B, %struct.B* %cur, i64 0, i32 0
  store i32 0, i32* %f0, align 4
  %f1 = getelementptr inbounds %struct.B, %struct.B* %cur, i64 0, i32 1
  store i32 0, i32* %f1, align 4
  %f2 = getelementptr inbounds %struct.B, %struct.B* %cur, i64 0, i32 2
  store i32 0, i32* %f2, align 4
  %f3 = getelementptr inbounds %struct.B, %struct.B* %cur, i64 0, i32 3
  store i32 0, i32* %f3, align 4
  %f4 = getelementptr inbounds %struct.B, %struct.B* %cur, i64 0, i32 4
  store i32 0, i32* %f4, align 4
  %f5 = getelementptr inbounds %struct.B, %struct.B* %cur, i64 0, i32 5
  store i32 0, i32* %f5, align 4
  %f6 = getelementptr inbounds %struct.B, %struct.B* %cur, i64 0, i32 6
  store i32 0, i32* %f6, align 4
  %f7 = getelementptr inbounds %struct.B, %struct.B* %cur, i64 0, i32 7
  store i32 0, i32* %f7, align 4
  %iv.next = add nsw i32 %iv, 1
  %cur.next = getelementptr inbounds %struct.B, %struct.B* %cur, i64 1
  %done = icmp eq i32 %iv.next, %count
  br i1 %done, label %exit, label %loop

exit:
  ret void
}

; Move the constants using a single vector store.
; CHECK-LABEL: merge_const_store_vec:
; CHECK: vmovups
; CHECK: ret
define void @merge_const_store_vec(i32 %count, %struct.B* nocapture %p) nounwind uwtable noinline ssp {
entry:
  %nonempty = icmp sgt i32 %count, 0
  br i1 %nonempty, label %loop, label %exit

loop:
  ; Eight i32 zero stores covering every field of the current %struct.B.
  %iv = phi i32 [ %iv.next, %loop ], [ 0, %entry ]
  %cur = phi %struct.B* [ %cur.next, %loop ], [ %p, %entry ]
  %f0 = getelementptr inbounds %struct.B, %struct.B* %cur, i64 0, i32 0
  store i32 0, i32* %f0, align 4
  %f1 = getelementptr inbounds %struct.B, %struct.B* %cur, i64 0, i32 1
  store i32 0, i32* %f1, align 4
  %f2 = getelementptr inbounds %struct.B, %struct.B* %cur, i64 0, i32 2
  store i32 0, i32* %f2, align 4
  %f3 = getelementptr inbounds %struct.B, %struct.B* %cur, i64 0, i32 3
  store i32 0, i32* %f3, align 4
  %f4 = getelementptr inbounds %struct.B, %struct.B* %cur, i64 0, i32 4
  store i32 0, i32* %f4, align 4
  %f5 = getelementptr inbounds %struct.B, %struct.B* %cur, i64 0, i32 5
  store i32 0, i32* %f5, align 4
  %f6 = getelementptr inbounds %struct.B, %struct.B* %cur, i64 0, i32 6
  store i32 0, i32* %f6, align 4
  %f7 = getelementptr inbounds %struct.B, %struct.B* %cur, i64 0, i32 7
  store i32 0, i32* %f7, align 4
  %iv.next = add nsw i32 %iv, 1
  %cur.next = getelementptr inbounds %struct.B, %struct.B* %cur, i64 1
  %done = icmp eq i32 %iv.next, %count
  br i1 %done, label %exit, label %loop

exit:
  ret void
}

; The first 4 constant bytes are merged into one 32-bit immediate store; the
; remaining bytes are stored as scalars.
; 67305985 == 0x04030201, i.e. the constant bytes 1..4 packed into one word.
; CHECK-LABEL: merge_nonconst_store:
; CHECK: movl $67305985
; CHECK: movb
; CHECK: movb
; CHECK: movb
; CHECK: movb
; CHECK: ret
define void @merge_nonconst_store(i32 %count, i8 %zz, %struct.A* nocapture %p) nounwind uwtable noinline ssp {
entry:
  %nonempty = icmp sgt i32 %count, 0
  br i1 %nonempty, label %loop, label %exit

loop:
  %iv = phi i32 [ %iv.next, %loop ], [ 0, %entry ]
  %cur = phi %struct.A* [ %cur.next, %loop ], [ %p, %entry ]
  %f0 = getelementptr inbounds %struct.A, %struct.A* %cur, i64 0, i32 0
  store i8 1, i8* %f0, align 1
  %f1 = getelementptr inbounds %struct.A, %struct.A* %cur, i64 0, i32 1
  store i8 2, i8* %f1, align 1
  %f2 = getelementptr inbounds %struct.A, %struct.A* %cur, i64 0, i32 2
  store i8 3, i8* %f2, align 1
  %f3 = getelementptr inbounds %struct.A, %struct.A* %cur, i64 0, i32 3
  store i8 4, i8* %f3, align 1
  %f4 = getelementptr inbounds %struct.A, %struct.A* %cur, i64 0, i32 4
  store i8 %zz, i8* %f4, align 1                  ; the only non-constant value
  %f5 = getelementptr inbounds %struct.A, %struct.A* %cur, i64 0, i32 5
  store i8 6, i8* %f5, align 1
  %f6 = getelementptr inbounds %struct.A, %struct.A* %cur, i64 0, i32 6
  store i8 7, i8* %f6, align 1
  %f7 = getelementptr inbounds %struct.A, %struct.A* %cur, i64 0, i32 7
  store i8 8, i8* %f7, align 1
  %iv.next = add nsw i32 %iv, 1
  %cur.next = getelementptr inbounds %struct.A, %struct.A* %cur, i64 1
  %done = icmp eq i32 %iv.next, %count
  br i1 %done, label %exit, label %loop

exit:
  ret void
}


; Two adjacent byte loads from %q followed by two adjacent byte stores to %p:
; expected to become one 16-bit load and one 16-bit store.
; CHECK-LABEL: merge_loads_i16:
; load:
; CHECK: movw
; store:
; CHECK: movw
; CHECK: ret
define void @merge_loads_i16(i32 %count, %struct.A* noalias nocapture %q, %struct.A* noalias nocapture %p) nounwind uwtable noinline ssp {
entry:
  %nonempty = icmp sgt i32 %count, 0
  br i1 %nonempty, label %preheader, label %exit

preheader:
  %src0 = getelementptr inbounds %struct.A, %struct.A* %q, i64 0, i32 0
  %src1 = getelementptr inbounds %struct.A, %struct.A* %q, i64 0, i32 1
  br label %loop

loop:
  %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
  %dst = phi %struct.A* [ %p, %preheader ], [ %dst.next, %loop ]
  %v0 = load i8, i8* %src0, align 1
  %v1 = load i8, i8* %src1, align 1
  %dst0 = getelementptr inbounds %struct.A, %struct.A* %dst, i64 0, i32 0
  store i8 %v0, i8* %dst0, align 1
  %dst1 = getelementptr inbounds %struct.A, %struct.A* %dst, i64 0, i32 1
  store i8 %v1, i8* %dst1, align 1
  %iv.next = add nsw i32 %iv, 1
  %dst.next = getelementptr inbounds %struct.A, %struct.A* %dst, i64 1
  %done = icmp eq i32 %iv.next, %count
  br i1 %done, label %exit, label %loop

exit:
  ret void
}

; Here the second load sits between the two stores, so the loads and stores are
; interleaved and must stay as four separate byte moves.
; CHECK-LABEL: no_merge_loads:
; CHECK: movb
; CHECK: movb
; CHECK: movb
; CHECK: movb
; CHECK: ret
define void @no_merge_loads(i32 %count, %struct.A* noalias nocapture %q, %struct.A* noalias nocapture %p) nounwind uwtable noinline ssp {
entry:
  %nonempty = icmp sgt i32 %count, 0
  br i1 %nonempty, label %preheader, label %exit

preheader:
  %src0 = getelementptr inbounds %struct.A, %struct.A* %q, i64 0, i32 0
  %src1 = getelementptr inbounds %struct.A, %struct.A* %q, i64 0, i32 1
  br label %loop

loop:
  %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
  %dst = phi %struct.A* [ %p, %preheader ], [ %dst.next, %loop ]
  %v0 = load i8, i8* %src0, align 1
  %dst0 = getelementptr inbounds %struct.A, %struct.A* %dst, i64 0, i32 0
  store i8 %v0, i8* %dst0, align 1
  %dst1 = getelementptr inbounds %struct.A, %struct.A* %dst, i64 0, i32 1
  %v1 = load i8, i8* %src1, align 1               ; load after the first store
  store i8 %v1, i8* %dst1, align 1
  %iv.next = add nsw i32 %iv, 1
  %dst.next = getelementptr inbounds %struct.A, %struct.A* %dst, i64 1
  %done = icmp eq i32 %iv.next, %count
  br i1 %done, label %exit, label %loop

exit:
  ret void
}


; Two adjacent i32 loads/stores: expected to become one 64-bit load and store.
; CHECK-LABEL: merge_loads_integer:
; load:
; CHECK: movq
; store:
; CHECK: movq
; CHECK: ret
define void @merge_loads_integer(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {
entry:
  %nonempty = icmp sgt i32 %count, 0
  br i1 %nonempty, label %preheader, label %exit

preheader:
  %src0 = getelementptr inbounds %struct.B, %struct.B* %q, i64 0, i32 0
  %src1 = getelementptr inbounds %struct.B, %struct.B* %q, i64 0, i32 1
  br label %loop

loop:
  %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
  %dst = phi %struct.B* [ %p, %preheader ], [ %dst.next, %loop ]
  %v0 = load i32, i32* %src0
  %v1 = load i32, i32* %src1
  %dst0 = getelementptr inbounds %struct.B, %struct.B* %dst, i64 0, i32 0
  store i32 %v0, i32* %dst0
  %dst1 = getelementptr inbounds %struct.B, %struct.B* %dst, i64 0, i32 1
  store i32 %v1, i32* %dst1
  %iv.next = add nsw i32 %iv, 1
  %dst.next = getelementptr inbounds %struct.B, %struct.B* %dst, i64 1
  %done = icmp eq i32 %iv.next, %count
  br i1 %done, label %exit, label %loop

exit:
  ret void
}


; Four adjacent i32 loads/stores: expected to become one vector load and store.
; CHECK-LABEL: merge_loads_vector:
; load:
; CHECK: movups
; store:
; CHECK: movups
; CHECK: ret
define void @merge_loads_vector(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {
entry:
  %nonempty = icmp sgt i32 %count, 0
  br i1 %nonempty, label %preheader, label %exit

preheader:
  %src0 = getelementptr inbounds %struct.B, %struct.B* %q, i64 0, i32 0
  %src1 = getelementptr inbounds %struct.B, %struct.B* %q, i64 0, i32 1
  %src2 = getelementptr inbounds %struct.B, %struct.B* %q, i64 0, i32 2
  %src3 = getelementptr inbounds %struct.B, %struct.B* %q, i64 0, i32 3
  br label %loop

loop:
  %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
  %dst = phi %struct.B* [ %p, %preheader ], [ %dst.next, %loop ]
  %dst0 = getelementptr inbounds %struct.B, %struct.B* %dst, i64 0, i32 0
  %dst1 = getelementptr inbounds %struct.B, %struct.B* %dst, i64 0, i32 1
  %dst2 = getelementptr inbounds %struct.B, %struct.B* %dst, i64 0, i32 2
  %dst3 = getelementptr inbounds %struct.B, %struct.B* %dst, i64 0, i32 3
  %v0 = load i32, i32* %src0
  %v1 = load i32, i32* %src1
  %v2 = load i32, i32* %src2
  %v3 = load i32, i32* %src3
  store i32 %v0, i32* %dst0
  store i32 %v1, i32* %dst1
  store i32 %v2, i32* %dst2
  store i32 %v3, i32* %dst3
  %iv.next = add nsw i32 %iv, 1
  %dst.next = getelementptr inbounds %struct.B, %struct.B* %dst, i64 1
  %done = icmp eq i32 %iv.next, %count
  br i1 %done, label %exit, label %loop

exit:
  ret void
}

;; On x86, even unaligned copies should be merged to vector ops.
;; TODO: however, this cannot happen at the moment, due to brokenness
;; in MergeConsecutiveStores. See UseAA FIXME in DAGCombiner.cpp
;; visitSTORE.
; CHECK-LABEL: merge_loads_no_align:
; load:
; CHECK-NOT: vmovups ;; TODO
; store:
; CHECK-NOT: vmovups ;; TODO
; CHECK: ret
define void @merge_loads_no_align(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {
entry:
  %nonempty = icmp sgt i32 %count, 0
  br i1 %nonempty, label %preheader, label %exit

preheader:
  %src0 = getelementptr inbounds %struct.B, %struct.B* %q, i64 0, i32 0
  %src1 = getelementptr inbounds %struct.B, %struct.B* %q, i64 0, i32 1
  %src2 = getelementptr inbounds %struct.B, %struct.B* %q, i64 0, i32 2
  %src3 = getelementptr inbounds %struct.B, %struct.B* %q, i64 0, i32 3
  br label %loop

loop:
  ; Same four-element copy as the aligned variant, but every access is align 1.
  %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
  %dst = phi %struct.B* [ %p, %preheader ], [ %dst.next, %loop ]
  %dst0 = getelementptr inbounds %struct.B, %struct.B* %dst, i64 0, i32 0
  %dst1 = getelementptr inbounds %struct.B, %struct.B* %dst, i64 0, i32 1
  %dst2 = getelementptr inbounds %struct.B, %struct.B* %dst, i64 0, i32 2
  %dst3 = getelementptr inbounds %struct.B, %struct.B* %dst, i64 0, i32 3
  %v0 = load i32, i32* %src0, align 1
  %v1 = load i32, i32* %src1, align 1
  %v2 = load i32, i32* %src2, align 1
  %v3 = load i32, i32* %src3, align 1
  store i32 %v0, i32* %dst0, align 1
  store i32 %v1, i32* %dst1, align 1
  store i32 %v2, i32* %dst2, align 1
  store i32 %v3, i32* %dst3, align 1
  %iv.next = add nsw i32 %iv, 1
  %dst.next = getelementptr inbounds %struct.B, %struct.B* %dst, i64 1
  %done = icmp eq i32 %iv.next, %count
  br i1 %done, label %exit, label %loop

exit:
  ret void
}

; Make sure that we merge the consecutive load/store sequence below and use a
; word (16 bit) instead of a byte copy.
; CHECK-LABEL: MergeLoadStoreBaseIndexOffset:
; CHECK: movw (%{{.*}},%{{.*}}), [[REG:%[a-z]+]]
; CHECK: movw [[REG]], (%{{.*}})
define void @MergeLoadStoreBaseIndexOffset(i64* %a, i8* %b, i8* %c, i32 %n) {
entry:
  br label %loop

loop:
  %remaining = phi i32 [ %n, %entry ], [ %remaining.next, %loop ]
  %dst = phi i8* [ %b, %entry ], [ %dst.next, %loop ]
  %idx.ptr = phi i64* [ %a, %entry ], [ %idx.ptr.next, %loop ]
  %idx.ptr.next = getelementptr inbounds i64, i64* %idx.ptr, i64 1
  ; Read an index from %a, then load the bytes at %c[idx] and %c[idx + 1].
  %idx = load i64, i64* %idx.ptr, align 1
  %src0 = getelementptr inbounds i8, i8* %c, i64 %idx
  %byte0 = load i8, i8* %src0, align 1
  %idx.plus1 = add i64 %idx, 1
  %src1 = getelementptr inbounds i8, i8* %c, i64 %idx.plus1
  %byte1 = load i8, i8* %src1, align 1
  ; Write both bytes to adjacent destination slots.
  store i8 %byte0, i8* %dst, align 1
  %dst1 = getelementptr inbounds i8, i8* %dst, i64 1
  store i8 %byte1, i8* %dst1, align 1
  %dst.next = getelementptr inbounds i8, i8* %dst, i64 2
  %remaining.next = add nsw i32 %remaining, -1
  %done = icmp eq i32 %remaining.next, 0
  br i1 %done, label %exit, label %loop

exit:
  ret void
}

; Make sure that we merge the consecutive load/store sequence below and use a
; word (16 bit) instead of a byte copy even if there are intermediate sign
; extensions.
; CHECK-LABEL: MergeLoadStoreBaseIndexOffsetSext:
; CHECK: movw (%{{.*}},%{{.*}}), [[REG:%[a-z]+]]
; CHECK: movw [[REG]], (%{{.*}})
define void @MergeLoadStoreBaseIndexOffsetSext(i8* %a, i8* %b, i8* %c, i32 %n) {
  br label %1

; <label>:1
  %.09 = phi i32 [ %n, %0 ], [ %12, %1 ]
  %.08 = phi i8* [ %b, %0 ], [ %11, %1 ]
  %.0 = phi i8* [ %a, %0 ], [ %2, %1 ]
  %2 = getelementptr inbounds i8, i8* %.0, i64 1
  %3 = load i8, i8* %.0, align 1
  ; The index is sign-extended first; the +1 is then done in i64, so both
  ; addresses derive from the same extended value.
  %4 = sext i8 %3 to i64
  %5 = getelementptr inbounds i8, i8* %c, i64 %4
  %6 = load i8, i8* %5, align 1
  %7 = add i64 %4, 1
  %8 = getelementptr inbounds i8, i8* %c, i64 %7
  %9 = load i8, i8* %8, align 1
  store i8 %6, i8* %.08, align 1
  %10 = getelementptr inbounds i8, i8* %.08, i64 1
  store i8 %9, i8* %10, align 1
  %11 = getelementptr inbounds i8, i8* %.08, i64 2
  %12 = add nsw i32 %.09, -1
  %13 = icmp eq i32 %12, 0
  br i1 %13, label %14, label %1

; <label>:14
  ret void
}

; However, we can only ignore sign extensions when they are applied to all of
; the memory address computations. Below, the +1 is computed in i8 (where it
; can wrap) and only then sign-extended, so no 16-bit load/store pair may be
; formed.
;
; Each negative pattern is self-contained: a variable bound inside a negative
; directive is only defined when that (forbidden) pattern matches, so it must
; not be referenced from another directive.
; CHECK-LABEL: loadStoreBaseIndexOffsetSextNoSex:
; CHECK-NOT: movw (%{{.*}},%{{.*}}), %{{[a-z]+}}
; CHECK-NOT: movw %{{[a-z]+}}, (%{{.*}})
define void @loadStoreBaseIndexOffsetSextNoSex(i8* %a, i8* %b, i8* %c, i32 %n) {
  br label %1

; <label>:1
  %.09 = phi i32 [ %n, %0 ], [ %12, %1 ]
  %.08 = phi i8* [ %b, %0 ], [ %11, %1 ]
  %.0 = phi i8* [ %a, %0 ], [ %2, %1 ]
  %2 = getelementptr inbounds i8, i8* %.0, i64 1
  %3 = load i8, i8* %.0, align 1
  %4 = sext i8 %3 to i64
  %5 = getelementptr inbounds i8, i8* %c, i64 %4
  %6 = load i8, i8* %5, align 1
  ; Narrow add followed by sext: differs from the i64 add used in the test above.
  %7 = add i8 %3, 1
  %wrap.4 = sext i8 %7 to i64
  %8 = getelementptr inbounds i8, i8* %c, i64 %wrap.4
  %9 = load i8, i8* %8, align 1
  store i8 %6, i8* %.08, align 1
  %10 = getelementptr inbounds i8, i8* %.08, i64 1
  store i8 %9, i8* %10, align 1
  %11 = getelementptr inbounds i8, i8* %.08, i64 2
  %12 = add nsw i32 %.09, -1
  %13 = icmp eq i32 %12, 0
  br i1 %13, label %14, label %1

; <label>:14
  ret void
}

; PR21711 ( http://llvm.org/bugs/show_bug.cgi?id=21711 )
; All eight lanes of %v are extracted and stored to eight consecutive floats
; starting at %ptr; the test expects a vector store.
define void @merge_vec_element_store(<8 x float> %v, float* %ptr) {
  %vecext0 = extractelement <8 x float> %v, i32 0
  %vecext1 = extractelement <8 x float> %v, i32 1
  %vecext2 = extractelement <8 x float> %v, i32 2
  %vecext3 = extractelement <8 x float> %v, i32 3
  %vecext4 = extractelement <8 x float> %v, i32 4
  %vecext5 = extractelement <8 x float> %v, i32 5
  %vecext6 = extractelement <8 x float> %v, i32 6
  %vecext7 = extractelement <8 x float> %v, i32 7
  %arrayidx1 = getelementptr inbounds float, float* %ptr, i64 1
  %arrayidx2 = getelementptr inbounds float, float* %ptr, i64 2
  %arrayidx3 = getelementptr inbounds float, float* %ptr, i64 3
  %arrayidx4 = getelementptr inbounds float, float* %ptr, i64 4
  %arrayidx5 = getelementptr inbounds float, float* %ptr, i64 5
  %arrayidx6 = getelementptr inbounds float, float* %ptr, i64 6
  %arrayidx7 = getelementptr inbounds float, float* %ptr, i64 7
  store float %vecext0, float* %ptr, align 4
  store float %vecext1, float* %arrayidx1, align 4
  store float %vecext2, float* %arrayidx2, align 4
  store float %vecext3, float* %arrayidx3, align 4
  store float %vecext4, float* %arrayidx4, align 4
  store float %vecext5, float* %arrayidx5, align 4
  store float %vecext6, float* %arrayidx6, align 4
  store float %vecext7, float* %arrayidx7, align 4
  ret void

; CHECK-LABEL: merge_vec_element_store
; CHECK: vmovups
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
}

; PR21711 - Merge vector stores into wider vector stores.
; These should be merged into 32-byte stores.
define void @merge_vec_extract_stores(<8 x float> %v1, <8 x float> %v2, <4 x float>* %ptr) {
  ; Four consecutive 16-byte destination slots, starting at element 3 of %ptr.
  %dst0 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 3
  %dst1 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 4
  %dst2 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 5
  %dst3 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 6
  ; Low and high halves of each 8-element input vector.
  %v1.lo = shufflevector <8 x float> %v1, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %v1.hi = shufflevector <8 x float> %v1, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %v2.lo = shufflevector <8 x float> %v2, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %v2.hi = shufflevector <8 x float> %v2, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  store <4 x float> %v1.lo, <4 x float>* %dst0, align 16
  store <4 x float> %v1.hi, <4 x float>* %dst1, align 16
  store <4 x float> %v2.lo, <4 x float>* %dst2, align 16
  store <4 x float> %v2.hi, <4 x float>* %dst3, align 16
  ret void

; CHECK-LABEL: merge_vec_extract_stores
; CHECK: vmovups %ymm0, 48(%rdi)
; CHECK-NEXT: vmovups %ymm1, 80(%rdi)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
}

; Merging vector stores when sourced from vector loads is not currently handled.
define void @merge_vec_stores_from_loads(<4 x float>* %v, <4 x float>* %ptr) {
  ; Copy two adjacent <4 x float> values from %v to %ptr.
  %src0 = getelementptr inbounds <4 x float>, <4 x float>* %v, i64 0
  %src1 = getelementptr inbounds <4 x float>, <4 x float>* %v, i64 1
  %lo = load <4 x float>, <4 x float>* %src0
  %hi = load <4 x float>, <4 x float>* %src1
  %dst0 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 0
  %dst1 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 1
  store <4 x float> %lo, <4 x float>* %dst0, align 16
  store <4 x float> %hi, <4 x float>* %dst1, align 16
  ret void

; CHECK-LABEL: merge_vec_stores_from_loads
; CHECK: vmovaps
; CHECK-NEXT: vmovaps
; CHECK-NEXT: vmovaps
; CHECK-NEXT: vmovaps
; CHECK-NEXT: retq
}

; Merging vector stores when sourced from a constant vector is not currently handled.
define void @merge_vec_stores_of_constants(<4 x i32>* %ptr) {
  ; Two adjacent all-zero <4 x i32> stores at elements 3 and 4 of %ptr.
  %slot3 = getelementptr inbounds <4 x i32>, <4 x i32>* %ptr, i64 3
  %slot4 = getelementptr inbounds <4 x i32>, <4 x i32>* %ptr, i64 4
  store <4 x i32> zeroinitializer, <4 x i32>* %slot3, align 16
  store <4 x i32> zeroinitializer, <4 x i32>* %slot4, align 16
  ret void

; CHECK-LABEL: merge_vec_stores_of_constants
; CHECK: vxorps
; CHECK-NEXT: vmovaps
; CHECK-NEXT: vmovaps
; CHECK-NEXT: retq
}

; This is a minimized test based on real code that was failing.
; We could merge stores (and loads) like this...
define void @merge_vec_element_and_scalar_load([6 x i64]* %array) {
  %e0 = getelementptr inbounds [6 x i64], [6 x i64]* %array, i64 0, i64 0
  %e1 = getelementptr inbounds [6 x i64], [6 x i64]* %array, i64 0, i64 1
  %e4 = getelementptr inbounds [6 x i64], [6 x i64]* %array, i64 0, i64 4
  %e5 = getelementptr inbounds [6 x i64], [6 x i64]* %array, i64 0, i64 5

  ; array[4] = array[0], via a plain scalar load.
  %s0 = load i64, i64* %e0, align 8
  store i64 %s0, i64* %e4, align 8

  ; array[5] = array[1], where array[1] is read as lane 0 of a <2 x i64> load.
  %vec.ptr = bitcast i64* %e1 to <2 x i64>*
  %pair = load <2 x i64>, <2 x i64>* %vec.ptr, align 8
  %s1 = extractelement <2 x i64> %pair, i32 0
  store i64 %s1, i64* %e5, align 8
  ret void

; CHECK-LABEL: merge_vec_element_and_scalar_load
; CHECK: movq (%rdi), %rax
; CHECK-NEXT: movq %rax, 32(%rdi)
; CHECK-NEXT: movq 8(%rdi), %rax
; CHECK-NEXT: movq %rax, 40(%rdi)
; CHECK-NEXT: retq
}