; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"

; For @test11_pattern
; CHECK: @.memset_pattern = private unnamed_addr constant [4 x i32] [i32 1, i32 1, i32 1, i32 1]

; For @test13_pattern
; CHECK: @.memset_pattern.1 = private unnamed_addr constant [2 x i32*] [i32* @G, i32* @G]

target triple = "x86_64-apple-darwin10.0.0"

; Simplest case: a counted loop that stores a constant zero byte to consecutive
; addresses is expected to become a single llvm.memset of %Size bytes.
define void @test1(i8* %Base, i64 %Size) nounwind ssp {
bb.nph:                                           ; entry block
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
  store i8 0, i8* %I.0.014, align 1
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
; CHECK-LABEL: @test1(
; CHECK: call void @llvm.memset.p0i8.i64(i8* align 1 %Base, i8 0, i64 %Size, i1 false)
; CHECK-NOT: store
}

; Make sure memset is formed for larger than 1 byte stores, and that the
; alignment of the store is preserved. The byte count is the trip count scaled
; by the 2-byte element size (shl by 1).
define void @test1_i16(i16* align 2 %Base, i64 %Size) nounwind ssp {
bb.nph:                                           ; entry block
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i16, i16* %Base, i64 %indvar
  store i16 0, i16* %I.0.014, align 2
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
; CHECK-LABEL: @test1_i16(
; CHECK: %[[BaseBC:.*]] = bitcast i16* %Base to i8*
; CHECK: %[[Sz:[0-9]+]] = shl i64 %Size, 1
; CHECK: call void @llvm.memset.p0i8.i64(i8* align 2 %[[BaseBC]], i8 0, i64 %[[Sz]], i1 false)
; CHECK-NOT: store
}

; This is a loop that was rotated but where the blocks weren't merged.  This
; shouldn't perturb us. Here the store sits in the header and the exit test in
; the separate latch block %for.body.cont.
define void @test1a(i8* %Base, i64 %Size) nounwind ssp {
bb.nph:                                           ; entry block
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body.cont
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body.cont ]
  %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
  store i8 0, i8* %I.0.014, align 1
  %indvar.next = add i64 %indvar, 1
  br label %for.body.cont

for.body.cont:                                    ; preds = %for.body
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body.cont
  ret void
; CHECK-LABEL: @test1a(
; CHECK: call void @llvm.memset.p0i8.i64(i8* align 1 %Base, i8 0, i64 %Size, i1 false)
; CHECK-NOT: store
}


; The stored i32 16843009 is 0x01010101, i.e. every byte is 1, so the loop is
; expected to become a memset of byte value 1 over 4 * %Size bytes. The loop is
; guarded by a zero-trip check in %entry.
define void @test2(i32* %Base, i64 %Size) nounwind ssp {
entry:
  %cmp10 = icmp eq i64 %Size, 0
  br i1 %cmp10, label %for.end, label %for.body

for.body:                                         ; preds = %entry, %for.body
  %i.011 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
  %add.ptr.i = getelementptr i32, i32* %Base, i64 %i.011
  store i32 16843009, i32* %add.ptr.i, align 4
  %inc = add nsw i64 %i.011, 1
  %exitcond = icmp eq i64 %inc, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  ret void
; The scaled size is captured in a pattern variable rather than hard-coding the
; temporary's number, matching the style used by @test1_i16.
; CHECK-LABEL: @test2(
; CHECK: br i1 %cmp10,
; CHECK: %[[Sz:[0-9]+]] = shl i64 %Size, 2
; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %Base1, i8 1, i64 %[[Sz]], i1 false)
; CHECK-NOT: store
}

; This is a case where there is an extra may-aliased store in the loop, so we
; can't promote the loop to a memset.
define void @test3(i32* %Base, i64 %Size, i8 *%MayAlias) nounwind ssp {
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %i.011 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
  %add.ptr.i = getelementptr i32, i32* %Base, i64 %i.011
  store i32 16843009, i32* %add.ptr.i, align 4

  ;; %MayAlias is an unrelated pointer argument and may point into %Base.
  store i8 42, i8* %MayAlias
  %inc = add nsw i64 %i.011, 1
  %exitcond = icmp eq i64 %inc, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
; CHECK-LABEL: @test3(
; CHECK-NOT: memset
; CHECK: ret void
}

; Make sure the first store in the loop is turned into a memset even though a
; second store is present, because that second store cannot overlap the range.
define void @test4(i8* %Base) nounwind ssp {
bb.nph:                                           ; entry block
  %Base100 = getelementptr i8, i8* %Base, i64 1000
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
  store i8 0, i8* %I.0.014, align 1

  ;; Store beyond the range of the memset (offset 1000 vs. the 100 bytes
  ;; written by the loop), so it should be safe to promote.
  store i8 42, i8* %Base100

  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, 100
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
; CHECK-LABEL: @test4(
; CHECK: call void @llvm.memset.p0i8.i64(i8* align 1 %Base, i8 0, i64 100, i1 false)
}

; This can't be promoted: the stored value is loop-variant (it is derived from
; the induction variable), so no single memset byte value exists.
define void @test5(i8* %Base, i64 %Size) nounwind ssp {
bb.nph:                                           ; entry block
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar

  %V = trunc i64 %indvar to i8
  store i8 %V, i8* %I.0.014, align 1
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
; CHECK-LABEL: @test5(
; CHECK-NOT: memset
; CHECK: ret void
}


;; memcpy formation: a byte-wise load/store copy between two distinct allocas
;; is expected to become a single llvm.memcpy of %Size bytes.
define void @test6(i64 %Size) nounwind ssp {
bb.nph:
  %Base = alloca i8, i32 10000
  %Dest = alloca i8, i32 10000
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
  %DestI = getelementptr i8, i8* %Dest, i64 %indvar
  %V = load i8, i8* %I.0.014, align 1
  store i8 %V, i8* %DestI, align 1
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
; CHECK-LABEL: @test6(
; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %Dest, i8* align 1 %Base, i64 %Size, i1 false)
; CHECK-NOT: store
; CHECK: ret void
}

;; memcpy formation, check alignment: the destination (store) is 4-byte aligned
;; while the source (load) is only 1-byte aligned; each alignment must be
;; carried over to the matching memcpy operand.
define void @test6_dest_align(i32* noalias align 1 %Base, i32* noalias align 4 %Dest, i64 %Size) nounwind ssp {
bb.nph:
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i32, i32* %Base, i64 %indvar
  %DestI = getelementptr i32, i32* %Dest, i64 %indvar
  %V = load i32, i32* %I.0.014, align 1
  store i32 %V, i32* %DestI, align 4
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
; CHECK-LABEL: @test6_dest_align(
; CHECK: %[[Dst:.*]] = bitcast i32* %Dest to i8*
; CHECK: %[[Src:.*]] = bitcast i32* %Base to i8*
; CHECK: %[[Sz:[0-9]+]] = shl i64 %Size, 2
; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %[[Dst]], i8* align 1 %[[Src]], i64 %[[Sz]], i1 false)
; CHECK-NOT: store
; CHECK: ret void
}

;; memcpy formation, check alignment: mirror image of @test6_dest_align, with
;; the source (load) 4-byte aligned and the destination (store) 1-byte aligned.
define void @test6_src_align(i32* noalias align 4 %Base, i32* noalias align 1 %Dest, i64 %Size) nounwind ssp {
bb.nph:
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i32, i32* %Base, i64 %indvar
  %DestI = getelementptr i32, i32* %Dest, i64 %indvar
  %V = load i32, i32* %I.0.014, align 4
  store i32 %V, i32* %DestI, align 1
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
; The Dst/Src pattern variables are (re)defined here so that this function's
; checks do not depend on the names captured while matching a previous function.
; CHECK-LABEL: @test6_src_align(
; CHECK: %[[Dst:.*]] = bitcast i32* %Dest to i8*
; CHECK: %[[Src:.*]] = bitcast i32* %Base to i8*
; CHECK: %[[Sz:[0-9]+]] = shl i64 %Size, 2
; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %[[Dst]], i8* align 4 %[[Src]], i64 %[[Sz]], i1 false)
; CHECK-NOT: store
; CHECK: ret void
}


; This is a loop that was rotated but where the blocks weren't merged.  This
; shouldn't perturb us. Unlike @test1a, the header holds only the phi and the
; store lives in the latch block %for.body.cont.
define void @test7(i8* %Base, i64 %Size) nounwind ssp {
bb.nph:                                           ; entry block
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body.cont
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body.cont ]
  br label %for.body.cont

for.body.cont:                                    ; preds = %for.body
  %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
  store i8 0, i8* %I.0.014, align 1
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body.cont
  ret void
; CHECK-LABEL: @test7(
; CHECK: call void @llvm.memset.p0i8.i64(i8* align 1 %Base, i8 0, i64 %Size, i1 false)
; CHECK-NOT: store
}

; This loop should not be transformed: it only executes one iteration (the exit
; condition compares the incremented induction variable against 1).
define void @test8(i64* %Ptr, i64 %Size) nounwind ssp {
bb.nph:                                           ; entry block
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %PI = getelementptr i64, i64* %Ptr, i64 %indvar
  store i64 0, i64 *%PI
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, 1
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
; CHECK-LABEL: @test8(
; CHECK: store i64 0, i64* %PI
}

; Opaque helper: its result may alias the pointer passed in.
declare i8* @external(i8*)

;; This cannot be transformed into a memcpy, because the read-from location is
;; mutated by the loop.
define void @test9(i64 %Size) nounwind ssp {
bb.nph:
  %Base = alloca i8, i32 10000
  %Dest = alloca i8, i32 10000

  ;; %Base escapes to @external, so %BaseAlias may point into %Base.
  %BaseAlias = call i8* @external(i8* %Base)
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
  %DestI = getelementptr i8, i8* %Dest, i64 %indvar
  %V = load i8, i8* %I.0.014, align 1
  store i8 %V, i8* %DestI, align 1

  ;; This store can clobber the input.
  store i8 4, i8* %BaseAlias

  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
; CHECK-LABEL: @test9(
; CHECK-NOT: llvm.memcpy
; CHECK: ret void
}

; Two dimensional nested loop should be promoted to one big memset: the inner
; loop covers 100 bytes at offset i*100 and the outer loop runs 100 times, so
; the whole nest writes 10000 contiguous zero bytes starting at %X.
define void @test10(i8* %X) nounwind ssp {
entry:
  br label %bb.nph

bb.nph:                                           ; preds = %entry, %for.inc10
  %i.04 = phi i32 [ 0, %entry ], [ %inc12, %for.inc10 ]
  br label %for.body5

for.body5:                                        ; preds = %for.body5, %bb.nph
  %j.02 = phi i32 [ 0, %bb.nph ], [ %inc, %for.body5 ]
  %mul = mul nsw i32 %i.04, 100
  %add = add nsw i32 %j.02, %mul
  %idxprom = sext i32 %add to i64
  %arrayidx = getelementptr inbounds i8, i8* %X, i64 %idxprom
  store i8 0, i8* %arrayidx, align 1
  %inc = add nsw i32 %j.02, 1
  %cmp4 = icmp eq i32 %inc, 100
  br i1 %cmp4, label %for.inc10, label %for.body5

for.inc10:                                        ; preds = %for.body5
  %inc12 = add nsw i32 %i.04, 1
  %cmp = icmp eq i32 %inc12, 100
  br i1 %cmp, label %for.end13, label %bb.nph

for.end13:                                        ; preds = %for.inc10
  ret void
; CHECK-LABEL: @test10(
; CHECK: entry:
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 %X, i8 0, i64 10000, i1 false)
; CHECK-NOT: store
; CHECK: ret void
}

; On darwin10 (which is the triple in this .ll file) this loop can be turned
; into a memset_pattern call. The stored i32 1 is not a splat of one byte, so a
; plain memset cannot express it; the expected 16-byte pattern global is checked
; at the top of the file.
; rdar://9009151
define void @test11_pattern(i32* nocapture %P) nounwind ssp {
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ]
  %arrayidx = getelementptr i32, i32* %P, i64 %indvar
  store i32 1, i32* %arrayidx, align 4
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, 10000
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
; CHECK-LABEL: @test11_pattern(
; CHECK-NEXT: entry:
; CHECK-NEXT: bitcast
; CHECK-NEXT: memset_pattern
; CHECK-NOT: store
; CHECK: ret void
}

; Store of null should turn into memset of zero. Pointers are 8 bytes under
; this datalayout, so 10000 iterations cover 80000 bytes.
define void @test12(i32** nocapture %P) nounwind ssp {
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ]
  %arrayidx = getelementptr i32*, i32** %P, i64 %indvar
  store i32* null, i32** %arrayidx, align 4
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, 10000
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
; CHECK-LABEL: @test12(
; CHECK-NEXT: entry:
; CHECK-NEXT: bitcast
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 %P1, i8 0, i64 80000, i1 false)
; CHECK-NOT: store
; CHECK: ret void
}

; Global whose address is the value stored by @test13_pattern.
@G = global i32 5

; This store-of-address loop can be turned into a memset_pattern call. The
; expected pattern global (two copies of @G's address) is checked at the top of
; the file.
; rdar://9009151
define void @test13_pattern(i32** nocapture %P) nounwind ssp {
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ]
  %arrayidx = getelementptr i32*, i32** %P, i64 %indvar
  store i32* @G, i32** %arrayidx, align 4
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, 10000
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
; CHECK-LABEL: @test13_pattern(
; CHECK-NEXT: entry:
; CHECK-NEXT: bitcast
; CHECK-NEXT: memset_pattern
; CHECK-NOT: store
; CHECK: ret void
}



; PR9815 - This is a partial overlap case that cannot be safely transformed
; into a memcpy: each iteration reads g_50[i+4] and writes g_50[i+5], so the
; value written by one iteration is the value read by the next.
@g_50 = global [7 x i32] [i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0], align 16

define i32 @test14() nounwind {
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %tmp5 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
  %add = add nsw i32 %tmp5, 4
  %idxprom = sext i32 %add to i64
  %arrayidx = getelementptr inbounds [7 x i32], [7 x i32]* @g_50, i32 0, i64 %idxprom
  %tmp2 = load i32, i32* %arrayidx, align 4
  %add4 = add nsw i32 %tmp5, 5
  %idxprom5 = sext i32 %add4 to i64
  %arrayidx6 = getelementptr inbounds [7 x i32], [7 x i32]* @g_50, i32 0, i64 %idxprom5
  store i32 %tmp2, i32* %arrayidx6, align 4
  %inc = add nsw i32 %tmp5, 1
  %cmp = icmp slt i32 %inc, 2
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body
  %tmp8 = load i32, i32* getelementptr inbounds ([7 x i32], [7 x i32]* @g_50, i32 0, i64 6), align 4
  ret i32 %tmp8
; CHECK-LABEL: @test14(
; CHECK: for.body:
; CHECK: load i32
; CHECK: store i32
; CHECK: br i1 %cmp
}

define void @PR14241(i32* %s, i64 %size) {
; Ensure that we don't form a memcpy for strided loops. Briefly, when we taught
; LoopIdiom about memmove and strided loops, this got miscompiled into a memcpy
; instead of a memmove. If we get the memmove transform back, this will catch
; regressions.
;
; The loop shifts the array down by one element (reads p[1], writes p[0]), so
; source and destination overlap.
;
; CHECK-LABEL: @PR14241(

entry:
  %end.idx = add i64 %size, -1
  %end.ptr = getelementptr inbounds i32, i32* %s, i64 %end.idx
  br label %while.body
; CHECK-NOT: memcpy
;
; FIXME: When we regain the ability to form a memmove here, this test should be
; reversed and turned into a positive assertion.
; CHECK-NOT: memmove

while.body:
  %phi.ptr = phi i32* [ %s, %entry ], [ %next.ptr, %while.body ]
  %src.ptr = getelementptr inbounds i32, i32* %phi.ptr, i64 1
  %val = load i32, i32* %src.ptr, align 4
; CHECK: load
  %dst.ptr = getelementptr inbounds i32, i32* %phi.ptr, i64 0
  store i32 %val, i32* %dst.ptr, align 4
; CHECK: store
  %next.ptr = getelementptr inbounds i32, i32* %phi.ptr, i64 1
  %cmp = icmp eq i32* %next.ptr, %end.ptr
  br i1 %cmp, label %exit, label %while.body

exit:
  ret void
; CHECK: ret void
}

; Recognize loops with a negative stride. The index runs from 65536 down to 0
; inclusive, i.e. 65537 i32 stores = 262148 bytes starting at %f.
define void @test15(i32* nocapture %f) {
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ 65536, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %f, i64 %indvars.iv
  store i32 0, i32* %arrayidx, align 4
  %indvars.iv.next = add nsw i64 %indvars.iv, -1
  %cmp = icmp sgt i64 %indvars.iv, 0
  br i1 %cmp, label %for.body, label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %for.body
  ret void
; CHECK-LABEL: @test15(
; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %f1, i8 0, i64 262148, i1 false)
; CHECK-NOT: store
; CHECK: ret void
}

; Loop with a negative stride.  Verify an aliasing write to f[65536] prevents
; the creation of a memset. f[65536] is the first element the loop zeroes, so
; the extra store of 1 lands inside the range a memset would cover.
define void @test16(i32* nocapture %f) {
entry:
  %arrayidx1 = getelementptr inbounds i32, i32* %f, i64 65536
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ 65536, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %f, i64 %indvars.iv
  store i32 0, i32* %arrayidx, align 4
  store i32 1, i32* %arrayidx1, align 4
  %indvars.iv.next = add nsw i64 %indvars.iv, -1
  %cmp = icmp sgt i64 %indvars.iv, 0
  br i1 %cmp, label %for.body, label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %for.body
  ret void
; CHECK-LABEL: @test16(
; CHECK-NOT: call void @llvm.memset.p0i8.i64
; CHECK: ret void
}

; Handle memcpy-able loops with negative stride. The destination is a fresh
; noalias malloc result, and the loop copies a[c-1] .. a[0] into it, counting
; the index down to zero.
define noalias i32* @test17(i32* nocapture readonly %a, i32 %c) {
entry:
  %conv = sext i32 %c to i64
  %mul = shl nsw i64 %conv, 2
  %call = tail call noalias i8* @malloc(i64 %mul)
  %0 = bitcast i8* %call to i32*
  %tobool.9 = icmp eq i32 %c, 0
  br i1 %tobool.9, label %while.end, label %while.body.preheader

while.body.preheader:                             ; preds = %entry
  br label %while.body

while.body:                                       ; preds = %while.body.preheader, %while.body
  %dec10.in = phi i32 [ %dec10, %while.body ], [ %c, %while.body.preheader ]
  %dec10 = add nsw i32 %dec10.in, -1
  %idxprom = sext i32 %dec10 to i64
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %idxprom
  %1 = load i32, i32* %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i32, i32* %0, i64 %idxprom
  store i32 %1, i32* %arrayidx2, align 4
  %tobool = icmp eq i32 %dec10, 0
  br i1 %tobool, label %while.end.loopexit, label %while.body

while.end.loopexit:                               ; preds = %while.body
  br label %while.end

while.end:                                        ; preds = %while.end.loopexit, %entry
  ret i32* %0
; CHECK-LABEL: @test17(
; CHECK: call void @llvm.memcpy
; CHECK: ret i32*
}

declare noalias i8* @malloc(i64)

; Handle memcpy-able loops with negative stride.
; void test18(unsigned *__restrict__ a, unsigned *__restrict__ b) {
;   for (int i = 2047; i >= 0; --i) {
;     a[i] = b[i];
;   }
; }
; NOTE(review): attribute group #0 is referenced below but no definition of it
; is visible in this file - confirm that is intentional.
define void @test18(i32* noalias nocapture %a, i32* noalias nocapture readonly %b) #0 {
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ 2047, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  store i32 %0, i32* %arrayidx2, align 4
  %indvars.iv.next = add nsw i64 %indvars.iv, -1
  %cmp = icmp sgt i64 %indvars.iv, 0
  br i1 %cmp, label %for.body, label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %for.body
  ret void
; CHECK-LABEL: @test18(
; CHECK: call void @llvm.memcpy
; CHECK: ret
}

; Two dimensional nested loop with negative stride should be promoted to one big
; memset. Both indices count from 99 down to 0, so the nest still covers the
; 10000 bytes starting at %X, just in reverse order.
define void @test19(i8* nocapture %X) {
entry:
  br label %for.cond1.preheader

for.cond1.preheader:                              ; preds = %entry, %for.inc4
  %i.06 = phi i32 [ 99, %entry ], [ %dec5, %for.inc4 ]
  %mul = mul nsw i32 %i.06, 100
  br label %for.body3

for.body3:                                        ; preds = %for.cond1.preheader, %for.body3
  %j.05 = phi i32 [ 99, %for.cond1.preheader ], [ %dec, %for.body3 ]
  %add = add nsw i32 %j.05, %mul
  %idxprom = sext i32 %add to i64
  %arrayidx = getelementptr inbounds i8, i8* %X, i64 %idxprom
  store i8 0, i8* %arrayidx, align 1
  %dec = add nsw i32 %j.05, -1
  %cmp2 = icmp sgt i32 %j.05, 0
  br i1 %cmp2, label %for.body3, label %for.inc4

for.inc4:                                         ; preds = %for.body3
  %dec5 = add nsw i32 %i.06, -1
  %cmp = icmp sgt i32 %i.06, 0
  br i1 %cmp, label %for.cond1.preheader, label %for.end6

for.end6:                                         ; preds = %for.inc4
  ret void
; As in @test10, also require that the original byte store is gone.
; CHECK-LABEL: @test19(
; CHECK: entry:
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 %X, i8 0, i64 10000, i1 false)
; CHECK-NOT: store
; CHECK: ret void
}

; Handle loops where the trip count is a narrow integer that needs to be
; extended. The i32 %size is expected to be zero-extended to i64 and scaled by
; the 8-byte element size before being used as the memset length.
define void @form_memset_narrow_size(i64* %ptr, i32 %size) {
; CHECK-LABEL: @form_memset_narrow_size(
entry:
  %cmp1 = icmp sgt i32 %size, 0
  br i1 %cmp1, label %loop.ph, label %exit
; CHECK:       entry:
; CHECK:         %[[C1:.*]] = icmp sgt i32 %size, 0
; CHECK-NEXT:    br i1 %[[C1]], label %loop.ph, label %exit

loop.ph:
  br label %loop.body
; CHECK:       loop.ph:
; CHECK-NEXT:    %[[ZEXT_SIZE:.*]] = zext i32 %size to i64
; CHECK-NEXT:    %[[SCALED_SIZE:.*]] = shl i64 %[[ZEXT_SIZE]], 3
; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 8 %{{.*}}, i8 0, i64 %[[SCALED_SIZE]], i1 false)

loop.body:
  %storemerge4 = phi i32 [ 0, %loop.ph ], [ %inc, %loop.body ]
  %idxprom = sext i32 %storemerge4 to i64
  %arrayidx = getelementptr inbounds i64, i64* %ptr, i64 %idxprom
  store i64 0, i64* %arrayidx, align 8
  %inc = add nsw i32 %storemerge4, 1
  %cmp2 = icmp slt i32 %inc, %size
  br i1 %cmp2, label %loop.body, label %loop.exit

loop.exit:
  br label %exit

exit:
  ret void
}

; memcpy counterpart of @form_memset_narrow_size: the narrow i32 trip count is
; expected to be zero-extended and scaled by 8 to form the memcpy length.
define void @form_memcpy_narrow_size(i64* noalias %dst, i64* noalias %src, i32 %size) {
; CHECK-LABEL: @form_memcpy_narrow_size(
entry:
  %cmp1 = icmp sgt i32 %size, 0
  br i1 %cmp1, label %loop.ph, label %exit
; CHECK:       entry:
; CHECK:         %[[C1:.*]] = icmp sgt i32 %size, 0
; CHECK-NEXT:    br i1 %[[C1]], label %loop.ph, label %exit

loop.ph:
  br label %loop.body
; CHECK:       loop.ph:
; CHECK-NEXT:    %[[ZEXT_SIZE:.*]] = zext i32 %size to i64
; CHECK-NEXT:    %[[SCALED_SIZE:.*]] = shl i64 %[[ZEXT_SIZE]], 3
; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %{{.*}}, i8* align 8 %{{.*}}, i64 %[[SCALED_SIZE]], i1 false)

loop.body:
  %storemerge4 = phi i32 [ 0, %loop.ph ], [ %inc, %loop.body ]
  %idxprom1 = sext i32 %storemerge4 to i64
  %arrayidx1 = getelementptr inbounds i64, i64* %src, i64 %idxprom1
  %v = load i64, i64* %arrayidx1, align 8
  %idxprom2 = sext i32 %storemerge4 to i64
  %arrayidx2 = getelementptr inbounds i64, i64* %dst, i64 %idxprom2
  store i64 %v, i64* %arrayidx2, align 8
  %inc = add nsw i32 %storemerge4, 1
  %cmp2 = icmp slt i32 %inc, %size
  br i1 %cmp2, label %loop.body, label %loop.exit

loop.exit:
  br label %exit

exit:
  ret void
}

; Validate that "memset_pattern" has the proper attributes.
; CHECK: declare void @memset_pattern16(i8* nocapture, i8* nocapture readonly, i64) [[ATTRS:#[0-9]+]]
; CHECK: [[ATTRS]] = { argmemonly }
