• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; RUN: opt -loop-accesses -analyze < %s | FileCheck %s
2
3target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
4target triple = "aarch64--linux-gnueabi"
5
6; 3 reads and 3 writes should need 12 memchecks
7; CHECK: function 'testf':
8; CHECK: Memory dependences are safe with run-time checks
9
10; Memory dependencies have labels starting from 0, so in
11; order to verify that we have n checks, we look for
12; (n-1): and not n:.
13
14; CHECK: Run-time memory checks:
15; CHECK-NEXT: Check 0:
16; CHECK: Check 11:
17; CHECK-NOT: Check 12:
18
19define void @testf(i16* %a,
20               i16* %b,
21               i16* %c,
22               i16* %d,
23               i16* %e,
24               i16* %f) {
25entry:
26  br label %for.body
27
28for.body:                                         ; preds = %for.body, %entry
29  %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
30
31  %add = add nuw nsw i64 %ind, 1
32
33  %arrayidxA = getelementptr inbounds i16, i16* %a, i64 %ind
34  %loadA = load i16, i16* %arrayidxA, align 2
35
36  %arrayidxB = getelementptr inbounds i16, i16* %b, i64 %ind
37  %loadB = load i16, i16* %arrayidxB, align 2
38
39  %arrayidxC = getelementptr inbounds i16, i16* %c, i64 %ind
40  %loadC = load i16, i16* %arrayidxC, align 2
41
42  %mul = mul i16 %loadB, %loadA
43  %mul1 = mul i16 %mul, %loadC
44
45  %arrayidxD = getelementptr inbounds i16, i16* %d, i64 %ind
46  store i16 %mul1, i16* %arrayidxD, align 2
47
48  %arrayidxE = getelementptr inbounds i16, i16* %e, i64 %ind
49  store i16 %mul, i16* %arrayidxE, align 2
50
51  %arrayidxF = getelementptr inbounds i16, i16* %f, i64 %ind
52  store i16 %mul1, i16* %arrayidxF, align 2
53
54  %exitcond = icmp eq i64 %add, 20
55  br i1 %exitcond, label %for.end, label %for.body
56
57for.end:                                          ; preds = %for.body
58  ret void
59}
60
61; The following (testg and testh) check that we can group
62; memory checks of accesses which differ by a constant value.
63; Both tests are based on the following C code:
64;
65; void testh(short *a, short *b, short *c) {
66;   unsigned long ind = 0;
67;   for (unsigned long ind = 0; ind < 20; ++ind) {
68;     c[2 * ind] = a[ind] * a[ind + 1];
69;     c[2 * ind + 1] = a[ind] * a[ind + 1] * b[ind];
70;   }
71; }
72;
73; It is sufficient to check the intervals
74; [a, a + 21], [b, b + 20] against [c, c + 41].
75
76; 3 reads and 2 writes - two of the reads can be merged,
77; and the writes can be merged as well. This gives us a
78; total of 2 memory checks.
79
80; CHECK: function 'testg':
81
82; CHECK: Run-time memory checks:
83; CHECK-NEXT:   Check 0:
84; CHECK-NEXT:     Comparing group ([[ZERO:.+]]):
85; CHECK-NEXT:       %arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
86; CHECK-NEXT:       %arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
87; CHECK-NEXT:     Against group ([[ONE:.+]]):
88; CHECK-NEXT:       %arrayidxA1 = getelementptr inbounds i16, i16* %a, i64 %add
89; CHECK-NEXT:       %arrayidxA = getelementptr inbounds i16, i16* %a, i64 %ind
90; CHECK-NEXT:   Check 1:
91; CHECK-NEXT:     Comparing group ({{.*}}[[ZERO]]):
92; CHECK-NEXT:       %arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
93; CHECK-NEXT:       %arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
94; CHECK-NEXT:     Against group ([[TWO:.+]]):
95; CHECK-NEXT:       %arrayidxB = getelementptr inbounds i16, i16* %b, i64 %ind
96; CHECK-NEXT:   Grouped accesses:
97; CHECK-NEXT:    Group {{.*}}[[ZERO]]:
98; CHECK-NEXT:       (Low: %c High: (78 + %c))
99; CHECK-NEXT:         Member: {(2 + %c),+,4}
100; CHECK-NEXT:         Member: {%c,+,4}
101; CHECK-NEXT:     Group {{.*}}[[ONE]]:
102; CHECK-NEXT:       (Low: %a High: (40 + %a))
103; CHECK-NEXT:         Member: {(2 + %a),+,2}
104; CHECK-NEXT:         Member: {%a,+,2}
105; CHECK-NEXT:     Group {{.*}}[[TWO]]:
106; CHECK-NEXT:       (Low: %b High: (38 + %b))
107; CHECK-NEXT:         Member: {%b,+,2}
108
109define void @testg(i16* %a,
110               i16* %b,
111               i16* %c) {
112entry:
113  br label %for.body
114
115for.body:                                         ; preds = %for.body, %entry
116  %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
117  %store_ind = phi i64 [ 0, %entry ], [ %store_ind_next, %for.body ]
118
119  %add = add nuw nsw i64 %ind, 1
120  %store_ind_inc = add nuw nsw i64 %store_ind, 1
121  %store_ind_next = add nuw nsw i64 %store_ind_inc, 1
122
123  %arrayidxA = getelementptr inbounds i16, i16* %a, i64 %ind
124  %loadA = load i16, i16* %arrayidxA, align 2
125
126  %arrayidxA1 = getelementptr inbounds i16, i16* %a, i64 %add
127  %loadA1 = load i16, i16* %arrayidxA1, align 2
128
129  %arrayidxB = getelementptr inbounds i16, i16* %b, i64 %ind
130  %loadB = load i16, i16* %arrayidxB, align 2
131
132  %mul = mul i16 %loadA, %loadA1
133  %mul1 = mul i16 %mul, %loadB
134
135  %arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
136  store i16 %mul1, i16* %arrayidxC, align 2
137
138  %arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
139  store i16 %mul, i16* %arrayidxC1, align 2
140
141  %exitcond = icmp eq i64 %add, 20
142  br i1 %exitcond, label %for.end, label %for.body
143
144for.end:                                          ; preds = %for.body
145  ret void
146}
147
148; 3 reads and 2 writes - the writes can be merged into a single
149; group, but the GEPs used for the reads are not marked as inbounds.
150; We can still merge them because we are using a unit stride for
151; accesses, so we cannot overflow the GEPs.
152
153; CHECK: function 'testh':
154; CHECK: Run-time memory checks:
155; CHECK-NEXT:   Check 0:
156; CHECK-NEXT:     Comparing group ([[ZERO:.+]]):
157; CHECK-NEXT:         %arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
158; CHECK-NEXT:         %arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
159; CHECK-NEXT:     Against group ([[ONE:.+]]):
160; CHECK-NEXT:         %arrayidxA1 = getelementptr i16, i16* %a, i64 %add
161; CHECK-NEXT:         %arrayidxA = getelementptr i16, i16* %a, i64 %ind
162; CHECK-NEXT:   Check 1:
163; CHECK-NEXT:     Comparing group ({{.*}}[[ZERO]]):
164; CHECK-NEXT:         %arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
165; CHECK-NEXT:         %arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
166; CHECK-NEXT:     Against group ([[TWO:.+]]):
167; CHECK-NEXT:         %arrayidxB = getelementptr i16, i16* %b, i64 %ind
168; CHECK-NEXT:   Grouped accesses:
169; CHECK-NEXT:     Group {{.*}}[[ZERO]]:
170; CHECK-NEXT:       (Low: %c High: (78 + %c))
171; CHECK-NEXT:         Member: {(2 + %c),+,4}
172; CHECK-NEXT:         Member: {%c,+,4}
173; CHECK-NEXT:     Group {{.*}}[[ONE]]:
174; CHECK-NEXT:       (Low: %a High: (40 + %a))
175; CHECK-NEXT:         Member: {(2 + %a),+,2}
176; CHECK-NEXT:         Member: {%a,+,2}
177; CHECK-NEXT:     Group {{.*}}[[TWO]]:
178; CHECK-NEXT:       (Low: %b High: (38 + %b))
179; CHECK-NEXT:         Member: {%b,+,2}
180
181define void @testh(i16* %a,
182               i16* %b,
183               i16* %c) {
184entry:
185  br label %for.body
186
187for.body:                                         ; preds = %for.body, %entry
188  %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
189  %store_ind = phi i64 [ 0, %entry ], [ %store_ind_next, %for.body ]
190
191  %add = add nuw nsw i64 %ind, 1
192  %store_ind_inc = add nuw nsw i64 %store_ind, 1
193  %store_ind_next = add nuw nsw i64 %store_ind_inc, 1
194
195  %arrayidxA = getelementptr i16, i16* %a, i64 %ind
196  %loadA = load i16, i16* %arrayidxA, align 2
197
198  %arrayidxA1 = getelementptr i16, i16* %a, i64 %add
199  %loadA1 = load i16, i16* %arrayidxA1, align 2
200
201  %arrayidxB = getelementptr i16, i16* %b, i64 %ind
202  %loadB = load i16, i16* %arrayidxB, align 2
203
204  %mul = mul i16 %loadA, %loadA1
205  %mul1 = mul i16 %mul, %loadB
206
207  %arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
208  store i16 %mul1, i16* %arrayidxC, align 2
209
210  %arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
211  store i16 %mul, i16* %arrayidxC1, align 2
212
213  %exitcond = icmp eq i64 %add, 20
214  br i1 %exitcond, label %for.end, label %for.body
215
216for.end:                                          ; preds = %for.body
217  ret void
218}
219
220; Don't merge pointers if we need to perform a check against a pointer
221; to the same underlying object (doing so would emit a check that could be
222; falsely invalidated) For example, in the following loop:
223;
224; for (i = 0; i < 5000; ++i)
225;   a[i + offset] = a[i] + a[i + 10000]
226;
227; we should not merge the intervals associated with the reads (0,5000) and
228; (10000, 15000) into (0, 15000) as this will pottentially fail the check
229; against the interval associated with the write.
230;
231; We cannot have this check unless ShouldRetryWithRuntimeCheck is set,
232; and therefore the grouping algorithm would create a separate group for
233; each pointer.
234
235; CHECK: function 'testi':
236; CHECK: Run-time memory checks:
237; CHECK-NEXT:   Check 0:
238; CHECK-NEXT:     Comparing group ([[ZERO:.+]]):
239; CHECK-NEXT:       %storeidx = getelementptr inbounds i16, i16* %a, i64 %store_ind
240; CHECK-NEXT:     Against group ([[ONE:.+]]):
241; CHECK-NEXT:       %arrayidxA1 = getelementptr i16, i16* %a, i64 %ind
242; CHECK-NEXT:   Check 1:
243; CHECK-NEXT:     Comparing group ({{.*}}[[ZERO]]):
244; CHECK-NEXT:       %storeidx = getelementptr inbounds i16, i16* %a, i64 %store_ind
245; CHECK-NEXT:     Against group ([[TWO:.+]]):
246; CHECK-NEXT:       %arrayidxA2 = getelementptr i16, i16* %a, i64 %ind2
247; CHECK-NEXT:   Grouped accesses:
248; CHECK-NEXT:     Group {{.*}}[[ZERO]]:
249; CHECK-NEXT:       (Low: ((2 * %offset) + %a) High: (9998 + (2 * %offset) + %a))
250; CHECK-NEXT:         Member: {((2 * %offset) + %a),+,2}<nsw><%for.body>
251; CHECK-NEXT:     Group {{.*}}[[ONE]]:
252; CHECK-NEXT:       (Low: %a High: (9998 + %a))
253; CHECK-NEXT:         Member: {%a,+,2}<%for.body>
254; CHECK-NEXT:     Group {{.*}}[[TWO]]:
255; CHECK-NEXT:       (Low: (20000 + %a) High: (29998 + %a))
256; CHECK-NEXT:         Member: {(20000 + %a),+,2}<%for.body>
257
258define void @testi(i16* %a,
259                   i64 %offset) {
260entry:
261  br label %for.body
262
263for.body:                                         ; preds = %for.body, %entry
264  %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
265  %store_ind = phi i64 [ %offset, %entry ], [ %store_ind_inc, %for.body ]
266
267  %add = add nuw nsw i64 %ind, 1
268  %store_ind_inc = add nuw nsw i64 %store_ind, 1
269
270  %arrayidxA1 = getelementptr i16, i16* %a, i64 %ind
271  %ind2 = add nuw nsw i64 %ind, 10000
272  %arrayidxA2 = getelementptr i16, i16* %a, i64 %ind2
273
274  %loadA1 = load i16, i16* %arrayidxA1, align 2
275  %loadA2 = load i16, i16* %arrayidxA2, align 2
276
277  %addres = add i16 %loadA1, %loadA2
278
279  %storeidx = getelementptr inbounds i16, i16* %a, i64 %store_ind
280  store i16 %addres, i16* %storeidx, align 2
281
282  %exitcond = icmp eq i64 %add, 5000
283  br i1 %exitcond, label %for.end, label %for.body
284
285for.end:                                          ; preds = %for.body
286  ret void
287}
288