• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; Check that the unexpanded accumulator functions and the dummy variables are deleted.
2; There doesn't seem to be any way to compute the basename of %s, hence the unfortunate
3;   explicit uses of "test_reduce_general_cleanup" below.
4; There doesn't seem to be a way to write a CHECK-NOT pattern that matches only at the
5;   end of a line (llvm-objdump prints the symbol name at the end of each line), so sed
6;   is used to append a '<' to every line, i.e. right after the symbol name.  This lets
7;   us use (e.g.) "aiAccum<" to match the symbol "aiAccum" but not "aiAccum.expand".
8
9; RUN: llvm-rs-as %s -o %t
10; RUN: bcc -o test_reduce_general_cleanup -output_path %T -bclib libclcore.bc -mtriple armv7-none-linux-gnueabi %t
11; RUN: llvm-objdump -t %T/test_reduce_general_cleanup.o | sed -e 's!$!<!' | FileCheck %s
12
13; CHECK-NOT: .rs.reduce_fn
14; CHECK-NOT: aiAccum<
15; CHECK-NOT: mpyAccum<
16; CHECK-NOT: dpAccum<
17; CHECK-NOT: fMMAccumulator<
18; CHECK-NOT: fzAccum<
19; CHECK-NOT: fz2Accum<
20; CHECK-NOT: hsgAccum<
21
22; ModuleID = 'reduce.bc'
; Target description for the module. The triple is the same one passed to bcc
; via -mtriple on the RUN line at the top of this file.
23target datalayout = "e-p:32:32-i64:64-v128:64:128-n32-S64"
24target triple = "armv7-none-linux-gnueabi"
25
; Named struct types used by the fMM* functions: a MinAndMax is a pair of
; IndexedVal, and each IndexedVal is a float followed by an i32.
26%struct.MinAndMax = type { %struct.IndexedVal, %struct.IndexedVal }
27%struct.IndexedVal = type { float, i32 }
28
; One @.rs.reduce_fn.<name> global per reduction helper: each holds the address
; of the like-named internal function, bitcast to i8*. The test's first
; check directive requires that no ".rs.reduce_fn" symbol remains in the output
; object. NOTE(review): presumably these are the "dummy variables" mentioned in
; the header comment -- confirm.
29@.rs.reduce_fn.aiAccum = global i8* bitcast (void (i32*, i32)* @aiAccum to i8*), align 4
30@.rs.reduce_fn.dpAccum = global i8* bitcast (void (float*, float, float)* @dpAccum to i8*), align 4
31@.rs.reduce_fn.dpSum = global i8* bitcast (void (float*, float*)* @dpSum to i8*), align 4
32@.rs.reduce_fn.fMMInit = global i8* bitcast (void (%struct.MinAndMax*)* @fMMInit to i8*), align 4
33@.rs.reduce_fn.fMMAccumulator = global i8* bitcast (void (%struct.MinAndMax*, float, i32)* @fMMAccumulator to i8*), align 4
34@.rs.reduce_fn.fMMCombiner = global i8* bitcast (void (%struct.MinAndMax*, %struct.MinAndMax*)* @fMMCombiner to i8*), align 4
35@.rs.reduce_fn.fMMOutConverter = global i8* bitcast (void (<2 x i32>*, %struct.MinAndMax*)* @fMMOutConverter to i8*), align 4
36@.rs.reduce_fn.fzInit = global i8* bitcast (void (i32*)* @fzInit to i8*), align 4
37@.rs.reduce_fn.fzAccum = global i8* bitcast (void (i32*, i32, i32)* @fzAccum to i8*), align 4
38@.rs.reduce_fn.fzCombine = global i8* bitcast (void (i32*, i32*)* @fzCombine to i8*), align 4
39@.rs.reduce_fn.fz2Init = global i8* bitcast (void (<2 x i32>*)* @fz2Init to i8*), align 4
40@.rs.reduce_fn.fz2Accum = global i8* bitcast (void (<2 x i32>*, i32, i32, i32)* @fz2Accum to i8*), align 4
41@.rs.reduce_fn.fz2Combine = global i8* bitcast (void (<2 x i32>*, <2 x i32>*)* @fz2Combine to i8*), align 4
42@.rs.reduce_fn.fz3Init = global i8* bitcast (void (<3 x i32>*)* @fz3Init to i8*), align 4
43@.rs.reduce_fn.fz3Accum = global i8* bitcast (void (<3 x i32>*, i32, i32, i32, i32)* @fz3Accum to i8*), align 4
44@.rs.reduce_fn.fz3Combine = global i8* bitcast (void (<3 x i32>*, <3 x i32>*)* @fz3Combine to i8*), align 4
45@.rs.reduce_fn.hsgAccum = global i8* bitcast (void ([256 x i32]*, i8)* @hsgAccum to i8*), align 4
46@.rs.reduce_fn.hsgCombine = global i8* bitcast (void ([256 x i32]*, [256 x i32]*)* @hsgCombine to i8*), align 4
47@.rs.reduce_fn.modeOutConvert = global i8* bitcast (void (<2 x i32>*, [256 x i32]*)* @modeOutConvert to i8*), align 4
; Float globals read by @fMMInit and listed under the rs_export_var metadata.
; Both are zero-initialized here; any other value would have to be stored by
; code outside this module.
48@negInf = common global float 0.000000e+00, align 4
49@posInf = common global float 0.000000e+00, align 4
50
51; Function Attrs: nounwind
; aiAccum: accumulator function named by metadata node !8 ("addint" reduction).
; Adds %val to the i32 stored at %accum and writes the sum back.
52define internal void @aiAccum(i32* nocapture %accum, i32 %val) #0 {
53  %1 = load i32, i32* %accum, align 4, !tbaa !22
  ; nsw: signed overflow of this add yields a poison value.
54  %2 = add nsw i32 %1, %val
55  store i32 %2, i32* %accum, align 4, !tbaa !22
56  ret void
57}
58
59; Function Attrs: nounwind
; dpAccum: accumulator function named by metadata node !10 ("dp" reduction).
; Computes %in1 * %in2, adds it to the float stored at %accum and stores the
; result back to %accum.
60define internal void @dpAccum(float* nocapture %accum, float %in1, float %in2) #0 {
61  %1 = fmul float %in1, %in2
62  %2 = load float, float* %accum, align 4, !tbaa !26
63  %3 = fadd float %1, %2
64  store float %3, float* %accum, align 4, !tbaa !26
65  ret void
66}
67
68; Function Attrs: nounwind
; dpSum: listed as the last entry of the "dp" reduction metadata (!9).
; Adds the float at %val to the float at %accum and stores the sum in %accum;
; %val is only read.
69define internal void @dpSum(float* nocapture %accum, float* nocapture %val) #0 {
70  %1 = load float, float* %val, align 4, !tbaa !26
71  %2 = load float, float* %accum, align 4, !tbaa !26
72  %3 = fadd float %1, %2
73  store float %3, float* %accum, align 4, !tbaa !26
74  ret void
75}
76
77; Function Attrs: nounwind
; fMMInit: fills in a %struct.MinAndMax at %accum.
;   element 0: float <- current value of @posInf, i32 <- -1
;   element 1: float <- current value of @negInf, i32 <- -1
; The float values are moved as raw i32 bit patterns (load/store through
; bitcast pointers), so no floating-point operation is performed.
78define internal void @fMMInit(%struct.MinAndMax* nocapture %accum) #0 {
79  %1 = load i32, i32* bitcast (float* @posInf to i32*), align 4, !tbaa !26
  ; %accum reinterpreted as i32* addresses the float of element 0.
80  %2 = bitcast %struct.MinAndMax* %accum to i32*
81  store i32 %1, i32* %2, align 4, !tbaa !26
82  %3 = getelementptr inbounds %struct.MinAndMax, %struct.MinAndMax* %accum, i32 0, i32 0, i32 1
83  store i32 -1, i32* %3, align 4, !tbaa !22
84  %4 = load i32, i32* bitcast (float* @negInf to i32*), align 4, !tbaa !26
  ; Pointer to element 1, reinterpreted as i32*, addresses that element's float.
85  %5 = getelementptr inbounds %struct.MinAndMax, %struct.MinAndMax* %accum, i32 0, i32 1
86  %6 = bitcast %struct.IndexedVal* %5 to i32*
87  store i32 %4, i32* %6, align 4, !tbaa !26
88  %7 = getelementptr inbounds %struct.MinAndMax, %struct.MinAndMax* %accum, i32 0, i32 1, i32 1
89  store i32 -1, i32* %7, align 4, !tbaa !22
90  ret void
91}
92
93; Function Attrs: nounwind
; fMMAccumulator: accumulator function named by metadata node !12
; ("findMinAndMax" reduction); also called directly by @fMMCombiner.
; Updates the %struct.MinAndMax at %accum with the candidate (%in, %x):
;   - if element 0's float is greater than %in, element 0 becomes (%in, %x);
;   - if element 1's float is less than %in, element 1 becomes (%in, %x).
; Both comparisons are ordered (ogt/olt), so they are false when either
; operand is NaN and the corresponding element is then left unchanged.
94define internal void @fMMAccumulator(%struct.MinAndMax* nocapture %accum, float %in, i32 %x) #0 {
95  %1 = getelementptr inbounds %struct.MinAndMax, %struct.MinAndMax* %accum, i32 0, i32 0, i32 0
96  %2 = load float, float* %1, align 4, !tbaa !26
97  %3 = fcmp ogt float %2, %in
98  br i1 %3, label %4, label %6
99
100; <label>:4                                       ; preds = %0
  ; Element 0's float exceeds %in: overwrite element 0.
101  store float %in, float* %1, align 4
102  %5 = getelementptr inbounds %struct.MinAndMax, %struct.MinAndMax* %accum, i32 0, i32 0, i32 1
103  store i32 %x, i32* %5, align 4
104  br label %6
105
106; <label>:6                                       ; preds = %4, %0
107  %7 = getelementptr inbounds %struct.MinAndMax, %struct.MinAndMax* %accum, i32 0, i32 1, i32 0
108  %8 = load float, float* %7, align 4, !tbaa !26
109  %9 = fcmp olt float %8, %in
110  br i1 %9, label %10, label %12
111
112; <label>:10                                      ; preds = %6
  ; Element 1's float is below %in: overwrite element 1.
113  store float %in, float* %7, align 4
114  %11 = getelementptr inbounds %struct.MinAndMax, %struct.MinAndMax* %accum, i32 0, i32 1, i32 1
115  store i32 %x, i32* %11, align 4
116  br label %12
117
118; <label>:12                                      ; preds = %10, %6
119  ret void
120}
121
122; Function Attrs: nounwind
; fMMCombiner: merges the %struct.MinAndMax at %val into the one at %accum by
; calling @fMMAccumulator twice, once with element 0 of %val as the
; (float, i32) candidate and once with element 1. %val is only read.
123define internal void @fMMCombiner(%struct.MinAndMax* nocapture %accum, %struct.MinAndMax* nocapture %val) #0 {
124  %1 = getelementptr inbounds %struct.MinAndMax, %struct.MinAndMax* %val, i32 0, i32 0, i32 0
125  %2 = load float, float* %1, align 4, !tbaa !26
126  %3 = getelementptr inbounds %struct.MinAndMax, %struct.MinAndMax* %val, i32 0, i32 0, i32 1
127  %4 = load i32, i32* %3, align 4, !tbaa !22
128  tail call void @fMMAccumulator(%struct.MinAndMax* %accum, float %2, i32 %4)
129  %5 = getelementptr inbounds %struct.MinAndMax, %struct.MinAndMax* %val, i32 0, i32 1, i32 0
130  %6 = load float, float* %5, align 4, !tbaa !26
131  %7 = getelementptr inbounds %struct.MinAndMax, %struct.MinAndMax* %val, i32 0, i32 1, i32 1
132  %8 = load i32, i32* %7, align 4, !tbaa !22
133  tail call void @fMMAccumulator(%struct.MinAndMax* %accum, float %6, i32 %8)
134  ret void
135}
136
137; Function Attrs: nounwind
; fMMOutConverter: writes the two i32 fields of the %struct.MinAndMax at %val
; into the <2 x i32> at %result: lane 0 <- element 0's i32, lane 1 <- element
; 1's i32. The vector is stored twice, once after each lane is inserted.
138define internal void @fMMOutConverter(<2 x i32>* nocapture %result, %struct.MinAndMax* nocapture %val) #0 {
139  %1 = getelementptr inbounds %struct.MinAndMax, %struct.MinAndMax* %val, i32 0, i32 0, i32 1
140  %2 = load i32, i32* %1, align 4, !tbaa !22
  ; The existing vector is loaded so that lane 1 is preserved by the first store.
141  %3 = load <2 x i32>, <2 x i32>* %result, align 8
142  %4 = insertelement <2 x i32> %3, i32 %2, i32 0
143  store <2 x i32> %4, <2 x i32>* %result, align 8
144  %5 = getelementptr inbounds %struct.MinAndMax, %struct.MinAndMax* %val, i32 0, i32 1, i32 1
145  %6 = load i32, i32* %5, align 4, !tbaa !22
146  %7 = insertelement <2 x i32> %4, i32 %6, i32 1
147  store <2 x i32> %7, <2 x i32>* %result, align 8
148  ret void
149}
150
151; Function Attrs: nounwind
; fzInit: stores -1 into the i32 at %accumIdx.
152define internal void @fzInit(i32* nocapture %accumIdx) #0 {
153  store i32 -1, i32* %accumIdx, align 4, !tbaa !22
154  ret void
155}
156
157; Function Attrs: nounwind
; fzAccum: accumulator function named by metadata node !14 ("fz" reduction).
; When %inVal is zero, stores %x into the i32 at %accumIdx; otherwise leaves
; memory untouched.
158define internal void @fzAccum(i32* nocapture %accumIdx, i32 %inVal, i32 %x) #0 {
159  %1 = icmp eq i32 %inVal, 0
160  br i1 %1, label %2, label %3
161
162; <label>:2                                       ; preds = %0
163  store i32 %x, i32* %accumIdx, align 4, !tbaa !22
164  br label %3
165
166; <label>:3                                       ; preds = %2, %0
167  ret void
168}
169
170; Function Attrs: nounwind
; fzCombine: if the i32 at %accumIdx2 is greater than -1 (signed compare),
; copies it into %accumIdx; otherwise %accumIdx is left unchanged.
171define internal void @fzCombine(i32* nocapture %accumIdx, i32* nocapture %accumIdx2) #0 {
172  %1 = load i32, i32* %accumIdx2, align 4, !tbaa !22
173  %2 = icmp sgt i32 %1, -1
174  br i1 %2, label %3, label %4
175
176; <label>:3                                       ; preds = %0
177  store i32 %1, i32* %accumIdx, align 4, !tbaa !22
178  br label %4
179
180; <label>:4                                       ; preds = %3, %0
181  ret void
182}
183
184; Function Attrs: nounwind
; fz2Init: stores <-1, -1> into the <2 x i32> at %accum.
185define internal void @fz2Init(<2 x i32>* nocapture %accum) #0 {
186  store <2 x i32> <i32 -1, i32 -1>, <2 x i32>* %accum, align 8
187  ret void
188}
189
190; Function Attrs: nounwind
; fz2Accum: accumulator function named by metadata node !16 ("fz2" reduction).
; When %inVal is zero, stores the vector <%x, %y> into %accum; otherwise
; leaves memory untouched.
191define internal void @fz2Accum(<2 x i32>* nocapture %accum, i32 %inVal, i32 %x, i32 %y) #0 {
192  %1 = icmp eq i32 %inVal, 0
193  br i1 %1, label %2, label %5
194
195; <label>:2                                       ; preds = %0
  ; Build <%x, %y> from scratch; both lanes of the undef base are overwritten.
196  %3 = insertelement <2 x i32> undef, i32 %x, i32 0
197  %4 = insertelement <2 x i32> %3, i32 %y, i32 1
198  store <2 x i32> %4, <2 x i32>* %accum, align 8
199  br label %5
200
201; <label>:5                                       ; preds = %2, %0
202  ret void
203}
204
205; Function Attrs: nounwind
; fz2Combine: if lane 0 of the <2 x i32> at %accum2 is greater than -1 (signed
; compare), copies the whole vector from %accum2 into %accum; otherwise %accum
; is left unchanged.
206define internal void @fz2Combine(<2 x i32>* nocapture %accum, <2 x i32>* nocapture %accum2) #0 {
207  %1 = load <2 x i32>, <2 x i32>* %accum2, align 8
208  %2 = extractelement <2 x i32> %1, i32 0
209  %3 = icmp sgt i32 %2, -1
210  br i1 %3, label %4, label %5
211
212; <label>:4                                       ; preds = %0
213  store <2 x i32> %1, <2 x i32>* %accum, align 8, !tbaa !28
214  br label %5
215
216; <label>:5                                       ; preds = %4, %0
217  ret void
218}
219
220; Function Attrs: nounwind
; fz3Init: stores <-1, -1, -1> into the <3 x i32> at %accum.
221define internal void @fz3Init(<3 x i32>* nocapture %accum) #0 {
222  store <3 x i32> <i32 -1, i32 -1, i32 -1>, <3 x i32>* %accum, align 16
223  ret void
224}
225
226; Function Attrs: nounwind
; fz3Accum: accumulator function named by metadata node !18 ("fz3" reduction).
; When %inVal is zero, stores the vector <%x, %y, %z> into %accum; otherwise
; leaves memory untouched.
227define internal void @fz3Accum(<3 x i32>* nocapture %accum, i32 %inVal, i32 %x, i32 %y, i32 %z) #0 {
228  %1 = icmp eq i32 %inVal, 0
229  br i1 %1, label %2, label %6
230
231; <label>:2                                       ; preds = %0
  ; Build <%x, %y, %z> from scratch; all lanes of the undef base are overwritten.
232  %3 = insertelement <3 x i32> undef, i32 %x, i32 0
233  %4 = insertelement <3 x i32> %3, i32 %y, i32 1
234  %5 = insertelement <3 x i32> %4, i32 %z, i32 2
235  store <3 x i32> %5, <3 x i32>* %accum, align 16
236  br label %6
237
238; <label>:6                                       ; preds = %2, %0
239  ret void
240}
241
242; Function Attrs: nounwind
; fz3Combine: if lane 0 of the <3 x i32> at %accum is greater than -1 (signed
; compare), overwrites %accum with the contents of %accum2. The copy is done
; as a single <4 x i32> load/store, i.e. 16 bytes including the padding lane.
; NOTE(review): the guard reads %accum (the destination), whereas @fzCombine
; and @fz2Combine in this file test their second argument -- confirm whether
; this difference is intended in the source this IR was generated from.
243define internal void @fz3Combine(<3 x i32>* nocapture %accum, <3 x i32>* nocapture %accum2) #0 {
244  %1 = load <3 x i32>, <3 x i32>* %accum, align 16
245  %2 = extractelement <3 x i32> %1, i32 0
246  %3 = icmp sgt i32 %2, -1
247  br i1 %3, label %4, label %8
248
249; <label>:4                                       ; preds = %0
250  %5 = bitcast <3 x i32>* %accum2 to <4 x i32>*
  ; NOTE(review): this load is align 8 while the matching store is align 16.
251  %6 = load <4 x i32>, <4 x i32>* %5, align 8
252  %7 = bitcast <3 x i32>* %accum to <4 x i32>*
253  store <4 x i32> %6, <4 x i32>* %7, align 16, !tbaa !28
254  br label %8
255
256; <label>:8                                       ; preds = %4, %0
257  ret void
258}
259
260; Function Attrs: nounwind
; hsgAccum: accumulator function named by metadata node !20 (shared by the
; "histogram" and "mode" reductions). Increments by one the element of the
; [256 x i32] array at %h whose index is the zero-extended byte %in.
261define internal void @hsgAccum([256 x i32]* nocapture %h, i8 zeroext %in) #0 {
  ; An i8 zero-extended to i32 is in 0..255, so the index stays inside the array.
262  %1 = zext i8 %in to i32
263  %2 = getelementptr inbounds [256 x i32], [256 x i32]* %h, i32 0, i32 %1
264  %3 = load i32, i32* %2, align 4, !tbaa !22
265  %4 = add i32 %3, 1
266  store i32 %4, i32* %2, align 4, !tbaa !22
267  ret void
268}
269
270; Function Attrs: nounwind
; hsgCombine: element-wise adds the [256 x i32] array at %addend into the one
; at %accum, i.e. accum[i] += addend[i] for i = 0..255. %addend is only read.
271define internal void @hsgCombine([256 x i32]* nocapture %accum, [256 x i32]* nocapture %addend) #0 {
272  br label %2
273
274; <label>:1                                       ; preds = %2
  ; Loop exit.
275  ret void
276
277; <label>:2                                       ; preds = %2, %0
  ; Loop body: %i.01 is the index, starting at 0 and incremented once per pass.
278  %i.01 = phi i32 [ 0, %0 ], [ %8, %2 ]
279  %3 = getelementptr inbounds [256 x i32], [256 x i32]* %addend, i32 0, i32 %i.01
280  %4 = load i32, i32* %3, align 4, !tbaa !22
281  %5 = getelementptr inbounds [256 x i32], [256 x i32]* %accum, i32 0, i32 %i.01
282  %6 = load i32, i32* %5, align 4, !tbaa !22
283  %7 = add i32 %6, %4
284  store i32 %7, i32* %5, align 4, !tbaa !22
285  %8 = add nuw nsw i32 %i.01, 1
  ; Leave the loop once the incremented index reaches 256.
286  %exitcond = icmp eq i32 %8, 256
287  br i1 %exitcond, label %1, label %2
288}
289
290; Function Attrs: nounwind
; modeOutConvert: scans the [256 x i32] array at %h for the index holding the
; largest value (unsigned compare) and writes the result to the <2 x i32> at
; %result: lane 0 <- that index, lane 1 <- h[index].
; The running best index starts at 0 and is replaced only on a strictly
; greater value, so among equal maxima the lowest index is kept.
291define internal void @modeOutConvert(<2 x i32>* nocapture %result, [256 x i32]* nocapture %h) #0 {
292  br label %7
293
294; <label>:1                                       ; preds = %7
  ; Loop exit: %i.0.mode.0 is the final best index. The vector is stored twice,
  ; once after each lane is inserted.
295  %2 = load <2 x i32>, <2 x i32>* %result, align 8
296  %3 = insertelement <2 x i32> %2, i32 %i.0.mode.0, i32 0
297  store <2 x i32> %3, <2 x i32>* %result, align 8
298  %4 = getelementptr inbounds [256 x i32], [256 x i32]* %h, i32 0, i32 %i.0.mode.0
299  %5 = load i32, i32* %4, align 4, !tbaa !22
300  %6 = insertelement <2 x i32> %3, i32 %5, i32 1
301  store <2 x i32> %6, <2 x i32>* %result, align 8
302  ret void
303
304; <label>:7                                       ; preds = %7, %0
  ; Loop body: %i.02 is the candidate index (1..255), %mode.01 the best so far.
305  %i.02 = phi i32 [ 1, %0 ], [ %13, %7 ]
306  %mode.01 = phi i32 [ 0, %0 ], [ %i.0.mode.0, %7 ]
307  %8 = getelementptr inbounds [256 x i32], [256 x i32]* %h, i32 0, i32 %i.02
308  %9 = load i32, i32* %8, align 4, !tbaa !22
309  %10 = getelementptr inbounds [256 x i32], [256 x i32]* %h, i32 0, i32 %mode.01
310  %11 = load i32, i32* %10, align 4, !tbaa !22
311  %12 = icmp ugt i32 %9, %11
  ; Keep the candidate only when its value is strictly greater.
312  %i.0.mode.0 = select i1 %12, i32 %i.02, i32 %mode.01
313  %13 = add nuw nsw i32 %i.02, 1
314  %exitcond = icmp eq i32 %13, 256
315  br i1 %exitcond, label %1, label %7
316}
317
; Attribute group shared by every function in this module.
318attributes #0 = { nounwind }
319
; Named metadata. "\23" is the escaped '#' character, so the last four names
; read #pragma, #rs_export_var, #rs_object_slots and #rs_export_reduce.
320!llvm.module.flags = !{!0, !1}
321!llvm.ident = !{!2}
322!\23pragma = !{!3, !4}
323!\23rs_export_var = !{!5, !6}
324!\23rs_object_slots = !{}
325!\23rs_export_reduce = !{!7, !9, !11, !13, !15, !17, !19, !21}
326
327!0 = !{i32 1, !"wchar_size", i32 4}
328!1 = !{i32 1, !"min_enum_size", i32 4}
329!2 = !{!"clang version 3.6 "}
330!3 = !{!"version", !"1"}
331!4 = !{!"java_package_name", !"com.android.rs.test"}
332!5 = !{!"negInf", !"1"}
333!6 = !{!"posInf", !"1"}
; Reduction descriptors (!7..!21, odd nodes) and their accumulator nodes (even
; nodes). Each descriptor is: reduction name, a string-encoded number, the
; accumulator node, then up to three further function names (null where
; absent). The accumulator node pairs a function name with another
; string-encoded number whose meaning is not shown in this file.
; NOTE(review): the descriptor's number matches the byte size of the type the
; accumulator's first parameter points to (4, 16, 8, 16, 1024) -- presumably
; the accumulator data size; confirm against the bcc metadata reader.
; NOTE(review): judging by the function names in !11, the trailing slots look
; like initializer, combiner, out-converter in that order -- confirm.
334!7 = !{!"addint", !"4", !8}
335!8 = !{!"aiAccum", !"1"}
336!9 = !{!"dp", !"4", !10, null, !"dpSum"}
337!10 = !{!"dpAccum", !"1"}
338!11 = !{!"findMinAndMax", !"16", !12, !"fMMInit", !"fMMCombiner", !"fMMOutConverter"}
339!12 = !{!"fMMAccumulator", !"9"}
340!13 = !{!"fz", !"4", !14, !"fzInit", !"fzCombine"}
341!14 = !{!"fzAccum", !"9"}
342!15 = !{!"fz2", !"8", !16, !"fz2Init", !"fz2Combine"}
343!16 = !{!"fz2Accum", !"25"}
344!17 = !{!"fz3", !"16", !18, !"fz3Init", !"fz3Combine"}
345!18 = !{!"fz3Accum", !"89"}
; "histogram" and "mode" share the same accumulator node (!20) and combiner.
346!19 = !{!"histogram", !"1024", !20, null, !"hsgCombine"}
347!20 = !{!"hsgAccum", !"1"}
348!21 = !{!"mode", !"1024", !20, null, !"hsgCombine", !"modeOutConvert"}
; TBAA nodes: !22 tags i32 accesses, !26 float accesses and !28 char-typed
; accesses; all descend from the "Simple C/C++ TBAA" root (!25).
349!22 = !{!23, !23, i64 0}
350!23 = !{!"int", !24, i64 0}
351!24 = !{!"omnipotent char", !25, i64 0}
352!25 = !{!"Simple C/C++ TBAA"}
353!26 = !{!27, !27, i64 0}
354!27 = !{!"float", !24, i64 0}
355!28 = !{!24, !24, i64 0}
356