; Check that the unexpanded accumulator functions and the dummy variables are deleted.
; There doesn't seem to be any way to compute the basename of %s, hence the unfortunate
; explicit uses of "test_reduce_general_cleanup" below.
; There doesn't seem to be a way to write a CHECK-NOT pattern that matches only at the
; end of a line (llvm-objdump dumps symbol name at end of line), so sed is employed
; to add a '<' at the end of each line (symbol name).  This allows us to use (e.g.)
; "aiAccum<" to match the symbol "aiAccum" but not the symbol "aiAccum.expand".

; RUN: llvm-rs-as %s -o %t
; RUN: bcc -o test_reduce_general_cleanup -output_path %T -bclib libclcore.bc -mtriple armv7-none-linux-gnueabi %t
; RUN: llvm-objdump -t %T/test_reduce_general_cleanup.o | sed -e 's!$!<!' | FileCheck %s

; One negative pattern per accumulator function named in the #rs_export_reduce
; metadata at the bottom of this file, plus one for the dummy ".rs.reduce_fn.*"
; globals.
; NOTE(review): no function named mpyAccum is defined in this module; that
; pattern is harmless but looks stale -- confirm before removing it.

; CHECK-NOT: .rs.reduce_fn
; CHECK-NOT: aiAccum<
; CHECK-NOT: mpyAccum<
; CHECK-NOT: dpAccum<
; CHECK-NOT: fMMAccumulator<
; CHECK-NOT: fzAccum<
; CHECK-NOT: fz2Accum<
; CHECK-NOT: fz3Accum<
; CHECK-NOT: hsgAccum<

; ModuleID = 'reduce.bc'
target datalayout = "e-p:32:32-i64:64-v128:64:128-n32-S64"
target triple = "armv7-none-linux-gnueabi"

%struct.MinAndMax = type { %struct.IndexedVal, %struct.IndexedVal }
%struct.IndexedVal = type { float, i32 }

; Dummy variables: each one holds the address of an internal reduce helper
; function. These are the ".rs.reduce_fn" symbols that must not appear in the
; object file produced by bcc.
@.rs.reduce_fn.aiAccum = global i8* bitcast (void (i32*, i32)* @aiAccum to i8*), align 4
@.rs.reduce_fn.dpAccum = global i8* bitcast (void (float*, float, float)* @dpAccum to i8*), align 4
@.rs.reduce_fn.dpSum = global i8* bitcast (void (float*, float*)* @dpSum to i8*), align 4
@.rs.reduce_fn.fMMInit = global i8* bitcast (void (%struct.MinAndMax*)* @fMMInit to i8*), align 4
@.rs.reduce_fn.fMMAccumulator = global i8* bitcast (void (%struct.MinAndMax*, float, i32)* @fMMAccumulator to i8*), align 4
@.rs.reduce_fn.fMMCombiner = global i8* bitcast (void (%struct.MinAndMax*, %struct.MinAndMax*)* @fMMCombiner to i8*), align 4
@.rs.reduce_fn.fMMOutConverter = global i8* bitcast (void (<2 x i32>*, %struct.MinAndMax*)* @fMMOutConverter to i8*), align 4
@.rs.reduce_fn.fzInit = global i8* bitcast (void (i32*)* @fzInit to i8*), align 4
@.rs.reduce_fn.fzAccum = global i8* bitcast (void (i32*, i32, i32)* @fzAccum to i8*), align 4
@.rs.reduce_fn.fzCombine = global i8* bitcast (void (i32*, i32*)* @fzCombine to i8*), align 4
@.rs.reduce_fn.fz2Init = global i8* bitcast (void (<2 x i32>*)* @fz2Init to i8*), align 4
@.rs.reduce_fn.fz2Accum = global i8* bitcast (void (<2 x i32>*, i32, i32, i32)* @fz2Accum to i8*), align 4
@.rs.reduce_fn.fz2Combine = global i8* bitcast (void (<2 x i32>*, <2 x i32>*)* @fz2Combine to i8*), align 4
@.rs.reduce_fn.fz3Init = global i8* bitcast (void (<3 x i32>*)* @fz3Init to i8*), align 4
@.rs.reduce_fn.fz3Accum = global i8* bitcast (void (<3 x i32>*, i32, i32, i32, i32)* @fz3Accum to i8*), align 4
@.rs.reduce_fn.fz3Combine = global i8* bitcast (void (<3 x i32>*, <3 x i32>*)* @fz3Combine to i8*), align 4
@.rs.reduce_fn.hsgAccum = global i8* bitcast (void ([256 x i32]*, i8)* @hsgAccum to i8*), align 4
@.rs.reduce_fn.hsgCombine = global i8* bitcast (void ([256 x i32]*, [256 x i32]*)* @hsgCombine to i8*), align 4
@.rs.reduce_fn.modeOutConvert = global i8* bitcast (void (<2 x i32>*, [256 x i32]*)* @modeOutConvert to i8*), align 4
@negInf = common global float 0.000000e+00, align 4
@posInf = common global float 0.000000e+00, align 4

; addint accumulator: *accum += val.
; Function Attrs: nounwind
define internal void @aiAccum(i32* nocapture %accum, i32 %val) #0 {
  %1 = load i32, i32* %accum, align 4, !tbaa !22
  %2 = add nsw i32 %1, %val
  store i32 %2, i32* %accum, align 4, !tbaa !22
  ret void
}

; dp accumulator: *accum = in1 * in2 + *accum.
; Function Attrs: nounwind
define internal void @dpAccum(float* nocapture %accum, float %in1, float %in2) #0 {
  %1 = fmul float %in1, %in2
  %2 = load float, float* %accum, align 4, !tbaa !26
  %3 = fadd float %1, %2
  store float %3, float* %accum, align 4, !tbaa !26
  ret void
}

; dp combiner: *accum = *val + *accum.
; Function Attrs: nounwind
define internal void @dpSum(float* nocapture %accum, float* nocapture %val) #0 {
  %1 = load float, float* %val, align 4, !tbaa !26
  %2 = load float, float* %accum, align 4, !tbaa !26
  %3 = fadd float %1, %2
  store float %3, float* %accum, align 4, !tbaa !26
  ret void
}

; findMinAndMax initializer: copies the bits of @posInf into the float of the
; first IndexedVal and the bits of @negInf into the float of the second, and
; sets both i32 fields to -1.
; Function Attrs: nounwind
define internal void @fMMInit(%struct.MinAndMax* nocapture %accum) #0 {
  %1 = load i32, i32* bitcast (float* @posInf to i32*), align 4, !tbaa !26
  %2 = bitcast %struct.MinAndMax* %accum to i32*
  store i32 %1, i32* %2, align 4, !tbaa !26
  %3 = getelementptr inbounds %struct.MinAndMax, %struct.MinAndMax* %accum, i32 0, i32 0, i32 1
  store i32 -1, i32* %3, align 4, !tbaa !22
  %4 = load i32, i32* bitcast (float* @negInf to i32*), align 4, !tbaa !26
  %5 = getelementptr inbounds %struct.MinAndMax, %struct.MinAndMax* %accum, i32 0, i32 1
  %6 = bitcast %struct.IndexedVal* %5 to i32*
  store i32 %4, i32* %6, align 4, !tbaa !26
  %7 = getelementptr inbounds %struct.MinAndMax, %struct.MinAndMax* %accum, i32 0, i32 1, i32 1
  store i32 -1, i32* %7, align 4, !tbaa !22
  ret void
}

; findMinAndMax accumulator: if the first IndexedVal's float is greater than
; %in, overwrite that pair with (%in, %x); then, if the second IndexedVal's
; float is less than %in, overwrite that pair with (%in, %x). Both comparisons
; are ordered, so neither branch is taken when an operand is NaN.
; Function Attrs: nounwind
define internal void @fMMAccumulator(%struct.MinAndMax* nocapture %accum, float %in, i32 %x) #0 {
  %1 = getelementptr inbounds %struct.MinAndMax, %struct.MinAndMax* %accum, i32 0, i32 0, i32 0
  %2 = load float, float* %1, align 4, !tbaa !26
  %3 = fcmp ogt float %2, %in
  br i1 %3, label %4, label %6

; <label>:4                                       ; preds = %0
  store float %in, float* %1, align 4
  %5 = getelementptr inbounds %struct.MinAndMax, %struct.MinAndMax* %accum, i32 0, i32 0, i32 1
  store i32 %x, i32* %5, align 4
  br label %6

; <label>:6                                       ; preds = %4, %0
  %7 = getelementptr inbounds %struct.MinAndMax, %struct.MinAndMax* %accum, i32 0, i32 1, i32 0
  %8 = load float, float* %7, align 4, !tbaa !26
  %9 = fcmp olt float %8, %in
  br i1 %9, label %10, label %12

; <label>:10                                      ; preds = %6
  store float %in, float* %7, align 4
  %11 = getelementptr inbounds %struct.MinAndMax, %struct.MinAndMax* %accum, i32 0, i32 1, i32 1
  store i32 %x, i32* %11, align 4
  br label %12

; <label>:12                                      ; preds = %10, %6
  ret void
}

; findMinAndMax combiner: feeds each of %val's two (float, i32) pairs through
; @fMMAccumulator on %accum. These calls keep @fMMAccumulator referenced from
; within the module, independently of its dummy variable.
; Function Attrs: nounwind
define internal void @fMMCombiner(%struct.MinAndMax* nocapture %accum, %struct.MinAndMax* nocapture %val) #0 {
  %1 = getelementptr inbounds %struct.MinAndMax, %struct.MinAndMax* %val, i32 0, i32 0, i32 0
  %2 = load float, float* %1, align 4, !tbaa !26
  %3 = getelementptr inbounds %struct.MinAndMax, %struct.MinAndMax* %val, i32 0, i32 0, i32 1
  %4 = load i32, i32* %3, align 4, !tbaa !22
  tail call void @fMMAccumulator(%struct.MinAndMax* %accum, float %2, i32 %4)
  %5 = getelementptr inbounds %struct.MinAndMax, %struct.MinAndMax* %val, i32 0, i32 1, i32 0
  %6 = load float, float* %5, align 4, !tbaa !26
  %7 = getelementptr inbounds %struct.MinAndMax, %struct.MinAndMax* %val, i32 0, i32 1, i32 1
  %8 = load i32, i32* %7, align 4, !tbaa !22
  tail call void @fMMAccumulator(%struct.MinAndMax* %accum, float %6, i32 %8)
  ret void
}

; findMinAndMax outconverter: writes the i32 field of the first IndexedVal
; into lane 0 of *result and the i32 field of the second into lane 1.
; Function Attrs: nounwind
define internal void @fMMOutConverter(<2 x i32>* nocapture %result, %struct.MinAndMax* nocapture %val) #0 {
  %1 = getelementptr inbounds %struct.MinAndMax, %struct.MinAndMax* %val, i32 0, i32 0, i32 1
  %2 = load i32, i32* %1, align 4, !tbaa !22
  %3 = load <2 x i32>, <2 x i32>* %result, align 8
  %4 = insertelement <2 x i32> %3, i32 %2, i32 0
  store <2 x i32> %4, <2 x i32>* %result, align 8
  %5 = getelementptr inbounds %struct.MinAndMax, %struct.MinAndMax* %val, i32 0, i32 1, i32 1
  %6 = load i32, i32* %5, align 4, !tbaa !22
  %7 = insertelement <2 x i32> %4, i32 %6, i32 1
  store <2 x i32> %7, <2 x i32>* %result, align 8
  ret void
}

; fz initializer: *accumIdx = -1.
; Function Attrs: nounwind
define internal void @fzInit(i32* nocapture %accumIdx) #0 {
  store i32 -1, i32* %accumIdx, align 4, !tbaa !22
  ret void
}

; fz accumulator: when %inVal is zero, record %x in *accumIdx.
; Function Attrs: nounwind
define internal void @fzAccum(i32* nocapture %accumIdx, i32 %inVal, i32 %x) #0 {
  %1 = icmp eq i32 %inVal, 0
  br i1 %1, label %2, label %3

; <label>:2                                       ; preds = %0
  store i32 %x, i32* %accumIdx, align 4, !tbaa !22
  br label %3

; <label>:3                                       ; preds = %2, %0
  ret void
}

; fz combiner: when *accumIdx2 is greater than -1, copy it into *accumIdx.
; Function Attrs: nounwind
define internal void @fzCombine(i32* nocapture %accumIdx, i32* nocapture %accumIdx2) #0 {
  %1 = load i32, i32* %accumIdx2, align 4, !tbaa !22
  %2 = icmp sgt i32 %1, -1
  br i1 %2, label %3, label %4

; <label>:3                                       ; preds = %0
  store i32 %1, i32* %accumIdx, align 4, !tbaa !22
  br label %4

; <label>:4                                       ; preds = %3, %0
  ret void
}

; fz2 initializer: *accum = <-1, -1>.
; Function Attrs: nounwind
define internal void @fz2Init(<2 x i32>* nocapture %accum) #0 {
  store <2 x i32> <i32 -1, i32 -1>, <2 x i32>* %accum, align 8
  ret void
}

; fz2 accumulator: when %inVal is zero, *accum = <%x, %y>.
; Function Attrs: nounwind
define internal void @fz2Accum(<2 x i32>* nocapture %accum, i32 %inVal, i32 %x, i32 %y) #0 {
  %1 = icmp eq i32 %inVal, 0
  br i1 %1, label %2, label %5

; <label>:2                                       ; preds = %0
  %3 = insertelement <2 x i32> undef, i32 %x, i32 0
  %4 = insertelement <2 x i32> %3, i32 %y, i32 1
  store <2 x i32> %4, <2 x i32>* %accum, align 8
  br label %5

; <label>:5                                       ; preds = %2, %0
  ret void
}

; fz2 combiner: when lane 0 of *accum2 is greater than -1, copy *accum2 into
; *accum.
; Function Attrs: nounwind
define internal void @fz2Combine(<2 x i32>* nocapture %accum, <2 x i32>* nocapture %accum2) #0 {
  %1 = load <2 x i32>, <2 x i32>* %accum2, align 8
  %2 = extractelement <2 x i32> %1, i32 0
  %3 = icmp sgt i32 %2, -1
  br i1 %3, label %4, label %5

; <label>:4                                       ; preds = %0
  store <2 x i32> %1, <2 x i32>* %accum, align 8, !tbaa !28
  br label %5

; <label>:5                                       ; preds = %4, %0
  ret void
}

; fz3 initializer: *accum = <-1, -1, -1>.
; Function Attrs: nounwind
define internal void @fz3Init(<3 x i32>* nocapture %accum) #0 {
  store <3 x i32> <i32 -1, i32 -1, i32 -1>, <3 x i32>* %accum, align 16
  ret void
}

; fz3 accumulator: when %inVal is zero, *accum = <%x, %y, %z>.
; Function Attrs: nounwind
define internal void @fz3Accum(<3 x i32>* nocapture %accum, i32 %inVal, i32 %x, i32 %y, i32 %z) #0 {
  %1 = icmp eq i32 %inVal, 0
  br i1 %1, label %2, label %6

; <label>:2                                       ; preds = %0
  %3 = insertelement <3 x i32> undef, i32 %x, i32 0
  %4 = insertelement <3 x i32> %3, i32 %y, i32 1
  %5 = insertelement <3 x i32> %4, i32 %z, i32 2
  store <3 x i32> %5, <3 x i32>* %accum, align 16
  br label %6

; <label>:6                                       ; preds = %2, %0
  ret void
}

; fz3 combiner: when lane 0 of *accum is greater than -1, copy *accum2 into
; *accum through a <4 x i32> load/store pair.
; NOTE(review): unlike fzCombine and fz2Combine, which test their second
; argument, this function tests %accum. It does not affect what this test
; checks (symbol cleanup), but confirm against the original reduce source
; whether that asymmetry is intended.
; Function Attrs: nounwind
define internal void @fz3Combine(<3 x i32>* nocapture %accum, <3 x i32>* nocapture %accum2) #0 {
  %1 = load <3 x i32>, <3 x i32>* %accum, align 16
  %2 = extractelement <3 x i32> %1, i32 0
  %3 = icmp sgt i32 %2, -1
  br i1 %3, label %4, label %8

; <label>:4                                       ; preds = %0
  %5 = bitcast <3 x i32>* %accum2 to <4 x i32>*
  %6 = load <4 x i32>, <4 x i32>* %5, align 8
  %7 = bitcast <3 x i32>* %accum to <4 x i32>*
  store <4 x i32> %6, <4 x i32>* %7, align 16, !tbaa !28
  br label %8

; <label>:8                                       ; preds = %4, %0
  ret void
}

; histogram/mode accumulator: increments the bucket of %h selected by the
; zero-extended byte %in.
; Function Attrs: nounwind
define internal void @hsgAccum([256 x i32]* nocapture %h, i8 zeroext %in) #0 {
  %1 = zext i8 %in to i32
  %2 = getelementptr inbounds [256 x i32], [256 x i32]* %h, i32 0, i32 %1
  %3 = load i32, i32* %2, align 4, !tbaa !22
  %4 = add i32 %3, 1
  store i32 %4, i32* %2, align 4, !tbaa !22
  ret void
}

; histogram/mode combiner: for every index 0..255, accum[i] += addend[i].
; Function Attrs: nounwind
define internal void @hsgCombine([256 x i32]* nocapture %accum, [256 x i32]* nocapture %addend) #0 {
  br label %2

; <label>:1                                       ; preds = %2
  ret void

; <label>:2                                       ; preds = %2, %0
  %i.01 = phi i32 [ 0, %0 ], [ %8, %2 ]
  %3 = getelementptr inbounds [256 x i32], [256 x i32]* %addend, i32 0, i32 %i.01
  %4 = load i32, i32* %3, align 4, !tbaa !22
  %5 = getelementptr inbounds [256 x i32], [256 x i32]* %accum, i32 0, i32 %i.01
  %6 = load i32, i32* %5, align 4, !tbaa !22
  %7 = add i32 %6, %4
  store i32 %7, i32* %5, align 4, !tbaa !22
  %8 = add nuw nsw i32 %i.01, 1
  %exitcond = icmp eq i32 %8, 256
  br i1 %exitcond, label %1, label %2
}

; mode outconverter: scans indices 1..255 of %h, keeping the index whose
; count is largest under an unsigned, strictly-greater comparison (so the
; lowest index wins ties), starting from index 0. Lane 0 of *result receives
; that index and lane 1 receives its count.
; Function Attrs: nounwind
define internal void @modeOutConvert(<2 x i32>* nocapture %result, [256 x i32]* nocapture %h) #0 {
  br label %7

; <label>:1                                       ; preds = %7
  %2 = load <2 x i32>, <2 x i32>* %result, align 8
  %3 = insertelement <2 x i32> %2, i32 %i.0.mode.0, i32 0
  store <2 x i32> %3, <2 x i32>* %result, align 8
  %4 = getelementptr inbounds [256 x i32], [256 x i32]* %h, i32 0, i32 %i.0.mode.0
  %5 = load i32, i32* %4, align 4, !tbaa !22
  %6 = insertelement <2 x i32> %3, i32 %5, i32 1
  store <2 x i32> %6, <2 x i32>* %result, align 8
  ret void

; <label>:7                                       ; preds = %7, %0
  %i.02 = phi i32 [ 1, %0 ], [ %13, %7 ]
  %mode.01 = phi i32 [ 0, %0 ], [ %i.0.mode.0, %7 ]
  %8 = getelementptr inbounds [256 x i32], [256 x i32]* %h, i32 0, i32 %i.02
  %9 = load i32, i32* %8, align 4, !tbaa !22
  %10 = getelementptr inbounds [256 x i32], [256 x i32]* %h, i32 0, i32 %mode.01
  %11 = load i32, i32* %10, align 4, !tbaa !22
  %12 = icmp ugt i32 %9, %11
  %i.0.mode.0 = select i1 %12, i32 %i.02, i32 %mode.01
  %13 = add nuw nsw i32 %i.02, 1
  %exitcond = icmp eq i32 %13, 256
  br i1 %exitcond, label %1, label %7
}

attributes #0 = { nounwind }

!llvm.module.flags = !{!0, !1}
!llvm.ident = !{!2}
!\23pragma = !{!3, !4}
!\23rs_export_var = !{!5, !6}
!\23rs_object_slots = !{}
!\23rs_export_reduce = !{!7, !9, !11, !13, !15, !17, !19, !21}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 1, !"min_enum_size", i32 4}
!2 = !{!"clang version 3.6 "}
!3 = !{!"version", !"1"}
!4 = !{!"java_package_name", !"com.android.rs.test"}
!5 = !{!"negInf", !"1"}
!6 = !{!"posInf", !"1"}
; Reduce kernel descriptors (!7 .. !21). Each appears to list: kernel name,
; accumulator data size, a nested (accumulator function, signature) node, then
; optional initializer / combiner / outconverter names, with null marking an
; omitted slot -- presumably per the RenderScript reduce metadata layout;
; confirm against the slang/bcc metadata documentation.
!7 = !{!"addint", !"4", !8}
!8 = !{!"aiAccum", !"1"}
!9 = !{!"dp", !"4", !10, null, !"dpSum"}
!10 = !{!"dpAccum", !"1"}
!11 = !{!"findMinAndMax", !"16", !12, !"fMMInit", !"fMMCombiner", !"fMMOutConverter"}
!12 = !{!"fMMAccumulator", !"9"}
!13 = !{!"fz", !"4", !14, !"fzInit", !"fzCombine"}
!14 = !{!"fzAccum", !"9"}
!15 = !{!"fz2", !"8", !16, !"fz2Init", !"fz2Combine"}
!16 = !{!"fz2Accum", !"25"}
!17 = !{!"fz3", !"16", !18, !"fz3Init", !"fz3Combine"}
!18 = !{!"fz3Accum", !"89"}
!19 = !{!"histogram", !"1024", !20, null, !"hsgCombine"}
!20 = !{!"hsgAccum", !"1"}
!21 = !{!"mode", !"1024", !20, null, !"hsgCombine", !"modeOutConvert"}
!22 = !{!23, !23, i64 0}
!23 = !{!"int", !24, i64 0}
!24 = !{!"omnipotent char", !25, i64 0}
!25 = !{!"Simple C/C++ TBAA"}
!26 = !{!27, !27, i64 0}
!27 = !{!"float", !24, i64 0}
!28 = !{!24, !24, i64 0}