; RUN: opt -basic-aa -loop-distribute -enable-loop-distribute -verify-loop-info -verify-dom-info -S \
; RUN:   < %s | FileCheck %s

; RUN: opt -basic-aa -loop-distribute -enable-loop-distribute -verify-loop-info -verify-dom-info \
; RUN:   -loop-accesses -analyze < %s -enable-new-pm=0 | FileCheck %s --check-prefix=ANALYSIS

; TODO: the following changes the order loop-access printing prints loops, remove legacy RUN and change after NPM switch
; TODO: opt -aa-pipeline=basic-aa -passes='loop-distribute,print-access-info' -enable-loop-distribute \
; TODO:   -verify-loop-info -verify-dom-info -disable-output < %s 2>&1 | FileCheck %s --check-prefix=ANALYSIS

; RUN: opt -basic-aa -loop-distribute -enable-loop-distribute -loop-vectorize -force-vector-width=4 -S \
; RUN:   < %s | FileCheck %s --check-prefix=VECTORIZE

; We should distribute this loop into a safe (2nd statement) and unsafe loop
; (1st statement):
;   for (i = 0; i < n; i++) {
;     A[i + 1] = A[i] * B[i];
;     =======================
;     C[i] = D[i] * E[i];
;   }
;
; Note: the IR below uses a constant trip count of 20 in place of n.

target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.10.0"

; CHECK-LABEL: @f(
define void @f(i32* noalias %a,
               i32* noalias %b,
               i32* noalias %c,
               i32* noalias %d,
               i32* noalias %e) {
entry:
  br label %for.body

; Verify the two distributed loops.
; The clone carrying the ".ldist1" suffix holds the A[i + 1] = A[i] * B[i]
; statement and branches to entry.split when it finishes; the original
; for.body keeps the C[i] = D[i] * E[i] statement.

; CHECK: entry.split.ldist1:
; CHECK:    br label %for.body.ldist1
; CHECK: for.body.ldist1:
; CHECK:    %mulA.ldist1 = mul i32 %loadB.ldist1, %loadA.ldist1
; CHECK:    br i1 %exitcond.ldist1, label %entry.split, label %for.body.ldist1

; CHECK: entry.split:
; CHECK:    br label %for.body
; CHECK: for.body:
; CHECK:    %mulC = mul i32 %loadD, %loadE
; CHECK: for.end:

; After distribution, the loop-access report must call the remaining for.body
; safe and the ".ldist1" clone unsafe.

; ANALYSIS: for.body:
; ANALYSIS-NEXT: Memory dependences are safe{{$}}
; ANALYSIS: for.body.ldist1:
; ANALYSIS-NEXT: Report: unsafe dependent memory operations in loop

; With the vectorizer run afterwards at a forced width of 4, a vector multiply
; must appear (presumably produced from the safe loop).

; VECTORIZE: mul <4 x i32>

for.body:                                         ; preds = %for.body, %entry
  %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]

  ; 1st statement: loads A[i] and B[i] ...
  %arrayidxA = getelementptr inbounds i32, i32* %a, i64 %ind
  %loadA = load i32, i32* %arrayidxA, align 4

  %arrayidxB = getelementptr inbounds i32, i32* %b, i64 %ind
  %loadB = load i32, i32* %arrayidxB, align 4

  %mulA = mul i32 %loadB, %loadA

  ; ... and stores to A[i + 1], i.e. the element the next iteration loads.
  ; ("_plus_4" in the name is the byte offset of one i32 element.)
  %add = add nuw nsw i64 %ind, 1
  %arrayidxA_plus_4 = getelementptr inbounds i32, i32* %a, i64 %add
  store i32 %mulA, i32* %arrayidxA_plus_4, align 4

  ; 2nd statement: C[i] = D[i] * E[i]; it only touches the noalias arguments
  ; %c, %d and %e at index i.
  %arrayidxD = getelementptr inbounds i32, i32* %d, i64 %ind
  %loadD = load i32, i32* %arrayidxD, align 4

  %arrayidxE = getelementptr inbounds i32, i32* %e, i64 %ind
  %loadE = load i32, i32* %arrayidxE, align 4

  %mulC = mul i32 %loadD, %loadE

  %arrayidxC = getelementptr inbounds i32, i32* %c, i64 %ind
  store i32 %mulC, i32* %arrayidxC, align 4

  ; Leave the loop once the incremented index reaches 20.
  %exitcond = icmp eq i64 %add, 20
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

declare i32 @llvm.convergent(i32) #0

; It is OK to distribute with a convergent operation, since in each
; new loop the convergent operation has the same control dependency.
; CHECK-LABEL: @f_with_convergent(
define void @f_with_convergent(i32* noalias %a,
                               i32* noalias %b,
                               i32* noalias %c,
                               i32* noalias %d,
                               i32* noalias %e) {
entry:
  br label %for.body

; Verify the two distributed loops.
; The ".ldist1" clone holds the multiply/store on %a; the original for.body
; keeps the convergent call together with the multiply that consumes it.

; CHECK: entry.split.ldist1:
; CHECK:    br label %for.body.ldist1
; CHECK: for.body.ldist1:
; CHECK:    %mulA.ldist1 = mul i32 %loadB.ldist1, %loadA.ldist1
; CHECK:    br i1 %exitcond.ldist1, label %entry.split, label %for.body.ldist1

; CHECK: entry.split:
; CHECK:    br label %for.body
; CHECK: for.body:
; CHECK:    %convergentD = call i32 @llvm.convergent(i32 %loadD)
; CHECK:    %mulC = mul i32 %convergentD, %loadE
; CHECK: for.end:

; The loop-access report for the loop that kept the call must mention the
; convergent operation; the ".ldist1" clone is still reported as unsafe.

; ANALYSIS: for.body:
; ANALYSIS-NEXT: Has convergent operation in loop
; ANALYSIS-NEXT: Report: cannot add control dependency to convergent operation
; ANALYSIS: for.body.ldist1:
; ANALYSIS-NEXT: Report: unsafe dependent memory operations in loop

; convergent instruction happens to block vectorization
; VECTORIZE: call i32 @llvm.convergent
; VECTORIZE: mul i32

for.body:                                         ; preds = %for.body, %entry
  %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]

  ; 1st statement: A[i + 1] = A[i] * B[i]. The store targets the element that
  ; the next iteration loads.
  %arrayidxA = getelementptr inbounds i32, i32* %a, i64 %ind
  %loadA = load i32, i32* %arrayidxA, align 4

  %arrayidxB = getelementptr inbounds i32, i32* %b, i64 %ind
  %loadB = load i32, i32* %arrayidxB, align 4

  %mulA = mul i32 %loadB, %loadA

  %add = add nuw nsw i64 %ind, 1
  %arrayidxA_plus_4 = getelementptr inbounds i32, i32* %a, i64 %add
  store i32 %mulA, i32* %arrayidxA_plus_4, align 4

  ; 2nd statement: C[i] = convergent(D[i]) * E[i].
  %arrayidxD = getelementptr inbounds i32, i32* %d, i64 %ind
  %loadD = load i32, i32* %arrayidxD, align 4

  %arrayidxE = getelementptr inbounds i32, i32* %e, i64 %ind
  %loadE = load i32, i32* %arrayidxE, align 4

  ; D[i] is routed through the convergent call before being multiplied.
  %convergentD = call i32 @llvm.convergent(i32 %loadD)
  %mulC = mul i32 %convergentD, %loadE

  %arrayidxC = getelementptr inbounds i32, i32* %c, i64 %ind
  store i32 %mulC, i32* %arrayidxC, align 4

  ; Leave the loop once the incremented index reaches 20.
  %exitcond = icmp eq i64 %add, 20
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; Attribute group attached to the @llvm.convergent declaration.
attributes #0 = { nounwind readnone convergent }