; RUN: opt -S -march=r600 -mcpu=cayman -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine < %s | FileCheck %s

; Check vectorization that would ordinarily require a runtime bounds
; check on the pointers when mixing address spaces. For now we cannot
; assume address spaces do not alias, and we can't assume that
; different pointers are directly comparable.
;
; These all test this basic loop for different combinations of address
; spaces, and swapping in globals or adding noalias.
;
;void foo(int addrspace(N)* [noalias] a, int addrspace(M)* [noalias] b, int n)
;{
;    for (int i = 0; i < n; ++i)
;    {
;        a[i] = 3 * b[i];
;    }
;}
;
; The command line above forces a vector width of 4, so every test below
; detects vectorization by looking for (or forbidding) a <4 x i32> type
; between the function label and its return.

; Artificial datalayout
; Default pointers (p) are 32 bits wide while address space 1 pointers (p1)
; are 16 bits wide, so pointers in the two spaces differ in size.
target datalayout = "e-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64"


; Identified objects used by the global-based tests: one 1024-element i32
; array in address space 1 and one in address space 2.
@g_as1 = common addrspace(1) global [1024 x i32] zeroinitializer, align 16
@q_as2 = common addrspace(2) global [1024 x i32] zeroinitializer, align 16

; Both parameters are unidentified objects with the same address
; space, so this should vectorize normally.
define void @foo(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 %n) #0 {
; Expectation: a <4 x i32> operation appears before the return.
; CHECK-LABEL: @foo(
; CHECK: <4 x i32>
; CHECK: ret

entry:
  br label %for.cond

for.cond:                                         ; preds = %for.body, %entry
  ; %i.0 is the loop counter i; the loop continues while i < %n (signed).
  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %cmp = icmp slt i32 %i.0, %n
  br i1 %cmp, label %for.body, label %for.end

for.body:                                         ; preds = %for.cond
  ; Load b[i] through the addrspace(1) pointer %b.
  %idxprom = sext i32 %i.0 to i64
  %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %idxprom
  %0 = load i32 addrspace(1)* %arrayidx, align 4
  %mul = mul nsw i32 %0, 3
  ; Store 3 * b[i] to a[i]; %a is in addrspace(1) as well.
  %idxprom1 = sext i32 %i.0 to i64
  %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %a, i64 %idxprom1
  store i32 %mul, i32 addrspace(1)* %arrayidx2, align 4
  %inc = add nsw i32 %i.0, 1
  br label %for.cond

for.end:                                          ; preds = %for.cond
  ret void
}

; Parameters are unidentified and different address spaces, so cannot vectorize.
define void @bar0(i32* %a, i32 addrspace(1)* %b, i32 %n) #0 {
; Expectation: no <4 x i32> operation appears before the return.
; CHECK-LABEL: @bar0(
; CHECK-NOT: <4 x i32>
; CHECK: ret

entry:
  br label %for.cond

for.cond:                                         ; preds = %for.body, %entry
  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %cmp = icmp slt i32 %i.0, %n
  br i1 %cmp, label %for.body, label %for.end

for.body:                                         ; preds = %for.cond
  ; Load comes from addrspace(1) (%b) ...
  %idxprom = sext i32 %i.0 to i64
  %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %idxprom
  %0 = load i32 addrspace(1)* %arrayidx, align 4
  %mul = mul nsw i32 %0, 3
  ; ... while the store goes through the unqualified (default address
  ; space) pointer %a.
  %idxprom1 = sext i32 %i.0 to i64
  %arrayidx2 = getelementptr inbounds i32* %a, i64 %idxprom1
  store i32 %mul, i32* %arrayidx2, align 4
  %inc = add nsw i32 %i.0, 1
  br label %for.cond

for.end:                                          ; preds = %for.cond
  ret void
}

; Swapped arguments should be the same
define void @bar1(i32 addrspace(1)* %a, i32* %b, i32 %n) #0 {
; Expectation: no <4 x i32> operation appears before the return.
; CHECK-LABEL: @bar1(
; CHECK-NOT: <4 x i32>
; CHECK: ret

entry:
  br label %for.cond

for.cond:                                         ; preds = %for.body, %entry
  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %cmp = icmp slt i32 %i.0, %n
  br i1 %cmp, label %for.body, label %for.end

for.body:                                         ; preds = %for.cond
  ; Mirror image of @bar0: the load uses the unqualified pointer %b ...
  %idxprom = sext i32 %i.0 to i64
  %arrayidx = getelementptr inbounds i32* %b, i64 %idxprom
  %0 = load i32* %arrayidx, align 4
  %mul = mul nsw i32 %0, 3
  ; ... and the store targets addrspace(1) through %a.
  %idxprom1 = sext i32 %i.0 to i64
  %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %a, i64 %idxprom1
  store i32 %mul, i32 addrspace(1)* %arrayidx2, align 4
  %inc = add nsw i32 %i.0, 1
  br label %for.cond

for.end:                                          ; preds = %for.cond
  ret void
}

; We should still be able to vectorize with noalias even if the
; address spaces are different.
define void @bar2(i32* noalias %a, i32 addrspace(1)* noalias %b, i32 %n) #0 {
; Expectation: a <4 x i32> operation appears before the return.
; CHECK-LABEL: @bar2(
; CHECK: <4 x i32>
; CHECK: ret

entry:
  br label %for.cond

for.cond:                                         ; preds = %for.body, %entry
  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %cmp = icmp slt i32 %i.0, %n
  br i1 %cmp, label %for.body, label %for.end

for.body:                                         ; preds = %for.cond
  ; Same accesses as @bar0 (load from addrspace(1), store through an
  ; unqualified pointer); the only difference is noalias on both parameters.
  %idxprom = sext i32 %i.0 to i64
  %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %idxprom
  %0 = load i32 addrspace(1)* %arrayidx, align 4
  %mul = mul nsw i32 %0, 3
  %idxprom1 = sext i32 %i.0 to i64
  %arrayidx2 = getelementptr inbounds i32* %a, i64 %idxprom1
  store i32 %mul, i32* %arrayidx2, align 4
  %inc = add nsw i32 %i.0, 1
  br label %for.cond

for.end:                                          ; preds = %for.cond
  ret void
}

; Store to identified global with different address space. This isn't
; generally safe and shouldn't be vectorized.
define void @arst0(i32* %b, i32 %n) #0 {
; Expectation: no <4 x i32> operation appears before the return.
; CHECK-LABEL: @arst0(
; CHECK-NOT: <4 x i32>
; CHECK: ret

entry:
  br label %for.cond

for.cond:                                         ; preds = %for.body, %entry
  ; %i.0 is the loop counter i; the loop continues while i < %n (signed).
  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %cmp = icmp slt i32 %i.0, %n
  br i1 %cmp, label %for.body, label %for.end

for.body:                                         ; preds = %for.cond
  ; Load b[i] through the unqualified (default address space) pointer %b.
  %idxprom = sext i32 %i.0 to i64
  %arrayidx = getelementptr inbounds i32* %b, i64 %idxprom
  %0 = load i32* %arrayidx, align 4
  %mul = mul nsw i32 %0, 3
  ; Store 3 * b[i] into element i of the addrspace(1) global @g_as1.
  %idxprom1 = sext i32 %i.0 to i64
  %arrayidx2 = getelementptr inbounds [1024 x i32] addrspace(1)* @g_as1, i64 0, i64 %idxprom1
  store i32 %mul, i32 addrspace(1)* %arrayidx2, align 4
  %inc = add nsw i32 %i.0, 1
  br label %for.cond

for.end:                                          ; preds = %for.cond
  ret void
}


; Load from identified global with different address space.
; This isn't generally safe and shouldn't be vectorized.
define void @arst1(i32* %b, i32 %n) #0 {
; Expectation: no <4 x i32> operation appears before the return.
; CHECK-LABEL: @arst1(
; CHECK-NOT: <4 x i32>
; CHECK: ret

entry:
  br label %for.cond

for.cond:                                         ; preds = %for.body, %entry
  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %cmp = icmp slt i32 %i.0, %n
  br i1 %cmp, label %for.body, label %for.end

for.body:                                         ; preds = %for.cond
  ; Load element i of the addrspace(1) global @g_as1.
  %idxprom = sext i32 %i.0 to i64
  %arrayidx = getelementptr inbounds [1024 x i32] addrspace(1)* @g_as1, i64 0, i64 %idxprom
  %0 = load i32 addrspace(1)* %arrayidx, align 4
  %mul = mul nsw i32 %0, 3
  ; Store the tripled value to b[i] through the unqualified pointer %b.
  %idxprom1 = sext i32 %i.0 to i64
  %arrayidx2 = getelementptr inbounds i32* %b, i64 %idxprom1
  store i32 %mul, i32* %arrayidx2, align 4
  %inc = add nsw i32 %i.0, 1
  br label %for.cond

for.end:                                          ; preds = %for.cond
  ret void
}

; Read and write to 2 identified globals in different address
; spaces. This should be vectorized.
define void @aoeu(i32 %n) #0 {
; Expectation: a <4 x i32> operation appears before the return.
; CHECK-LABEL: @aoeu(
; CHECK: <4 x i32>
; CHECK: ret

entry:
  br label %for.cond

for.cond:                                         ; preds = %for.body, %entry
  ; %i.0 is the loop counter i; the loop continues while i < %n (signed).
  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %cmp = icmp slt i32 %i.0, %n
  br i1 %cmp, label %for.body, label %for.end

for.body:                                         ; preds = %for.cond
  ; Load element i of the addrspace(2) global @q_as2.
  %idxprom = sext i32 %i.0 to i64
  %arrayidx = getelementptr inbounds [1024 x i32] addrspace(2)* @q_as2, i64 0, i64 %idxprom
  %0 = load i32 addrspace(2)* %arrayidx, align 4
  %mul = mul nsw i32 %0, 3
  ; Store the tripled value into element i of the addrspace(1) global @g_as1.
  %idxprom1 = sext i32 %i.0 to i64
  %arrayidx2 = getelementptr inbounds [1024 x i32] addrspace(1)* @g_as1, i64 0, i64 %idxprom1
  store i32 %mul, i32 addrspace(1)* %arrayidx2, align 4
  %inc = add nsw i32 %i.0, 1
  br label %for.cond

for.end:                                          ; preds = %for.cond
  ret void
}

; Attribute group #0, referenced by the function definitions in this file.
attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }