1; RUN: opt < %s -O1 -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s 2 3target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64" 4 5%struct.anon = type { [100 x i32], i32, [100 x i32] } 6%struct.anon.0 = type { [100 x [100 x i32]], i32, [100 x [100 x i32]] } 7 8@Foo = common global %struct.anon zeroinitializer, align 4 9@Bar = common global %struct.anon.0 zeroinitializer, align 4 10 11@PB = external global i32* 12@PA = external global i32* 13 14 15;; === First, the tests that should always vectorize, wither statically or by adding run-time checks === 16 17 18; /// Different objects, positive induction, constant distance 19; int noAlias01 (int a) { 20; int i; 21; for (i=0; i<SIZE; i++) 22; Foo.A[i] = Foo.B[i] + a; 23; return Foo.A[a]; 24; } 25; CHECK-LABEL: define i32 @noAlias01( 26; CHECK: add nsw <4 x i32> 27; CHECK: ret 28 29define i32 @noAlias01(i32 %a) nounwind { 30entry: 31 %a.addr = alloca i32, align 4 32 %i = alloca i32, align 4 33 store i32 %a, i32* %a.addr, align 4 34 store i32 0, i32* %i, align 4 35 br label %for.cond 36 37for.cond: ; preds = %for.inc, %entry 38 %0 = load i32* %i, align 4 39 %cmp = icmp slt i32 %0, 100 40 br i1 %cmp, label %for.body, label %for.end 41 42for.body: ; preds = %for.cond 43 %1 = load i32* %i, align 4 44 %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %1 45 %2 = load i32* %arrayidx, align 4 46 %3 = load i32* %a.addr, align 4 47 %add = add nsw i32 %2, %3 48 %4 = load i32* %i, align 4 49 %arrayidx1 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4 50 store i32 %add, i32* %arrayidx1, align 4 51 br label %for.inc 52 53for.inc: ; preds = %for.body 54 %5 = load i32* %i, align 4 55 %inc = add nsw i32 %5, 1 56 store i32 %inc, i32* %i, align 4 57 br label %for.cond 58 59for.end: ; preds = %for.cond 60 %6 = load i32* %a.addr, align 4 61 %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6 62 %7 = load i32* %arrayidx2, align 4 63 ret i32 %7 64} 65 66; /// Different objects, positive induction with widening slide 67; int noAlias02 (int a) { 68; int i; 69; for (i=0; i<SIZE-10; i++) 70; Foo.A[i] = Foo.B[i+10] + a; 71; return Foo.A[a]; 72; } 73; CHECK-LABEL: define i32 @noAlias02( 74; CHECK: add nsw <4 x i32> 75; CHECK: ret 76 77define i32 @noAlias02(i32 %a) { 78entry: 79 %a.addr = alloca i32, align 4 80 %i = alloca i32, align 4 81 store i32 %a, i32* %a.addr, align 4 82 store i32 0, i32* %i, align 4 83 br label %for.cond 84 85for.cond: ; preds = %for.inc, %entry 86 %0 = load i32* %i, align 4 87 %cmp = icmp slt i32 %0, 90 88 br i1 %cmp, label %for.body, label %for.end 89 90for.body: ; preds = %for.cond 91 %1 = load i32* %i, align 4 92 %add = add nsw i32 %1, 10 93 %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %add 94 %2 = load i32* %arrayidx, align 4 95 %3 = load i32* %a.addr, align 4 96 %add1 = add nsw i32 %2, %3 97 %4 = load i32* %i, align 4 98 %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4 99 store i32 %add1, i32* %arrayidx2, align 4 100 br label %for.inc 101 102for.inc: ; preds = %for.body 103 %5 = load i32* %i, align 4 104 %inc = add nsw i32 %5, 1 105 store i32 %inc, i32* %i, align 4 106 br label %for.cond 107 108for.end: ; preds = %for.cond 109 %6 = load i32* %a.addr, align 4 110 %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6 111 %7 = load i32* %arrayidx3, align 4 112 ret i32 %7 113} 114 115; /// Different objects, positive induction with shortening slide 116; int noAlias03 (int a) { 117; int i; 118; for (i=0; i<SIZE; i++) 119; Foo.A[i+10] = Foo.B[i] + a; 120; return Foo.A[a]; 121; } 122; CHECK-LABEL: define i32 @noAlias03( 123; CHECK: add nsw <4 x i32> 124; CHECK: ret 125 126define i32 @noAlias03(i32 %a) { 127entry: 128 %a.addr = alloca i32, align 4 129 %i = alloca i32, align 4 130 store i32 %a, i32* %a.addr, align 4 131 store i32 0, i32* %i, align 4 132 br label %for.cond 133 134for.cond: ; preds = %for.inc, %entry 135 %0 = load i32* %i, align 4 136 %cmp = icmp slt i32 %0, 100 137 br i1 %cmp, label %for.body, label %for.end 138 139for.body: ; preds = %for.cond 140 %1 = load i32* %i, align 4 141 %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %1 142 %2 = load i32* %arrayidx, align 4 143 %3 = load i32* %a.addr, align 4 144 %add = add nsw i32 %2, %3 145 %4 = load i32* %i, align 4 146 %add1 = add nsw i32 %4, 10 147 %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %add1 148 store i32 %add, i32* %arrayidx2, align 4 149 br label %for.inc 150 151for.inc: ; preds = %for.body 152 %5 = load i32* %i, align 4 153 %inc = add nsw i32 %5, 1 154 store i32 %inc, i32* %i, align 4 155 br label %for.cond 156 157for.end: ; preds = %for.cond 158 %6 = load i32* %a.addr, align 4 159 %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6 160 %7 = load i32* %arrayidx3, align 4 161 ret i32 %7 162} 163 164; /// Pointer access, positive stride, run-time check added 165; int noAlias04 (int a) { 166; int i; 167; for (i=0; i<SIZE; i++) 168; *(PA+i) = *(PB+i) + a; 169; return *(PA+a); 170; } 171; CHECK-LABEL: define i32 @noAlias04( 172; CHECK-NOT: add nsw <4 x i32> 173; CHECK: ret 174; 175; TODO: This test vectorizes (with run-time check) on real targets with -O3) 176; Check why it's not being vectorized even when forcing vectorization 177 178define i32 @noAlias04(i32 %a) #0 { 179entry: 180 %a.addr = alloca i32, align 4 181 %i = alloca i32, align 4 182 store i32 %a, i32* %a.addr, align 4 183 store i32 0, i32* %i, align 4 184 br label %for.cond 185 186for.cond: ; preds = %for.inc, %entry 187 %0 = load i32* %i, align 4 188 %cmp = icmp slt i32 %0, 100 189 br i1 %cmp, label %for.body, label %for.end 190 191for.body: ; preds = %for.cond 192 %1 = load i32** @PB, align 4 193 %2 = load i32* %i, align 4 194 %add.ptr = getelementptr inbounds i32* %1, i32 %2 195 %3 = load i32* %add.ptr, align 4 196 %4 = load i32* %a.addr, align 4 197 %add = add nsw i32 %3, %4 198 %5 = load i32** @PA, align 4 199 %6 = load i32* %i, align 4 200 %add.ptr1 = getelementptr inbounds i32* %5, i32 %6 201 store i32 %add, i32* %add.ptr1, align 4 202 br label %for.inc 203 204for.inc: ; preds = %for.body 205 %7 = load i32* %i, align 4 206 %inc = add nsw i32 %7, 1 207 store i32 %inc, i32* %i, align 4 208 br label %for.cond 209 210for.end: ; preds = %for.cond 211 %8 = load i32** @PA, align 4 212 %9 = load i32* %a.addr, align 4 213 %add.ptr2 = getelementptr inbounds i32* %8, i32 %9 214 %10 = load i32* %add.ptr2, align 4 215 ret i32 %10 216} 217 218; /// Different objects, positive induction, multi-array 219; int noAlias05 (int a) { 220; int i, N=10; 221; for (i=0; i<SIZE; i++) 222; Bar.A[N][i] = Bar.B[N][i] + a; 223; return Bar.A[N][a]; 224; } 225; CHECK-LABEL: define i32 @noAlias05( 226; CHECK: add nsw <4 x i32> 227; CHECK: ret 228 229define i32 @noAlias05(i32 %a) #0 { 230entry: 231 %a.addr = alloca i32, align 4 232 %i = alloca i32, align 4 233 %N = alloca i32, align 4 234 store i32 %a, i32* %a.addr, align 4 235 store i32 10, i32* %N, align 4 236 store i32 0, i32* %i, align 4 237 br label %for.cond 238 239for.cond: ; preds = %for.inc, %entry 240 %0 = load i32* %i, align 4 241 %cmp = icmp slt i32 %0, 100 242 br i1 %cmp, label %for.body, label %for.end 243 244for.body: ; preds = %for.cond 245 %1 = load i32* %i, align 4 246 %2 = load i32* %N, align 4 247 %arrayidx = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 2), i32 0, i32 %2 248 %arrayidx1 = getelementptr inbounds [100 x i32]* %arrayidx, i32 0, i32 %1 249 %3 = load i32* %arrayidx1, align 4 250 %4 = load i32* %a.addr, align 4 251 %add = add nsw i32 %3, %4 252 %5 = load i32* %i, align 4 253 %6 = load i32* %N, align 4 254 %arrayidx2 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %6 255 %arrayidx3 = getelementptr inbounds [100 x i32]* %arrayidx2, i32 0, i32 %5 256 store i32 %add, i32* %arrayidx3, align 4 257 br label %for.inc 258 259for.inc: ; preds = %for.body 260 %7 = load i32* %i, align 4 261 %inc = add nsw i32 %7, 1 262 store i32 %inc, i32* %i, align 4 263 br label %for.cond 264 265for.end: ; preds = %for.cond 266 %8 = load i32* %a.addr, align 4 267 %9 = load i32* %N, align 4 268 %arrayidx4 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %9 269 %arrayidx5 = getelementptr inbounds [100 x i32]* %arrayidx4, i32 0, i32 %8 270 %10 = load i32* %arrayidx5, align 4 271 ret i32 %10 272} 273 274; /// Same objects, positive induction, multi-array, different sub-elements 275; int noAlias06 (int a) { 276; int i, N=10; 277; for (i=0; i<SIZE; i++) 278; Bar.A[N][i] = Bar.A[N+1][i] + a; 279; return Bar.A[N][a]; 280; } 281; CHECK-LABEL: define i32 @noAlias06( 282; CHECK: add nsw <4 x i32> 283; CHECK: ret 284 285define i32 @noAlias06(i32 %a) #0 { 286entry: 287 %a.addr = alloca i32, align 4 288 %i = alloca i32, align 4 289 %N = alloca i32, align 4 290 store i32 %a, i32* %a.addr, align 4 291 store i32 10, i32* %N, align 4 292 store i32 0, i32* %i, align 4 293 br label %for.cond 294 295for.cond: ; preds = %for.inc, %entry 296 %0 = load i32* %i, align 4 297 %cmp = icmp slt i32 %0, 100 298 br i1 %cmp, label %for.body, label %for.end 299 300for.body: ; preds = %for.cond 301 %1 = load i32* %i, align 4 302 %2 = load i32* %N, align 4 303 %add = add nsw i32 %2, 1 304 %arrayidx = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %add 305 %arrayidx1 = getelementptr inbounds [100 x i32]* %arrayidx, i32 0, i32 %1 306 %3 = load i32* %arrayidx1, align 4 307 %4 = load i32* %a.addr, align 4 308 %add2 = add nsw i32 %3, %4 309 %5 = load i32* %i, align 4 310 %6 = load i32* %N, align 4 311 %arrayidx3 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %6 312 %arrayidx4 = getelementptr inbounds [100 x i32]* %arrayidx3, i32 0, i32 %5 313 store i32 %add2, i32* %arrayidx4, align 4 314 br label %for.inc 315 316for.inc: ; preds = %for.body 317 %7 = load i32* %i, align 4 318 %inc = add nsw i32 %7, 1 319 store i32 %inc, i32* %i, align 4 320 br label %for.cond 321 322for.end: ; preds = %for.cond 323 %8 = load i32* %a.addr, align 4 324 %9 = load i32* %N, align 4 325 %arrayidx5 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %9 326 %arrayidx6 = getelementptr inbounds [100 x i32]* %arrayidx5, i32 0, i32 %8 327 %10 = load i32* %arrayidx6, align 4 328 ret i32 %10 329} 330 331; /// Different objects, negative induction, constant distance 332; int noAlias07 (int a) { 333; int i; 334; for (i=0; i<SIZE; i++) 335; Foo.A[SIZE-i-1] = Foo.B[SIZE-i-1] + a; 336; return Foo.A[a]; 337; } 338; CHECK-LABEL: define i32 @noAlias07( 339; CHECK: store <4 x i32> 340; CHECK: ret 341define i32 @noAlias07(i32 %a) #0 { 342entry: 343 %a.addr = alloca i32, align 4 344 %i = alloca i32, align 4 345 store i32 %a, i32* %a.addr, align 4 346 store i32 0, i32* %i, align 4 347 br label %for.cond 348 349for.cond: ; preds = %for.inc, %entry 350 %0 = load i32* %i, align 4 351 %cmp = icmp slt i32 %0, 100 352 br i1 %cmp, label %for.body, label %for.end 353 354for.body: ; preds = %for.cond 355 %1 = load i32* %i, align 4 356 %sub = sub nsw i32 100, %1 357 %sub1 = sub nsw i32 %sub, 1 358 %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1 359 %2 = load i32* %arrayidx, align 4 360 %3 = load i32* %a.addr, align 4 361 %add = add nsw i32 %2, %3 362 %4 = load i32* %i, align 4 363 %sub2 = sub nsw i32 100, %4 364 %sub3 = sub nsw i32 %sub2, 1 365 %arrayidx4 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub3 366 store i32 %add, i32* %arrayidx4, align 4 367 br label %for.inc 368 369for.inc: ; preds = %for.body 370 %5 = load i32* %i, align 4 371 %inc = add nsw i32 %5, 1 372 store i32 %inc, i32* %i, align 4 373 br label %for.cond 374 375for.end: ; preds = %for.cond 376 %6 = load i32* %a.addr, align 4 377 %arrayidx5 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6 378 %7 = load i32* %arrayidx5, align 4 379 ret i32 %7 380} 381 382; /// Different objects, negative induction, shortening slide 383; int noAlias08 (int a) { 384; int i; 385; for (i=0; i<SIZE-10; i++) 386; Foo.A[SIZE-i-1] = Foo.B[SIZE-i-10] + a; 387; return Foo.A[a]; 388; } 389; CHECK-LABEL: define i32 @noAlias08( 390; CHECK: sub <4 x i32> 391; CHECK: ret 392 393define i32 @noAlias08(i32 %a) #0 { 394entry: 395 %a.addr = alloca i32, align 4 396 %i = alloca i32, align 4 397 store i32 %a, i32* %a.addr, align 4 398 store i32 0, i32* %i, align 4 399 br label %for.cond 400 401for.cond: ; preds = %for.inc, %entry 402 %0 = load i32* %i, align 4 403 %cmp = icmp slt i32 %0, 90 404 br i1 %cmp, label %for.body, label %for.end 405 406for.body: ; preds = %for.cond 407 %1 = load i32* %i, align 4 408 %sub = sub nsw i32 100, %1 409 %sub1 = sub nsw i32 %sub, 10 410 %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1 411 %2 = load i32* %arrayidx, align 4 412 %3 = load i32* %a.addr, align 4 413 %add = add nsw i32 %2, %3 414 %4 = load i32* %i, align 4 415 %sub2 = sub nsw i32 100, %4 416 %sub3 = sub nsw i32 %sub2, 1 417 %arrayidx4 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub3 418 store i32 %add, i32* %arrayidx4, align 4 419 br label %for.inc 420 421for.inc: ; preds = %for.body 422 %5 = load i32* %i, align 4 423 %inc = add nsw i32 %5, 1 424 store i32 %inc, i32* %i, align 4 425 br label %for.cond 426 427for.end: ; preds = %for.cond 428 %6 = load i32* %a.addr, align 4 429 %arrayidx5 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6 430 %7 = load i32* %arrayidx5, align 4 431 ret i32 %7 432} 433 434; /// Different objects, negative induction, widening slide 435; int noAlias09 (int a) { 436; int i; 437; for (i=0; i<SIZE; i++) 438; Foo.A[SIZE-i-10] = Foo.B[SIZE-i-1] + a; 439; return Foo.A[a]; 440; } 441; CHECK-LABEL: define i32 @noAlias09( 442; CHECK: sub <4 x i32> 443; CHECK: ret 444 445define i32 @noAlias09(i32 %a) #0 { 446entry: 447 %a.addr = alloca i32, align 4 448 %i = alloca i32, align 4 449 store i32 %a, i32* %a.addr, align 4 450 store i32 0, i32* %i, align 4 451 br label %for.cond 452 453for.cond: ; preds = %for.inc, %entry 454 %0 = load i32* %i, align 4 455 %cmp = icmp slt i32 %0, 100 456 br i1 %cmp, label %for.body, label %for.end 457 458for.body: ; preds = %for.cond 459 %1 = load i32* %i, align 4 460 %sub = sub nsw i32 100, %1 461 %sub1 = sub nsw i32 %sub, 1 462 %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1 463 %2 = load i32* %arrayidx, align 4 464 %3 = load i32* %a.addr, align 4 465 %add = add nsw i32 %2, %3 466 %4 = load i32* %i, align 4 467 %sub2 = sub nsw i32 100, %4 468 %sub3 = sub nsw i32 %sub2, 10 469 %arrayidx4 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub3 470 store i32 %add, i32* %arrayidx4, align 4 471 br label %for.inc 472 473for.inc: ; preds = %for.body 474 %5 = load i32* %i, align 4 475 %inc = add nsw i32 %5, 1 476 store i32 %inc, i32* %i, align 4 477 br label %for.cond 478 479for.end: ; preds = %for.cond 480 %6 = load i32* %a.addr, align 4 481 %arrayidx5 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6 482 %7 = load i32* %arrayidx5, align 4 483 ret i32 %7 484} 485 486; /// Pointer access, negative stride, run-time check added 487; int noAlias10 (int a) { 488; int i; 489; for (i=0; i<SIZE; i++) 490; *(PA+SIZE-i-1) = *(PB+SIZE-i-1) + a; 491; return *(PA+a); 492; } 493; CHECK-LABEL: define i32 @noAlias10( 494; CHECK-NOT: sub {{.*}} <4 x i32> 495; CHECK: ret 496; 497; TODO: This test vectorizes (with run-time check) on real targets with -O3) 498; Check why it's not being vectorized even when forcing vectorization 499 500define i32 @noAlias10(i32 %a) #0 { 501entry: 502 %a.addr = alloca i32, align 4 503 %i = alloca i32, align 4 504 store i32 %a, i32* %a.addr, align 4 505 store i32 0, i32* %i, align 4 506 br label %for.cond 507 508for.cond: ; preds = %for.inc, %entry 509 %0 = load i32* %i, align 4 510 %cmp = icmp slt i32 %0, 100 511 br i1 %cmp, label %for.body, label %for.end 512 513for.body: ; preds = %for.cond 514 %1 = load i32** @PB, align 4 515 %add.ptr = getelementptr inbounds i32* %1, i32 100 516 %2 = load i32* %i, align 4 517 %idx.neg = sub i32 0, %2 518 %add.ptr1 = getelementptr inbounds i32* %add.ptr, i32 %idx.neg 519 %add.ptr2 = getelementptr inbounds i32* %add.ptr1, i32 -1 520 %3 = load i32* %add.ptr2, align 4 521 %4 = load i32* %a.addr, align 4 522 %add = add nsw i32 %3, %4 523 %5 = load i32** @PA, align 4 524 %add.ptr3 = getelementptr inbounds i32* %5, i32 100 525 %6 = load i32* %i, align 4 526 %idx.neg4 = sub i32 0, %6 527 %add.ptr5 = getelementptr inbounds i32* %add.ptr3, i32 %idx.neg4 528 %add.ptr6 = getelementptr inbounds i32* %add.ptr5, i32 -1 529 store i32 %add, i32* %add.ptr6, align 4 530 br label %for.inc 531 532for.inc: ; preds = %for.body 533 %7 = load i32* %i, align 4 534 %inc = add nsw i32 %7, 1 535 store i32 %inc, i32* %i, align 4 536 br label %for.cond 537 538for.end: ; preds = %for.cond 539 %8 = load i32** @PA, align 4 540 %9 = load i32* %a.addr, align 4 541 %add.ptr7 = getelementptr inbounds i32* %8, i32 %9 542 %10 = load i32* %add.ptr7, align 4 543 ret i32 %10 544} 545 546; /// Different objects, negative induction, multi-array 547; int noAlias11 (int a) { 548; int i, N=10; 549; for (i=0; i<SIZE; i++) 550; Bar.A[N][SIZE-i-1] = Bar.B[N][SIZE-i-1] + a; 551; return Bar.A[N][a]; 552; } 553; CHECK-LABEL: define i32 @noAlias11( 554; CHECK: store <4 x i32> 555; CHECK: ret 556 557define i32 @noAlias11(i32 %a) #0 { 558entry: 559 %a.addr = alloca i32, align 4 560 %i = alloca i32, align 4 561 %N = alloca i32, align 4 562 store i32 %a, i32* %a.addr, align 4 563 store i32 10, i32* %N, align 4 564 store i32 0, i32* %i, align 4 565 br label %for.cond 566 567for.cond: ; preds = %for.inc, %entry 568 %0 = load i32* %i, align 4 569 %cmp = icmp slt i32 %0, 100 570 br i1 %cmp, label %for.body, label %for.end 571 572for.body: ; preds = %for.cond 573 %1 = load i32* %i, align 4 574 %sub = sub nsw i32 100, %1 575 %sub1 = sub nsw i32 %sub, 1 576 %2 = load i32* %N, align 4 577 %arrayidx = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 2), i32 0, i32 %2 578 %arrayidx2 = getelementptr inbounds [100 x i32]* %arrayidx, i32 0, i32 %sub1 579 %3 = load i32* %arrayidx2, align 4 580 %4 = load i32* %a.addr, align 4 581 %add = add nsw i32 %3, %4 582 %5 = load i32* %i, align 4 583 %sub3 = sub nsw i32 100, %5 584 %sub4 = sub nsw i32 %sub3, 1 585 %6 = load i32* %N, align 4 586 %arrayidx5 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %6 587 %arrayidx6 = getelementptr inbounds [100 x i32]* %arrayidx5, i32 0, i32 %sub4 588 store i32 %add, i32* %arrayidx6, align 4 589 br label %for.inc 590 591for.inc: ; preds = %for.body 592 %7 = load i32* %i, align 4 593 %inc = add nsw i32 %7, 1 594 store i32 %inc, i32* %i, align 4 595 br label %for.cond 596 597for.end: ; preds = %for.cond 598 %8 = load i32* %a.addr, align 4 599 %9 = load i32* %N, align 4 600 %arrayidx7 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %9 601 %arrayidx8 = getelementptr inbounds [100 x i32]* %arrayidx7, i32 0, i32 %8 602 %10 = load i32* %arrayidx8, align 4 603 ret i32 %10 604} 605 606; /// Same objects, negative induction, multi-array, different sub-elements 607; int noAlias12 (int a) { 608; int i, N=10; 609; for (i=0; i<SIZE; i++) 610; Bar.A[N][SIZE-i-1] = Bar.A[N+1][SIZE-i-1] + a; 611; return Bar.A[N][a]; 612; } 613; CHECK-LABEL: define i32 @noAlias12( 614; CHECK: store <4 x i32> 615; CHECK: ret 616 617define i32 @noAlias12(i32 %a) #0 { 618entry: 619 %a.addr = alloca i32, align 4 620 %i = alloca i32, align 4 621 %N = alloca i32, align 4 622 store i32 %a, i32* %a.addr, align 4 623 store i32 10, i32* %N, align 4 624 store i32 0, i32* %i, align 4 625 br label %for.cond 626 627for.cond: ; preds = %for.inc, %entry 628 %0 = load i32* %i, align 4 629 %cmp = icmp slt i32 %0, 100 630 br i1 %cmp, label %for.body, label %for.end 631 632for.body: ; preds = %for.cond 633 %1 = load i32* %i, align 4 634 %sub = sub nsw i32 100, %1 635 %sub1 = sub nsw i32 %sub, 1 636 %2 = load i32* %N, align 4 637 %add = add nsw i32 %2, 1 638 %arrayidx = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %add 639 %arrayidx2 = getelementptr inbounds [100 x i32]* %arrayidx, i32 0, i32 %sub1 640 %3 = load i32* %arrayidx2, align 4 641 %4 = load i32* %a.addr, align 4 642 %add3 = add nsw i32 %3, %4 643 %5 = load i32* %i, align 4 644 %sub4 = sub nsw i32 100, %5 645 %sub5 = sub nsw i32 %sub4, 1 646 %6 = load i32* %N, align 4 647 %arrayidx6 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %6 648 %arrayidx7 = getelementptr inbounds [100 x i32]* %arrayidx6, i32 0, i32 %sub5 649 store i32 %add3, i32* %arrayidx7, align 4 650 br label %for.inc 651 652for.inc: ; preds = %for.body 653 %7 = load i32* %i, align 4 654 %inc = add nsw i32 %7, 1 655 store i32 %inc, i32* %i, align 4 656 br label %for.cond 657 658for.end: ; preds = %for.cond 659 %8 = load i32* %a.addr, align 4 660 %9 = load i32* %N, align 4 661 %arrayidx8 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %9 662 %arrayidx9 = getelementptr inbounds [100 x i32]* %arrayidx8, i32 0, i32 %8 663 %10 = load i32* %arrayidx9, align 4 664 ret i32 %10 665} 666 667; /// Same objects, positive induction, constant distance, just enough for vector size 668; int noAlias13 (int a) { 669; int i; 670; for (i=0; i<SIZE; i++) 671; Foo.A[i] = Foo.A[i+4] + a; 672; return Foo.A[a]; 673; } 674; CHECK-LABEL: define i32 @noAlias13( 675; CHECK: add nsw <4 x i32> 676; CHECK: ret 677 678define i32 @noAlias13(i32 %a) #0 { 679entry: 680 %a.addr = alloca i32, align 4 681 %i = alloca i32, align 4 682 store i32 %a, i32* %a.addr, align 4 683 store i32 0, i32* %i, align 4 684 br label %for.cond 685 686for.cond: ; preds = %for.inc, %entry 687 %0 = load i32* %i, align 4 688 %cmp = icmp slt i32 %0, 100 689 br i1 %cmp, label %for.body, label %for.end 690 691for.body: ; preds = %for.cond 692 %1 = load i32* %i, align 4 693 %add = add nsw i32 %1, 4 694 %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %add 695 %2 = load i32* %arrayidx, align 4 696 %3 = load i32* %a.addr, align 4 697 %add1 = add nsw i32 %2, %3 698 %4 = load i32* %i, align 4 699 %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4 700 store i32 %add1, i32* %arrayidx2, align 4 701 br label %for.inc 702 703for.inc: ; preds = %for.body 704 %5 = load i32* %i, align 4 705 %inc = add nsw i32 %5, 1 706 store i32 %inc, i32* %i, align 4 707 br label %for.cond 708 709for.end: ; preds = %for.cond 710 %6 = load i32* %a.addr, align 4 711 %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6 712 %7 = load i32* %arrayidx3, align 4 713 ret i32 %7 714} 715 716; /// Same objects, negative induction, constant distance, just enough for vector size 717; int noAlias14 (int a) { 718; int i; 719; for (i=0; i<SIZE; i++) 720; Foo.A[SIZE-i-1] = Foo.A[SIZE-i-5] + a; 721; return Foo.A[a]; 722; } 723; CHECK-LABEL: define i32 @noAlias14( 724; CHECK: sub <4 x i32> 725; CHECK: ret 726 727define i32 @noAlias14(i32 %a) #0 { 728entry: 729 %a.addr = alloca i32, align 4 730 %i = alloca i32, align 4 731 store i32 %a, i32* %a.addr, align 4 732 store i32 0, i32* %i, align 4 733 br label %for.cond 734 735for.cond: ; preds = %for.inc, %entry 736 %0 = load i32* %i, align 4 737 %cmp = icmp slt i32 %0, 100 738 br i1 %cmp, label %for.body, label %for.end 739 740for.body: ; preds = %for.cond 741 %1 = load i32* %i, align 4 742 %sub = sub nsw i32 100, %1 743 %sub1 = sub nsw i32 %sub, 5 744 %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub1 745 %2 = load i32* %arrayidx, align 4 746 %3 = load i32* %a.addr, align 4 747 %add = add nsw i32 %2, %3 748 %4 = load i32* %i, align 4 749 %sub2 = sub nsw i32 100, %4 750 %sub3 = sub nsw i32 %sub2, 1 751 %arrayidx4 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub3 752 store i32 %add, i32* %arrayidx4, align 4 753 br label %for.inc 754 755for.inc: ; preds = %for.body 756 %5 = load i32* %i, align 4 757 %inc = add nsw i32 %5, 1 758 store i32 %inc, i32* %i, align 4 759 br label %for.cond 760 761for.end: ; preds = %for.cond 762 %6 = load i32* %a.addr, align 4 763 %arrayidx5 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6 764 %7 = load i32* %arrayidx5, align 4 765 ret i32 %7 766} 767 768 769;; === Now, the tests that we could vectorize with induction changes or run-time checks === 770 771 772; /// Different objects, swapped induction, alias at the end 773; int mayAlias01 (int a) { 774; int i; 775; for (i=0; i<SIZE; i++) 776; Foo.A[i] = Foo.B[SIZE-i-1] + a; 777; return Foo.A[a]; 778; } 779; CHECK-LABEL: define i32 @mayAlias01( 780; CHECK-NOT: add nsw <4 x i32> 781; CHECK: ret 782 783define i32 @mayAlias01(i32 %a) nounwind { 784entry: 785 %a.addr = alloca i32, align 4 786 %i = alloca i32, align 4 787 store i32 %a, i32* %a.addr, align 4 788 store i32 0, i32* %i, align 4 789 br label %for.cond 790 791for.cond: ; preds = %for.inc, %entry 792 %0 = load i32* %i, align 4 793 %cmp = icmp slt i32 %0, 100 794 br i1 %cmp, label %for.body, label %for.end 795 796for.body: ; preds = %for.cond 797 %1 = load i32* %i, align 4 798 %sub = sub nsw i32 100, %1 799 %sub1 = sub nsw i32 %sub, 1 800 %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1 801 %2 = load i32* %arrayidx, align 4 802 %3 = load i32* %a.addr, align 4 803 %add = add nsw i32 %2, %3 804 %4 = load i32* %i, align 4 805 %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4 806 store i32 %add, i32* %arrayidx2, align 4 807 br label %for.inc 808 809for.inc: ; preds = %for.body 810 %5 = load i32* %i, align 4 811 %inc = add nsw i32 %5, 1 812 store i32 %inc, i32* %i, align 4 813 br label %for.cond 814 815for.end: ; preds = %for.cond 816 %6 = load i32* %a.addr, align 4 817 %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6 818 %7 = load i32* %arrayidx3, align 4 819 ret i32 %7 820} 821 822; /// Different objects, swapped induction, alias at the beginning 823; int mayAlias02 (int a) { 824; int i; 825; for (i=0; i<SIZE; i++) 826; Foo.A[SIZE-i-1] = Foo.B[i] + a; 827; return Foo.A[a]; 828; } 829; CHECK-LABEL: define i32 @mayAlias02( 830; CHECK-NOT: add nsw <4 x i32> 831; CHECK: ret 832 833define i32 @mayAlias02(i32 %a) nounwind { 834entry: 835 %a.addr = alloca i32, align 4 836 %i = alloca i32, align 4 837 store i32 %a, i32* %a.addr, align 4 838 store i32 0, i32* %i, align 4 839 br label %for.cond 840 841for.cond: ; preds = %for.inc, %entry 842 %0 = load i32* %i, align 4 843 %cmp = icmp slt i32 %0, 100 844 br i1 %cmp, label %for.body, label %for.end 845 846for.body: ; preds = %for.cond 847 %1 = load i32* %i, align 4 848 %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %1 849 %2 = load i32* %arrayidx, align 4 850 %3 = load i32* %a.addr, align 4 851 %add = add nsw i32 %2, %3 852 %4 = load i32* %i, align 4 853 %sub = sub nsw i32 100, %4 854 %sub1 = sub nsw i32 %sub, 1 855 %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub1 856 store i32 %add, i32* %arrayidx2, align 4 857 br label %for.inc 858 859for.inc: ; preds = %for.body 860 %5 = load i32* %i, align 4 861 %inc = add nsw i32 %5, 1 862 store i32 %inc, i32* %i, align 4 863 br label %for.cond 864 865for.end: ; preds = %for.cond 866 %6 = load i32* %a.addr, align 4 867 %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6 868 %7 = load i32* %arrayidx3, align 4 869 ret i32 %7 870} 871 872; /// Pointer access, run-time check added 873; int mayAlias03 (int a) { 874; int i; 875; for (i=0; i<SIZE; i++) 876; *(PA+i) = *(PB+SIZE-i-1) + a; 877; return *(PA+a); 878; } 879; CHECK-LABEL: define i32 @mayAlias03( 880; CHECK-NOT: add nsw <4 x i32> 881; CHECK: ret 882 883define i32 @mayAlias03(i32 %a) nounwind { 884entry: 885 %a.addr = alloca i32, align 4 886 %i = alloca i32, align 4 887 store i32 %a, i32* %a.addr, align 4 888 store i32 0, i32* %i, align 4 889 br label %for.cond 890 891for.cond: ; preds = %for.inc, %entry 892 %0 = load i32* %i, align 4 893 %cmp = icmp slt i32 %0, 100 894 br i1 %cmp, label %for.body, label %for.end 895 896for.body: ; preds = %for.cond 897 %1 = load i32** @PB, align 4 898 %add.ptr = getelementptr inbounds i32* %1, i32 100 899 %2 = load i32* %i, align 4 900 %idx.neg = sub i32 0, %2 901 %add.ptr1 = getelementptr inbounds i32* %add.ptr, i32 %idx.neg 902 %add.ptr2 = getelementptr inbounds i32* %add.ptr1, i32 -1 903 %3 = load i32* %add.ptr2, align 4 904 %4 = load i32* %a.addr, align 4 905 %add = add nsw i32 %3, %4 906 %5 = load i32** @PA, align 4 907 %6 = load i32* %i, align 4 908 %add.ptr3 = getelementptr inbounds i32* %5, i32 %6 909 store i32 %add, i32* %add.ptr3, align 4 910 br label %for.inc 911 912for.inc: ; preds = %for.body 913 %7 = load i32* %i, align 4 914 %inc = add nsw i32 %7, 1 915 store i32 %inc, i32* %i, align 4 916 br label %for.cond 917 918for.end: ; preds = %for.cond 919 %8 = load i32** @PA, align 4 920 %9 = load i32* %a.addr, align 4 921 %add.ptr4 = getelementptr inbounds i32* %8, i32 %9 922 %10 = load i32* %add.ptr4, align 4 923 ret i32 %10 924} 925 926 927;; === Finally, the tests that should only vectorize with care (or if we ignore undefined behaviour at all) === 928 929 930; int mustAlias01 (int a) { 931; int i; 932; for (i=0; i<SIZE; i++) 933; Foo.A[i+10] = Foo.B[SIZE-i-1] + a; 934; return Foo.A[a]; 935; } 936; CHECK-LABEL: define i32 @mustAlias01( 937; CHECK-NOT: add nsw <4 x i32> 938; CHECK: ret 939 940define i32 @mustAlias01(i32 %a) nounwind { 941entry: 942 %a.addr = alloca i32, align 4 943 %i = alloca i32, align 4 944 store i32 %a, i32* %a.addr, align 4 945 store i32 0, i32* %i, align 4 946 br label %for.cond 947 948for.cond: ; preds = %for.inc, %entry 949 %0 = load i32* %i, align 4 950 %cmp = icmp slt i32 %0, 100 951 br i1 %cmp, label %for.body, label %for.end 952 953for.body: ; preds = %for.cond 954 %1 = load i32* %i, align 4 955 %sub = sub nsw i32 100, %1 956 %sub1 = sub nsw i32 %sub, 1 957 %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1 958 %2 = load i32* %arrayidx, align 4 959 %3 = load i32* %a.addr, align 4 960 %add = add nsw i32 %2, %3 961 %4 = load i32* %i, align 4 962 %add2 = add nsw i32 %4, 10 963 %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %add2 964 store i32 %add, i32* %arrayidx3, align 4 965 br label %for.inc 966 967for.inc: ; preds = %for.body 968 %5 = load i32* %i, align 4 969 %inc = add nsw i32 %5, 1 970 store i32 %inc, i32* %i, align 4 971 br label %for.cond 972 973for.end: ; preds = %for.cond 974 %6 = load i32* %a.addr, align 4 975 %arrayidx4 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6 976 %7 = load i32* %arrayidx4, align 4 977 ret i32 %7 978} 979 980; int mustAlias02 (int a) { 981; int i; 982; for (i=0; i<SIZE; i++) 983; Foo.A[i] = Foo.B[SIZE-i-10] + a; 984; return Foo.A[a]; 985; } 986; CHECK-LABEL: define i32 @mustAlias02( 987; CHECK-NOT: add nsw <4 x i32> 988; CHECK: ret 989 990define i32 @mustAlias02(i32 %a) nounwind { 991entry: 992 %a.addr = alloca i32, align 4 993 %i = alloca i32, align 4 994 store i32 %a, i32* %a.addr, align 4 995 store i32 0, i32* %i, align 4 996 br label %for.cond 997 998for.cond: ; preds = %for.inc, %entry 999 %0 = load i32* %i, align 4 1000 %cmp = icmp slt i32 %0, 100 1001 br i1 %cmp, label %for.body, label %for.end 1002 1003for.body: ; preds = %for.cond 1004 %1 = load i32* %i, align 4 1005 %sub = sub nsw i32 100, %1 1006 %sub1 = sub nsw i32 %sub, 10 1007 %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1 1008 %2 = load i32* %arrayidx, align 4 1009 %3 = load i32* %a.addr, align 4 1010 %add = add nsw i32 %2, %3 1011 %4 = load i32* %i, align 4 1012 %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4 1013 store i32 %add, i32* %arrayidx2, align 4 1014 br label %for.inc 1015 1016for.inc: ; preds = %for.body 1017 %5 = load i32* %i, align 4 1018 %inc = add nsw i32 %5, 1 1019 store i32 %inc, i32* %i, align 4 1020 br label %for.cond 1021 1022for.end: ; preds = %for.cond 1023 %6 = load i32* %a.addr, align 4 1024 %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6 1025 %7 = load i32* %arrayidx3, align 4 1026 ret i32 %7 1027} 1028 1029; int mustAlias03 (int a) { 1030; int i; 1031; for (i=0; i<SIZE; i++) 1032; Foo.A[i+10] = Foo.B[SIZE-i-10] + a; 1033; return Foo.A[a]; 1034; } 1035; CHECK-LABEL: define i32 @mustAlias03( 1036; CHECK-NOT: add nsw <4 x i32> 1037; CHECK: ret 1038 1039define i32 @mustAlias03(i32 %a) nounwind { 1040entry: 1041 %a.addr = alloca i32, align 4 1042 %i = alloca i32, align 4 1043 store i32 %a, i32* %a.addr, align 4 1044 store i32 0, i32* %i, align 4 1045 br label %for.cond 1046 1047for.cond: ; preds = %for.inc, %entry 1048 %0 = load i32* %i, align 4 1049 %cmp = icmp slt i32 %0, 100 1050 br i1 %cmp, label %for.body, label %for.end 1051 1052for.body: ; preds = %for.cond 1053 %1 = load i32* %i, align 4 1054 %sub = sub nsw i32 100, %1 1055 %sub1 = sub nsw i32 %sub, 10 1056 %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1 1057 %2 = load i32* %arrayidx, align 4 1058 %3 = load i32* %a.addr, align 4 1059 %add = add nsw i32 %2, %3 1060 %4 = load i32* %i, align 4 1061 %add2 = add nsw i32 %4, 10 1062 %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %add2 1063 store i32 %add, i32* %arrayidx3, align 4 1064 br label %for.inc 1065 1066for.inc: ; preds = %for.body 1067 %5 = load i32* %i, align 4 1068 %inc = add nsw i32 %5, 1 1069 store i32 %inc, i32* %i, align 4 1070 br label %for.cond 1071 1072for.end: ; preds = %for.cond 1073 %6 = load i32* %a.addr, align 4 1074 %arrayidx4 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6 1075 %7 = load i32* %arrayidx4, align 4 1076 ret i32 %7 1077} 1078