; Verify that rectangular (non-square) tile sizes are applied by the isl-based
; schedule optimizer, for first-level tiling, second-level tiling, register
; tiling and the Polly vectorizer.
;
; The kernel under test corresponds to:
;
;   void rect(int A[][512]) {
;     for (int i = 0; i < 1024; i++)
;       for (int j = 0; j < 512; j++)
;         A[i][j] = (j * i) % 42;
;   }

; First-level tiling only, and the same command with tiling switched off.
; RUN: opt %loadPolly -polly-opt-isl -analyze -polly-ast -polly-tile-sizes=256,16 < %s | FileCheck %s
; RUN: opt %loadPolly -polly-opt-isl -analyze -polly-tiling=false -polly-ast -polly-tile-sizes=256,16 < %s | FileCheck %s --check-prefix=NOTILING

; First-level (256 x 16) plus second-level (16 x 8) tiling.
; RUN: opt %loadPolly -polly-opt-isl -analyze \
; RUN:                -polly-2nd-level-tiling -polly-ast \
; RUN:                -polly-tile-sizes=256,16 \
; RUN:                -polly-2nd-level-tile-sizes=16,8 < %s | \
; RUN: FileCheck %s --check-prefix=TWOLEVEL

; Two tiling levels plus register tiling; no register tile sizes are passed.
; RUN: opt %loadPolly -polly-opt-isl -analyze \
; RUN:                -polly-2nd-level-tiling -polly-ast \
; RUN:                -polly-tile-sizes=256,16 \
; RUN:                -polly-register-tiling \
; RUN:                -polly-2nd-level-tile-sizes=16,8 < %s | \
; RUN: FileCheck %s --check-prefix=TWO-PLUS-REGISTER

; Two tiling levels, 2 x 4 register tiles and the Polly vectorizer.
; RUN: opt %loadPolly -polly-opt-isl -analyze \
; RUN:                -polly-2nd-level-tiling -polly-ast \
; RUN:                -polly-tile-sizes=256,16 \
; RUN:                -polly-register-tiling -polly-register-tile-sizes=2,4 \
; RUN:                -polly-vectorizer=polly \
; RUN:                -polly-2nd-level-tile-sizes=16,8 < %s | \
; RUN: FileCheck %s --check-prefix=TWO-PLUS-REGISTER-PLUS-VECTORIZATION

; Expected AST with 256 x 16 tiles: 1024 / 256 = 4 tiles along the outer
; dimension and 512 / 16 = 32 tiles along the inner dimension.
; CHECK: // 1st level tiling - Tiles
; CHECK: for (int c0 = 0; c0 <= 3; c0 += 1)
; CHECK:   for (int c1 = 0; c1 <= 31; c1 += 1)
; CHECK:     // 1st level tiling - Points
; CHECK:     for (int c2 = 0; c2 <= 255; c2 += 1)
; CHECK:       for (int c3 = 0; c3 <= 15; c3 += 1)
; CHECK:         Stmt_for_body3(256 * c0 + c2, 16 * c1 + c3);

; Expected AST with tiling disabled: the original 1024 x 512 loop nest.
; NOTILING: for (int c0 = 0; c0 <= 1023; c0 += 1)
; NOTILING:   for (int c1 = 0; c1 <= 511; c1 += 1)
; NOTILING:     Stmt_for_body3(c0, c1);


; Expected AST with two tiling levels: each 256 x 16 tile is split into
; 16 x 2 second-level tiles of 16 x 8 points.
; TWOLEVEL: // 1st level tiling - Tiles
; TWOLEVEL: for (int c0 = 0; c0 <= 3; c0 += 1)
; TWOLEVEL:   for (int c1 = 0; c1 <= 31; c1 += 1)
; TWOLEVEL:     // 1st level tiling - Points
; TWOLEVEL:     // 2nd level tiling - Tiles
; TWOLEVEL:     for (int c2 = 0; c2 <= 15; c2 += 1)
; TWOLEVEL:       for (int c3 = 0; c3 <= 1; c3 += 1)
; TWOLEVEL:         // 2nd level tiling - Points
; TWOLEVEL:         for (int c4 = 0; c4 <= 15; c4 += 1)
; TWOLEVEL:           for (int c5 = 0; c5 <= 7; c5 += 1)
; TWOLEVEL:             Stmt_for_body3(256 * c0 + 16 * c2 + c4, 16 * c1 + 8 * c3 + c5);


; Expected AST with register tiling added: the innermost point loops are
; replaced by a fully unrolled 2 x 2 group of statement instances.
; TWO-PLUS-REGISTER: // 1st level tiling - Tiles
; TWO-PLUS-REGISTER: for (int c0 = 0; c0 <= 3; c0 += 1)
; TWO-PLUS-REGISTER:   for (int c1 = 0; c1 <= 31; c1 += 1)
; TWO-PLUS-REGISTER:     // 1st level tiling - Points
; TWO-PLUS-REGISTER:     // 2nd level tiling - Tiles
; TWO-PLUS-REGISTER:     for (int c2 = 0; c2 <= 15; c2 += 1)
; TWO-PLUS-REGISTER:       for (int c3 = 0; c3 <= 1; c3 += 1)
; TWO-PLUS-REGISTER:         // 2nd level tiling - Points
; TWO-PLUS-REGISTER:         // Register tiling - Tiles
; TWO-PLUS-REGISTER:         for (int c4 = 0; c4 <= 7; c4 += 1)
; TWO-PLUS-REGISTER:           for (int c5 = 0; c5 <= 3; c5 += 1)
; TWO-PLUS-REGISTER:             // Register tiling - Points
; TWO-PLUS-REGISTER:             {
; TWO-PLUS-REGISTER:               Stmt_for_body3(256 * c0 + 16 * c2 + 2 * c4, 16 * c1 + 8 * c3 + 2 * c5);
; TWO-PLUS-REGISTER:               Stmt_for_body3(256 * c0 + 16 * c2 + 2 * c4, 16 * c1 + 8 * c3 + 2 * c5 + 1);
; TWO-PLUS-REGISTER:               Stmt_for_body3(256 * c0 + 16 * c2 + 2 * c4 + 1, 16 * c1 + 8 * c3 + 2 * c5);
; TWO-PLUS-REGISTER:               Stmt_for_body3(256 * c0 + 16 * c2 + 2 * c4 + 1, 16 * c1 + 8 * c3 + 2 * c5 + 1);
; TWO-PLUS-REGISTER:             }

; Expected AST with 2 x 4 register tiles and vectorization: the inner
; register-tile dimension (4 iterations) becomes a loop marked SIMD, emitted
; once for each of the 2 rows of the register tile.
; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: #pragma known-parallel
; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: for (int c0 = 0; c0 <= 3; c0 += 1)
; TWO-PLUS-REGISTER-PLUS-VECTORIZATION:   for (int c1 = 0; c1 <= 31; c1 += 1)
; TWO-PLUS-REGISTER-PLUS-VECTORIZATION:     for (int c2 = 0; c2 <= 15; c2 += 1)
; TWO-PLUS-REGISTER-PLUS-VECTORIZATION:       for (int c3 = 0; c3 <= 1; c3 += 1)
; TWO-PLUS-REGISTER-PLUS-VECTORIZATION:         for (int c4 = 0; c4 <= 7; c4 += 1)
; TWO-PLUS-REGISTER-PLUS-VECTORIZATION:           for (int c5 = 0; c5 <= 1; c5 += 1) {
; TWO-PLUS-REGISTER-PLUS-VECTORIZATION:             // SIMD
; TWO-PLUS-REGISTER-PLUS-VECTORIZATION:             for (int c8 = 0; c8 <= 3; c8 += 1)
; TWO-PLUS-REGISTER-PLUS-VECTORIZATION:               Stmt_for_body3(256 * c0 + 16 * c2 + 2 * c4, 16 * c1 + 8 * c3 + 4 * c5 + c8);
; TWO-PLUS-REGISTER-PLUS-VECTORIZATION:             // SIMD
; TWO-PLUS-REGISTER-PLUS-VECTORIZATION:             for (int c8 = 0; c8 <= 3; c8 += 1)
; TWO-PLUS-REGISTER-PLUS-VECTORIZATION:               Stmt_for_body3(256 * c0 + 16 * c2 + 2 * c4 + 1, 16 * c1 + 8 * c3 + 4 * c5 + c8);
; TWO-PLUS-REGISTER-PLUS-VECTORIZATION:           }

target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"

; Stores (j * i) srem 42 into A[i][j] for i in [0, 1024) and j in [0, 512).
; The name of the inner-loop block, for.body3, is what appears as
; Stmt_for_body3 in the expected output above, so it must not be renamed.
define void @rect([512 x i32]* %A) {
entry:
  br label %entry.split

entry.split:                                      ; preds = %entry
  br label %for.body3.lr.ph

; Outer loop header: %i.0 counts from 0 up to 1023.
for.body3.lr.ph:                                  ; preds = %for.inc5, %entry.split
  %i.0 = phi i32 [ 0, %entry.split ], [ %inc6, %for.inc5 ]
  br label %for.body3

; Inner loop body: %j.0 counts from 0 up to 511.
for.body3:                                        ; preds = %for.body3.lr.ph, %for.body3
  %j.0 = phi i32 [ 0, %for.body3.lr.ph ], [ %inc, %for.body3 ]
  %mul = mul nsw i32 %j.0, %i.0
  %rem = srem i32 %mul, 42
  %arrayidx4 = getelementptr inbounds [512 x i32], [512 x i32]* %A, i32 %i.0, i32 %j.0
  store i32 %rem, i32* %arrayidx4, align 4
  %inc = add nsw i32 %j.0, 1
  %cmp2 = icmp slt i32 %inc, 512
  br i1 %cmp2, label %for.body3, label %for.inc5

; Outer loop latch: advance i and exit once it reaches 1024.
for.inc5:                                         ; preds = %for.body3
  %inc6 = add nsw i32 %i.0, 1
  %cmp = icmp slt i32 %inc6, 1024
  br i1 %cmp, label %for.body3.lr.ph, label %for.end7

for.end7:                                         ; preds = %for.inc5
  ret void
}