; RUN: opt %loadPolly -polly-import-jscop \
; RUN: -polly-codegen -S < %s | FileCheck %s
; RUN: opt %loadPolly -polly-import-jscop \
; RUN: -polly-codegen -polly-import-jscop-postfix=pow2 \
; RUN: -S < %s | FileCheck %s -check-prefix=POW2
;
; C source this IR corresponds to:
;
;   void exprModDiv(float *A, float *B, float *C, long N, long p) {
;     for (long i = 0; i < N; i++)
;       C[i] += A[i] + B[i] + A[i] + B[i + p];
;   }
;
;
; This test case changes the access functions (through the imported jscop
; description) such that the resulting index expressions are modulo or
; division operations. We test that the code we generate takes advantage of
; knowledge about unsigned numerators: the loop induction variable is lowered
; with urem/udiv rather than their signed counterparts. This is useful as LLVM
; will translate urem and udiv operations with power-of-two denominators to
; fast bitwise-and or shift operations.
;
; The first opt invocation imports the default jscop and is verified with the
; CHECK prefix (denominator 127). The second invocation imports the jscop with
; the "pow2" postfix and is verified with the POW2 prefix (denominator 128).

; ---- Default jscop, CHECK prefix ------------------------------------------

; A[i % 127]
; CHECK: %pexp.pdiv_r = urem i64 %polly.indvar, 127
; CHECK: %polly.access.A9 = getelementptr float, float* %A, i64 %pexp.pdiv_r

; A[floor(i / 127)]
;
; Note: without the floor, we would create a map i -> i/127, which only contains
; values of i that are divisible by 127. All other values of i would not
; be mapped to any value. However, to generate correct code we require
; each value of i to indeed be mapped to a value.
;
; (The label above is written with A, but the checked access below is on %B.)
;
; CHECK: %pexp.p_div_q = udiv i64 %polly.indvar, 127
; CHECK: %polly.access.B10 = getelementptr float, float* %B, i64 %pexp.p_div_q

; A[p % 128]
; The expected index is the constant 0.
; NOTE(review): every other label in this section uses 127; confirm against
; the jscop file whether 128 is really intended here.
; CHECK: %polly.access.A11 = getelementptr float, float* %A, i64 0

; A[p / 127]
; The parameter p is divided with a signed, exact division. (The label is
; written with A, but the checked access below is on %B.)
; CHECK: %pexp.div = sdiv exact i64 %p, 127
; CHECK: %polly.access.B12 = getelementptr float, float* %B, i64 %pexp.div

; ---- "pow2" jscop, POW2 prefix --------------------------------------------

; A[i % 128]
; POW2: %pexp.pdiv_r = urem i64 %polly.indvar, 128
; POW2: %polly.access.A9 = getelementptr float, float* %A, i64 %pexp.pdiv_r

; A[floor(i / 128)]
; (Label written with A; the checked access below is on %B.)
; POW2: %pexp.p_div_q = udiv i64 %polly.indvar, 128
; POW2: %polly.access.B10 = getelementptr float, float* %B, i64 %pexp.p_div_q

; A[p % 128]
; The expected index is the constant 0.
; POW2: %polly.access.A11 = getelementptr float, float* %A, i64 0

; A[p / 128]
; (Label written with A; the checked access below is on %B.)
; POW2: %pexp.div = sdiv exact i64 %p, 128
; POW2: %polly.access.B12 = getelementptr float, float* %B, i64 %pexp.div

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

; Input IR for the loop shown in the header comment. The accesses below use
; the plain indices i and p + i; the modulo/division index expressions that
; the directives above expect come from the imported jscop, not from this IR.
define void @exprModDiv(float* %A, float* %B, float* %C, i64 %N, i64 %p) {
entry:
  br label %for.cond

; Loop header: i starts at 0 and the body runs while i < N (signed compare).
for.cond:                                         ; preds = %for.inc, %entry
  %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.inc ]
  %cmp = icmp slt i64 %i.0, %N
  br i1 %cmp, label %for.body, label %for.end

for.body:                                         ; preds = %for.cond
  ; First read of A[i].
  %arrayidx = getelementptr inbounds float, float* %A, i64 %i.0
  %tmp = load float, float* %arrayidx, align 4
  ; Read of B[i].
  %arrayidx1 = getelementptr inbounds float, float* %B, i64 %i.0
  %tmp1 = load float, float* %arrayidx1, align 4
  %add = fadd float %tmp, %tmp1
  ; Second read of A[i].
  %arrayidx2 = getelementptr inbounds float, float* %A, i64 %i.0
  %tmp2 = load float, float* %arrayidx2, align 4
  %add3 = fadd float %add, %tmp2
  ; Read of B[p + i].
  %padd = add nsw i64 %p, %i.0
  %arrayidx4 = getelementptr inbounds float, float* %B, i64 %padd
  %tmp3 = load float, float* %arrayidx4, align 4
  %add5 = fadd float %add3, %tmp3
  ; C[i] += <sum of the four loads above>.
  %arrayidx6 = getelementptr inbounds float, float* %C, i64 %i.0
  %tmp4 = load float, float* %arrayidx6, align 4
  %add7 = fadd float %tmp4, %add5
  store float %add7, float* %arrayidx6, align 4
  br label %for.inc

; Loop latch: i = i + 1.
for.inc:                                          ; preds = %for.body
  %inc = add nuw nsw i64 %i.0, 1
  br label %for.cond

for.end:                                          ; preds = %for.cond
  ret void
}