1; RUN: opt %loadPolly -polly-scops -analyze \ 2; RUN: -polly-invariant-load-hoisting < %s | FileCheck %s -check-prefix=SCOP 3; 4; RUN: opt %loadPolly -polly-scops -S -polly-invariant-load-hoisting \ 5; RUN: -polly-codegen-ppcg < %s | FileCheck %s -check-prefix=HOST-IR 6; 7; RUN: opt %loadPolly -polly-scops -analyze -polly-invariant-load-hoisting \ 8; RUN: -polly-codegen-ppcg -polly-acc-dump-kernel-ir < %s | FileCheck %s -check-prefix=KERNEL-IR 9; 10; REQUIRES: pollyacc 11; 12; SCOP: Function: f 13; SCOP-NEXT: Region: %entry.split---%for.end26 14; SCOP-NEXT: Max Loop Depth: 3 15; SCOP-NEXT: Invariant Accesses: { 16; SCOP-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] 17; SCOP-NEXT: [n, tmp12] -> { Stmt_for_body6[i0, i1, i2] -> MemRef_invariant[0] }; 18; SCOP-NEXT: Execution Context: [n, tmp12] -> { : n > 0 } 19; SCOP-NEXT: } 20; HOST-IR: call void @polly_launchKernel(i8* %[[REGC:[0-9]+]], i32 %{{[0-9]+}}, i32 1, i32 32, i32 1, i32 1, i8* %polly_launch_0_params_i8ptr) 21; HOST-IR-NEXT: call void @polly_freeKernel(i8* %[[REGC]]) 22 23; KERNEL-IR: define ptx_kernel void @FUNC_f_SCOP_0_KERNEL_0(i8 addrspace(1)* %MemRef_B, i8 addrspace(1)* %MemRef_A, i32 %n, i32 %tmp12, i32 %polly.preload.tmp21.merge) 24 25 26; Check that we generate correct GPU code in case of invariant load hoisting. 
;
;
;    static const int N = 3000;
;
;    void f(int A[N][N], int *invariant, int B[N][N], int n) {
;      for (int i = 0; i < n; i++) {
;        for (int j = 0; j < n; j++) {
;          for (int k = 0; k < n; k++) {
;
;            A[*invariant][k] = B[k][k];
;            A[k][*invariant] += B[k][k];
;          }
;        }
;      }
;    }
;

; IR for the C function f shown above: a three-deep loop nest over i, j and k,
; each running from 0 while the counter is less than %n. The innermost body
; reads B[k][k] and *invariant, stores to A[*invariant][k], and then
; accumulates into A[k][*invariant].
;
; %invariant is loaded twice per iteration (%tmp12 and %tmp21) and its address
; does not depend on any loop counter. The check lines at the top of this file
; refer to these two values by name (tmp12 as a parameter, tmp21 inside
; polly.preload.tmp21.merge), as well as to the block names entry.split,
; for.end26 and for.body6, so value and block names must not be changed.
define void @f([3000 x i32]* %A, i32* %invariant, [3000 x i32]* %B, i32 %n) {
entry:
  br label %entry.split

; Guard for the i-loop: skip the whole nest unless n > 0.
entry.split:                                      ; preds = %entry
  %cmp6 = icmp sgt i32 %n, 0
  br i1 %cmp6, label %for.cond1.preheader.lr.ph, label %for.end26

for.cond1.preheader.lr.ph:                        ; preds = %entry.split
  br label %for.cond1.preheader

; Header of the i-loop; also re-tests n > 0 as the guard for the j-loop.
for.cond1.preheader:                              ; preds = %for.cond1.preheader.lr.ph, %for.inc24
  %i.07 = phi i32 [ 0, %for.cond1.preheader.lr.ph ], [ %inc25, %for.inc24 ]
  %cmp23 = icmp sgt i32 %n, 0
  br i1 %cmp23, label %for.cond4.preheader.lr.ph, label %for.inc24

for.cond4.preheader.lr.ph:                        ; preds = %for.cond1.preheader
  br label %for.cond4.preheader

; Header of the j-loop; also re-tests n > 0 as the guard for the k-loop.
for.cond4.preheader:                              ; preds = %for.cond4.preheader.lr.ph, %for.inc21
  %j.04 = phi i32 [ 0, %for.cond4.preheader.lr.ph ], [ %inc22, %for.inc21 ]
  %cmp51 = icmp sgt i32 %n, 0
  br i1 %cmp51, label %for.body6.lr.ph, label %for.inc21

for.body6.lr.ph:                                  ; preds = %for.cond4.preheader
  br label %for.body6

; Body of the k-loop (innermost); it is its own latch.
for.body6:                                        ; preds = %for.body6.lr.ph, %for.body6
  %k.02 = phi i32 [ 0, %for.body6.lr.ph ], [ %inc, %for.body6 ]
  ; First statement: A[*invariant][k] = B[k][k]
  %idxprom = sext i32 %k.02 to i64
  %idxprom7 = sext i32 %k.02 to i64
  %arrayidx8 = getelementptr inbounds [3000 x i32], [3000 x i32]* %B, i64 %idxprom, i64 %idxprom7
  %tmp9 = load i32, i32* %arrayidx8, align 4
  ; First load of *invariant; used as the row index into A.
  %tmp12 = load i32, i32* %invariant, align 4
  %idxprom9 = sext i32 %tmp12 to i64
  %idxprom11 = sext i32 %k.02 to i64
  %arrayidx12 = getelementptr inbounds [3000 x i32], [3000 x i32]* %A, i64 %idxprom9, i64 %idxprom11
  store i32 %tmp9, i32* %arrayidx12, align 4
  ; Second statement: A[k][*invariant] += B[k][k]
  %idxprom13 = sext i32 %k.02 to i64
  %idxprom15 = sext i32 %k.02 to i64
  %arrayidx16 = getelementptr inbounds [3000 x i32], [3000 x i32]* %B, i64 %idxprom13, i64 %idxprom15
  %tmp17 = load i32, i32* %arrayidx16, align 4
  %idxprom17 = sext i32 %k.02 to i64
  ; Second load of *invariant; used as the column index into A.
  %tmp21 = load i32, i32* %invariant, align 4
  %idxprom19 = sext i32 %tmp21 to i64
  %arrayidx20 = getelementptr inbounds [3000 x i32], [3000 x i32]* %A, i64 %idxprom17, i64 %idxprom19
  %tmp22 = load i32, i32* %arrayidx20, align 4
  %add = add nsw i32 %tmp22, %tmp17
  store i32 %add, i32* %arrayidx20, align 4
  ; k++ and loop back while k < n.
  %inc = add nuw nsw i32 %k.02, 1
  %cmp5 = icmp slt i32 %inc, %n
  br i1 %cmp5, label %for.body6, label %for.cond4.for.inc21_crit_edge

for.cond4.for.inc21_crit_edge:                    ; preds = %for.body6
  br label %for.inc21

; Latch of the j-loop: j++ and loop back while j < n.
for.inc21:                                        ; preds = %for.cond4.for.inc21_crit_edge, %for.cond4.preheader
  %inc22 = add nuw nsw i32 %j.04, 1
  %cmp2 = icmp slt i32 %inc22, %n
  br i1 %cmp2, label %for.cond4.preheader, label %for.cond1.for.inc24_crit_edge

for.cond1.for.inc24_crit_edge:                    ; preds = %for.inc21
  br label %for.inc24

; Latch of the i-loop: i++ and loop back while i < n.
for.inc24:                                        ; preds = %for.cond1.for.inc24_crit_edge, %for.cond1.preheader
  %inc25 = add nuw nsw i32 %i.07, 1
  %cmp = icmp slt i32 %inc25, %n
  br i1 %cmp, label %for.cond1.preheader, label %for.cond.for.end26_crit_edge

for.cond.for.end26_crit_edge:                     ; preds = %for.inc24
  br label %for.end26

for.end26:                                        ; preds = %for.cond.for.end26_crit_edge, %entry.split
  ret void
}