; RUN: opt %loadPolly -analyze -polly-scops < %s | FileCheck %s --check-prefix=SCOP
; RUN: opt %loadPolly -analyze -polly-codegen-ppcg -polly-acc-dump-kernel-ir < %s | FileCheck %s --check-prefix=KERNEL-IR
; RUN: opt %loadPolly -S -polly-codegen-ppcg < %s | FileCheck %s --check-prefix=HOST-IR

; Test that we recognise and codegen a kernel that contains intrinsic calls.

; REQUIRES: pollyacc

; Check that we model the kernel as a scop.
; SCOP:      Function: f
; SCOP-NEXT: Region: %entry.split---%for.end

; Check that the intrinsic call is present in the kernel IR.
; KERNEL-IR: %p_sqrt = tail call float @llvm.sqrt.f32(float %A.arr.i.val_p_scalar_)
; KERNEL-IR: declare float @llvm.sqrt.f32(float)
; KERNEL-IR: declare float @llvm.fabs.f32(float)


; Check that the kernel launch is generated in the host IR.
; The declaration would not be generated unless a call to a kernel exists.
; HOST-IR: declare void @polly_launchKernel(i8*, i32, i32, i32, i32, i32, i8*)


; C equivalent of the IR below:
;
; void f(float *A, float *B, int N) {
;   for (int i = 0; i < N; i++) {
;     float tmp0 = A[i];
;     float tmp1 = sqrtf(tmp0);
;     float tmp2 = fabsf(tmp1);
;     float tmp3 = copysignf(tmp1, tmp2);
;     B[i] = tmp3;
;   }
; }

target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"

define void @f(float* %A, float* %B, i32 %N) {
entry:
  br label %entry.split

entry.split:                                      ; preds = %entry
  ; Skip the loop entirely when N <= 0.
  %cmp1 = icmp sgt i32 %N, 0
  br i1 %cmp1, label %for.body.lr.ph, label %for.end

for.body.lr.ph:                                   ; preds = %entry.split
  br label %for.body

for.body:                                         ; preds = %for.body.lr.ph, %for.body
  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
  %A.arr.i = getelementptr inbounds float, float* %A, i64 %indvars.iv
  %A.arr.i.val = load float, float* %A.arr.i, align 4
  ; Calls to intrinsics that should be part of the kernel:
  ; B[i] = copysign(sqrt(A[i]), fabs(sqrt(A[i])))
  %sqrt = tail call float @llvm.sqrt.f32(float %A.arr.i.val)
  %fabs = tail call float @llvm.fabs.f32(float %sqrt)
  %copysign = tail call float @llvm.copysign.f32(float %sqrt, float %fabs)
  %B.arr.i = getelementptr inbounds float, float* %B, i64 %indvars.iv
  store float %copysign, float* %B.arr.i, align 4

  ; Advance the induction variable and loop while it differs from zext(N).
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %wide.trip.count = zext i32 %N to i64
  %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %for.body, label %for.cond.for.end_crit_edge

for.cond.for.end_crit_edge:                       ; preds = %for.body
  br label %for.end

for.end:                                          ; preds = %for.cond.for.end_crit_edge, %entry.split
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.sqrt.f32(float) #0
declare float @llvm.fabs.f32(float) #0
declare float @llvm.copysign.f32(float, float) #0

attributes #0 = { nounwind readnone }