; (Code-viewer navigation links -- Home, Line#, Scopes#, Navigate#, Raw, Download -- are not part of this test.)
; RUN: opt < %s -vector-library=Accelerate -loop-vectorize -S | FileCheck %s

; Loop-vectorizer test driven with -vector-library=Accelerate. Each function
; below is a simple element-wise loop over float arrays whose body is a single
; scalar call; the FileCheck directives attached to each function state which
; vector form (if any) is expected in the output.

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; sqrt_f32: for a positive %n, computes x[i] = sqrtf(y[i]) for i in [0, n).
; The output is expected to reference `vsqrtf` with a <4 x float> type.
;CHECK-LABEL: @sqrt_f32(
;CHECK: vsqrtf{{.*}}<4 x float>
;CHECK: ret void
declare float @sqrtf(float) nounwind readnone
define void @sqrt_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
entry:
  ; Skip the loop entirely when %n is not positive.
  %cmp6 = icmp sgt i32 %n, 0
  br i1 %cmp6, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  ; Load y[i], apply the scalar sqrtf call, and store the result to x[i].
  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
  %0 = load float, float* %arrayidx, align 4
  %call = tail call float @sqrtf(float %0) nounwind readnone
  %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
  store float %call, float* %arrayidx2, align 4
  ; Advance the 64-bit index; exit once its 32-bit truncation equals %n.
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  ret void
}

; exp_f32: for a positive %n, computes x[i] = expf(y[i]) for i in [0, n).
; The output is expected to reference `vexpf` with a <4 x float> type.
;CHECK-LABEL: @exp_f32(
;CHECK: vexpf{{.*}}<4 x float>
;CHECK: ret void
declare float @expf(float) nounwind readnone
define void @exp_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
entry:
  ; Skip the loop entirely when %n is not positive.
  %cmp6 = icmp sgt i32 %n, 0
  br i1 %cmp6, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  ; Load y[i], apply the scalar expf call, and store the result to x[i].
  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
  %0 = load float, float* %arrayidx, align 4
  %call = tail call float @expf(float %0) nounwind readnone
  %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
  store float %call, float* %arrayidx2, align 4
  ; Advance the 64-bit index; exit once its 32-bit truncation equals %n.
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  ret void
}

; log_f32: for a positive %n, computes x[i] = logf(y[i]) for i in [0, n).
; The output is expected to reference `vlogf` with a <4 x float> type.
;CHECK-LABEL: @log_f32(
;CHECK: vlogf{{.*}}<4 x float>
;CHECK: ret void
declare float @logf(float) nounwind readnone
define void @log_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
entry:
  ; Skip the loop entirely when %n is not positive.
  %cmp6 = icmp sgt i32 %n, 0
  br i1 %cmp6, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  ; Load y[i], apply the scalar logf call, and store the result to x[i].
  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
  %0 = load float, float* %arrayidx, align 4
  %call = tail call float @logf(float %0) nounwind readnone
  %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
  store float %call, float* %arrayidx2, align 4
  ; Advance the 64-bit index; exit once its 32-bit truncation equals %n.
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  ret void
}

; For fabsf we expect a vector intrinsic rather than a vector library call,
; as the intrinsic is cheaper than a lib call.
; fabs_f32: for a positive %n, computes x[i] = fabsf(y[i]) for i in [0, n).
; The output is expected to reference `fabs` with a <4 x float> type.
;CHECK-LABEL: @fabs_f32(
;CHECK: fabs{{.*}}<4 x float>
;CHECK: ret void
declare float @fabsf(float) nounwind readnone
define void @fabs_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
entry:
  ; Skip the loop entirely when %n is not positive.
  %cmp6 = icmp sgt i32 %n, 0
  br i1 %cmp6, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  ; Load y[i], apply the scalar fabsf call, and store the result to x[i].
  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
  %0 = load float, float* %arrayidx, align 4
  %call = tail call float @fabsf(float %0) nounwind readnone
  %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
  store float %call, float* %arrayidx2, align 4
  ; Advance the 64-bit index; exit once its 32-bit truncation equals %n.
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  ret void
}

; Test that we can vectorize an intrinsic into a vector call.
; exp_f32_intrin: same loop shape as the libm-call tests, but the loop body
; calls the @llvm.exp.f32 intrinsic. The output is still expected to reference
; `vexpf` with a <4 x float> type.
;CHECK-LABEL: @exp_f32_intrin(
;CHECK: vexpf{{.*}}<4 x float>
;CHECK: ret void
declare float @llvm.exp.f32(float) nounwind readnone
define void @exp_f32_intrin(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
entry:
  ; Skip the loop entirely when %n is not positive.
  %cmp6 = icmp sgt i32 %n, 0
  br i1 %cmp6, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  ; Load y[i], apply the scalar exp intrinsic, and store the result to x[i].
  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
  %0 = load float, float* %arrayidx, align 4
  %call = tail call float @llvm.exp.f32(float %0) nounwind readnone
  %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
  store float %call, float* %arrayidx2, align 4
  ; Advance the 64-bit index; exit once its 32-bit truncation equals %n.
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  ret void
}

; Test that we don't vectorize arbitrary functions.
; foo_f32: same loop shape as the libm-call tests, but the callee is the
; externally declared @foo. The output must not contain a `foo` reference with
; a <4 x float> type before the function returns.
;CHECK-LABEL: @foo_f32(
;CHECK-NOT: foo{{.*}}<4 x float>
;CHECK: ret void
declare float @foo(float) nounwind readnone
define void @foo_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
entry:
  ; Skip the loop entirely when %n is not positive.
  %cmp6 = icmp sgt i32 %n, 0
  br i1 %cmp6, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  ; Load y[i], apply the scalar call to @foo, and store the result to x[i].
  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
  %0 = load float, float* %arrayidx, align 4
  %call = tail call float @foo(float %0) nounwind readnone
  %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
  store float %call, float* %arrayidx2, align 4
  ; Advance the 64-bit index; exit once its 32-bit truncation equals %n.
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  ret void
}

; Test that we don't vectorize calls with nobuiltin attribute.
; sqrt_f32_nobuiltin: same loop as @sqrt_f32, except the call site carries the
; `nobuiltin` attribute. The output must not contain a `vsqrtf` reference with
; a <4 x float> type before the function returns.
; @sqrtf itself is declared earlier in this file, next to @sqrt_f32.
;CHECK-LABEL: @sqrt_f32_nobuiltin(
;CHECK-NOT: vsqrtf{{.*}}<4 x float>
;CHECK: ret void
define void @sqrt_f32_nobuiltin(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
entry:
  ; Skip the loop entirely when %n is not positive.
  %cmp6 = icmp sgt i32 %n, 0
  br i1 %cmp6, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  ; Load y[i], apply the nobuiltin sqrtf call, and store the result to x[i].
  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
  %0 = load float, float* %arrayidx, align 4
  %call = tail call float @sqrtf(float %0) nounwind readnone nobuiltin
  %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
  store float %call, float* %arrayidx2, align 4
  ; Advance the 64-bit index; exit once its 32-bit truncation equals %n.
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  ret void
}
