• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
; Both invocations list the unsuffixed CHECK prefix next to the CPU-specific
; one. FileCheck only evaluates directives whose prefix is named on its command
; line, so without it the CHECK-LABEL, CHECK and CHECK-NOT lines in this file
; would never be matched.
; RUN: llc -vector-library=MASSV < %s -mtriple=powerpc64le-unknown-unknown -mcpu=pwr9 | FileCheck -check-prefixes=CHECK,CHECK-PWR9 %s
; RUN: llc -vector-library=MASSV < %s -mtriple=powerpc64le-unknown-unknown -mcpu=pwr8 | FileCheck -check-prefixes=CHECK,CHECK-PWR8 %s
3
; Variable exponent: the exponent vector is loaded from %y on every trip, so
; the checks expect the call to be emitted as the CPU-specific MASSV routine.
define void @vspow_var(float* nocapture %z, float* nocapture readonly %y, float* nocapture readonly %x)  {
; CHECK-LABEL:       @vspow_var
; CHECK-PWR9:        bl __powf4_P9
; CHECK-PWR8:        bl __powf4_P8
; CHECK:             blr
entry:
  br label %loop

; Each trip processes four floats; the loop exits once the index equals 1024.
loop:
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %base.addr = getelementptr float, float* %x, i64 %i
  %exp.addr = getelementptr float, float* %y, i64 %i
  %out.addr = getelementptr float, float* %z, i64 %i
  %base.vec.addr = bitcast float* %base.addr to <4 x float>*
  %base = load <4 x float>, <4 x float>* %base.vec.addr, align 4
  %exp.vec.addr = bitcast float* %exp.addr to <4 x float>*
  %exp = load <4 x float>, <4 x float>* %exp.vec.addr, align 4
  %pow = call ninf afn nsz <4 x float> @__powf4_massv(<4 x float> %base, <4 x float> %exp)
  %out.vec.addr = bitcast float* %out.addr to <4 x float>*
  store <4 x float> %pow, <4 x float>* %out.vec.addr, align 4
  %i.next = add i64 %i, 4
  %done = icmp eq i64 %i.next, 1024
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
32
; Splat constant exponent: every lane is 0x3FE851EB80000000 (about 0.76), which
; is neither 0.75 nor 0.25, so the checks expect the MASSV call to be kept.
define void @vspow_const(float* nocapture %y, float* nocapture readonly %x)  {
; CHECK-LABEL:       @vspow_const
; CHECK-PWR9:        bl __powf4_P9
; CHECK-PWR8:        bl __powf4_P8
; CHECK:             blr
entry:
  br label %loop

; Each trip processes four floats; the loop exits once the index equals 1024.
loop:
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src.addr = getelementptr float, float* %x, i64 %i
  %dst.addr = getelementptr float, float* %y, i64 %i
  %src.vec.addr = bitcast float* %src.addr to <4 x float>*
  %base = load <4 x float>, <4 x float>* %src.vec.addr, align 4
  %pow = call ninf afn nsz <4 x float> @__powf4_massv(<4 x float> %base, <4 x float> <float 0x3FE851EB80000000, float 0x3FE851EB80000000, float 0x3FE851EB80000000, float 0x3FE851EB80000000>)
  %dst.vec.addr = bitcast float* %dst.addr to <4 x float>*
  store <4 x float> %pow, <4 x float>* %dst.vec.addr, align 4
  %i.next = add i64 %i, 4
  %done = icmp eq i64 %i.next, 1024
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
58
; Non-splat constant exponent: the lanes hold different values, none of which
; is 0.75 or 0.25, so the checks expect the MASSV call to be kept.
define void @vspow_neq_const(float* nocapture %y, float* nocapture readonly %x)  {
; CHECK-LABEL:       @vspow_neq_const
; CHECK-PWR9:        bl __powf4_P9
; CHECK-PWR8:        bl __powf4_P8
; CHECK:             blr
entry:
  br label %loop

; Each trip processes four floats; the loop exits once the index equals 1024.
loop:
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src.addr = getelementptr float, float* %x, i64 %i
  %dst.addr = getelementptr float, float* %y, i64 %i
  %src.vec.addr = bitcast float* %src.addr to <4 x float>*
  %base = load <4 x float>, <4 x float>* %src.vec.addr, align 4
  %pow = call ninf afn nsz <4 x float> @__powf4_massv(<4 x float> %base, <4 x float> <float 0x3FE861EB80000000, float 0x3FE871EB80000000, float 0x3FE851EB80000000, float 0x3FE851EB80000000>)
  %dst.vec.addr = bitcast float* %dst.addr to <4 x float>*
  store <4 x float> %pow, <4 x float>* %dst.vec.addr, align 4
  %i.next = add i64 %i, 4
  %done = icmp eq i64 %i.next, 1024
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
84
; Non-splat constant exponent: three lanes are 0.75 but the last lane is
; 0x3FE851EB80000000 (about 0.76), so the checks expect the MASSV call to be
; kept.
define void @vspow_neq075_const(float* nocapture %y, float* nocapture readonly %x)  {
; CHECK-LABEL:       @vspow_neq075_const
; CHECK-PWR9:        bl __powf4_P9
; CHECK-PWR8:        bl __powf4_P8
; CHECK:             blr
entry:
  br label %loop

; Each trip processes four floats; the loop exits once the index equals 1024.
loop:
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src.addr = getelementptr float, float* %x, i64 %i
  %dst.addr = getelementptr float, float* %y, i64 %i
  %src.vec.addr = bitcast float* %src.addr to <4 x float>*
  %base = load <4 x float>, <4 x float>* %src.vec.addr, align 4
  %pow = call ninf afn nsz <4 x float> @__powf4_massv(<4 x float> %base, <4 x float> <float 7.500000e-01, float 7.500000e-01, float 7.500000e-01, float 0x3FE851EB80000000>)
  %dst.vec.addr = bitcast float* %dst.addr to <4 x float>*
  store <4 x float> %pow, <4 x float>* %dst.vec.addr, align 4
  %i.next = add i64 %i, 4
  %done = icmp eq i64 %i.next, 1024
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
110
; Non-splat constant exponent: lanes alternate between 0x3FE851EB80000000
; (about 0.76) and 0.25, so the checks expect the MASSV call to be kept.
define void @vspow_neq025_const(float* nocapture %y, float* nocapture readonly %x)  {
; CHECK-LABEL:       @vspow_neq025_const
; CHECK-PWR9:        bl __powf4_P9
; CHECK-PWR8:        bl __powf4_P8
; CHECK:             blr
entry:
  br label %loop

; Each trip processes four floats; the loop exits once the index equals 1024.
loop:
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src.addr = getelementptr float, float* %x, i64 %i
  %dst.addr = getelementptr float, float* %y, i64 %i
  %src.vec.addr = bitcast float* %src.addr to <4 x float>*
  %base = load <4 x float>, <4 x float>* %src.vec.addr, align 4
  %pow = call ninf afn nsz <4 x float> @__powf4_massv(<4 x float> %base, <4 x float> <float 0x3FE851EB80000000, float 2.500000e-01, float 0x3FE851EB80000000, float 2.500000e-01>)
  %dst.vec.addr = bitcast float* %dst.addr to <4 x float>*
  store <4 x float> %pow, <4 x float>* %dst.vec.addr, align 4
  %i.next = add i64 %i, 4
  %done = icmp eq i64 %i.next, 1024
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
136
; Splat exponent of 0.75 on a call carrying the ninf and afn flags: the checks
; expect no MASSV pow call and an xvrsqrtesp instruction instead.
define void @vspow_075(float* nocapture %y, float* nocapture readonly %x)  {
; CHECK-LABEL:       @vspow_075
; CHECK-NOT:         bl __powf4_P{{[8,9]}}
; CHECK:             xvrsqrtesp
; CHECK:             blr
entry:
  br label %loop

; Each trip processes four floats; the loop exits once the index equals 1024.
loop:
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src.addr = getelementptr float, float* %x, i64 %i
  %dst.addr = getelementptr float, float* %y, i64 %i
  %src.vec.addr = bitcast float* %src.addr to <4 x float>*
  %base = load <4 x float>, <4 x float>* %src.vec.addr, align 4
  %pow = call ninf afn <4 x float> @__powf4_massv(<4 x float> %base, <4 x float> <float 7.500000e-01, float 7.500000e-01, float 7.500000e-01, float 7.500000e-01>)
  %dst.vec.addr = bitcast float* %dst.addr to <4 x float>*
  store <4 x float> %pow, <4 x float>* %dst.vec.addr, align 4
  %i.next = add i64 %i, 4
  %done = icmp eq i64 %i.next, 1024
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
162
; Splat exponent of 0.25 on a call carrying the ninf, afn and nsz flags: the
; checks expect no MASSV pow call and an xvrsqrtesp instruction instead.
define void @vspow_025(float* nocapture %y, float* nocapture readonly %x)  {
; CHECK-LABEL:       @vspow_025
; CHECK-NOT:         bl __powf4_P{{[8,9]}}
; CHECK:             xvrsqrtesp
; CHECK:             blr
entry:
  br label %loop

; Each trip processes four floats; the loop exits once the index equals 1024.
loop:
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src.addr = getelementptr float, float* %x, i64 %i
  %dst.addr = getelementptr float, float* %y, i64 %i
  %src.vec.addr = bitcast float* %src.addr to <4 x float>*
  %base = load <4 x float>, <4 x float>* %src.vec.addr, align 4
  %pow = call ninf afn nsz <4 x float> @__powf4_massv(<4 x float> %base, <4 x float> <float 2.500000e-01, float 2.500000e-01, float 2.500000e-01, float 2.500000e-01>)
  %dst.vec.addr = bitcast float* %dst.addr to <4 x float>*
  store <4 x float> %pow, <4 x float>* %dst.vec.addr, align 4
  %i.next = add i64 %i, 4
  %done = icmp eq i64 %i.next, 1024
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
188
; Splat exponent of 0.75, but the call carries no fast-math flags: the checks
; expect the MASSV call to be kept and no xvrsqrtesp to follow it.
define void @vspow_075_nofast(float* nocapture %y, float* nocapture readonly %x)  {
; CHECK-LABEL:       @vspow_075_nofast
; CHECK-PWR9:        bl __powf4_P9
; CHECK-PWR8:        bl __powf4_P8
; CHECK-NOT:         xvrsqrtesp
; CHECK:             blr
entry:
  br label %loop

; Each trip processes four floats; the loop exits once the index equals 1024.
loop:
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src.addr = getelementptr float, float* %x, i64 %i
  %dst.addr = getelementptr float, float* %y, i64 %i
  %src.vec.addr = bitcast float* %src.addr to <4 x float>*
  %base = load <4 x float>, <4 x float>* %src.vec.addr, align 4
  %pow = call <4 x float> @__powf4_massv(<4 x float> %base, <4 x float> <float 7.500000e-01, float 7.500000e-01, float 7.500000e-01, float 7.500000e-01>)
  %dst.vec.addr = bitcast float* %dst.addr to <4 x float>*
  store <4 x float> %pow, <4 x float>* %dst.vec.addr, align 4
  %i.next = add i64 %i, 4
  %done = icmp eq i64 %i.next, 1024
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
215
; Splat exponent of 0.25, but the call carries no fast-math flags: the checks
; expect the MASSV call to be kept and no xvrsqrtesp to follow it.
define void @vspow_025_nofast(float* nocapture %y, float* nocapture readonly %x)  {
; CHECK-LABEL:       @vspow_025_nofast
; CHECK-PWR9:        bl __powf4_P9
; CHECK-PWR8:        bl __powf4_P8
; CHECK-NOT:         xvrsqrtesp
; CHECK:             blr
entry:
  br label %loop

; Each trip processes four floats; the loop exits once the index equals 1024.
loop:
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src.addr = getelementptr float, float* %x, i64 %i
  %dst.addr = getelementptr float, float* %y, i64 %i
  %src.vec.addr = bitcast float* %src.addr to <4 x float>*
  %base = load <4 x float>, <4 x float>* %src.vec.addr, align 4
  %pow = call <4 x float> @__powf4_massv(<4 x float> %base, <4 x float> <float 2.500000e-01, float 2.500000e-01, float 2.500000e-01, float 2.500000e-01>)
  %dst.vec.addr = bitcast float* %dst.addr to <4 x float>*
  store <4 x float> %pow, <4 x float>* %dst.vec.addr, align 4
  %i.next = add i64 %i, 4
  %done = icmp eq i64 %i.next, 1024
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
242
; Declaration of the MASSV vector pow entry point called by every test in this
; file. No function attributes are attached to it here.
244declare <4 x float> @__powf4_massv(<4 x float>, <4 x float>)
245