• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
2target triple = "armv7-none-linux-gnueabi"
3
4;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5;;;;;;;;;               INTRINSICS               ;;;;;;;;;;
6;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7
8declare <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float>, <2 x float>) nounwind readnone
9declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>) nounwind readnone
10declare <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
11declare <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
12declare <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
13declare <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
14
15declare <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float>, <2 x float>) nounwind readnone
16declare <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float>, <4 x float>) nounwind readnone
17declare <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
18declare <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
19declare <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
20declare <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
21
22declare <8 x i8>  @llvm.arm.neon.vqshiftns.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
23declare <4 x i16> @llvm.arm.neon.vqshiftns.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
24declare <2 x i32> @llvm.arm.neon.vqshiftns.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
25
26declare <8 x i8>  @llvm.arm.neon.vqshiftnu.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
27declare <4 x i16> @llvm.arm.neon.vqshiftnu.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
28declare <2 x i32> @llvm.arm.neon.vqshiftnu.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
29
30declare <8 x i8>  @llvm.arm.neon.vqshiftnsu.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
31declare <4 x i16> @llvm.arm.neon.vqshiftnsu.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
32declare <2 x i32> @llvm.arm.neon.vqshiftnsu.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
33
34declare <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float>) nounwind readnone
35declare <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float>) nounwind readnone
36
37declare <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float>) nounwind readnone
38declare <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float>) nounwind readnone
39
40;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
41;;;;;;;;;                HELPERS                 ;;;;;;;;;;
42;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
43
; Broadcast the scalar %in into all four lanes of a <4 x float>.
; Lane 0 is seeded with insertelement; the all-zero shuffle mask then
; replicates that lane across the whole vector.
define internal <4 x float> @smear_4f(float %in) nounwind readnone alwaysinline {
  %lane0 = insertelement <4 x float> undef, float %in, i32 0
  %splat = shufflevector <4 x float> %lane0, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %splat
}
51
; Broadcast the scalar %in into both lanes of a <2 x float>.
; Lane 0 is seeded with insertelement; the all-zero shuffle mask copies it
; into lane 1 as well.
define internal <2 x float> @smear_2f(float %in) nounwind readnone alwaysinline {
  %lane0 = insertelement <2 x float> undef, float %in, i32 0
  %splat = shufflevector <2 x float> %lane0, <2 x float> undef, <2 x i32> zeroinitializer
  ret <2 x float> %splat
}
57
; Broadcast the scalar %in into all four lanes of a <4 x i32>.
; Lane 0 is seeded with insertelement; the all-zero shuffle mask then
; replicates that lane across the whole vector.
define internal <4 x i32> @smear_4i32(i32 %in) nounwind readnone alwaysinline {
  %lane0 = insertelement <4 x i32> undef, i32 %in, i32 0
  %splat = shufflevector <4 x i32> %lane0, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %splat
}
65
66
67;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
68;;;;;;;;;                 CLAMP                  ;;;;;;;;;;
69;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
70
; clamp(float4 value, float4 low, float4 high)
; Per lane: max(min(value, high), low), using the NEON vmin/vmax intrinsics.
; Declared readnone (was readonly): the body only calls readnone intrinsics
; and never touches memory, matching the readnone scalar @_Z4fminff.
define <4 x float> @_Z5clampDv4_fS_S_(<4 x float> %value, <4 x float> %low, <4 x float> %high) nounwind readnone {
  ; Apply the upper bound first, then the lower bound.
  %capped = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %value, <4 x float> %high) nounwind readnone
  %clamped = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %capped, <4 x float> %low) nounwind readnone
  ret <4 x float> %clamped
}
76
; clamp(float4 value, float low, float high)
; The scalar bounds are splatted to four lanes and then applied per lane as
; max(min(value, high), low).
; Declared readnone (was readonly): nothing here touches memory. The NEON
; intrinsics are called directly, the same way the float3/float2 scalar-bound
; variants do, instead of going through a non-inlined call to the vector
; overload.
define <4 x float> @_Z5clampDv4_fff(<4 x float> %value, float %low, float %high) nounwind readnone {
  %high4 = tail call <4 x float> @smear_4f(float %high) nounwind readnone
  %low4 = tail call <4 x float> @smear_4f(float %low) nounwind readnone
  %capped = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %value, <4 x float> %high4) nounwind readnone
  %clamped = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %capped, <4 x float> %low4) nounwind readnone
  ret <4 x float> %clamped
}
83
; clamp(float3 value, float3 low, float3 high)
; Operands are padded to four lanes (lane 3 is undef) so the 4-wide NEON
; vmin/vmax can be used; the pad lane is dropped from the result.
; Declared readnone (was readonly): nothing here touches memory.
define <3 x float> @_Z5clampDv3_fS_S_(<3 x float> %value, <3 x float> %low, <3 x float> %high) nounwind readnone {
  %value4 = shufflevector <3 x float> %value, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  %low4 = shufflevector <3 x float> %low, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  %high4 = shufflevector <3 x float> %high, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  %capped = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %value4, <4 x float> %high4) nounwind readnone
  %clamped = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %capped, <4 x float> %low4) nounwind readnone
  ; Keep only the three meaningful lanes.
  %result = shufflevector <4 x float> %clamped, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x float> %result
}
93
; clamp(float3 value, float low, float high)
; %value is padded to four lanes (lane 3 is undef) and the scalar bounds are
; splatted to four lanes; the result is max(min(value, high), low) with the
; pad lane dropped.
; Declared readnone (was readonly): nothing here touches memory.
define <3 x float> @_Z5clampDv3_fff(<3 x float> %value, float %low, float %high) nounwind readnone {
  %value4 = shufflevector <3 x float> %value, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  %high4 = tail call <4 x float> @smear_4f(float %high) nounwind readnone
  %low4 = tail call <4 x float> @smear_4f(float %low) nounwind readnone
  %capped = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %value4, <4 x float> %high4) nounwind readnone
  %clamped = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %capped, <4 x float> %low4) nounwind readnone
  ; Keep only the three meaningful lanes.
  %result = shufflevector <4 x float> %clamped, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x float> %result
}
103
; clamp(float2 value, float2 low, float2 high)
; Per lane: max(min(value, high), low), using the 2-wide NEON vmin/vmax.
; Declared readnone (was readonly): the body only calls readnone intrinsics.
define <2 x float> @_Z5clampDv2_fS_S_(<2 x float> %value, <2 x float> %low, <2 x float> %high) nounwind readnone {
  %capped = tail call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %value, <2 x float> %high) nounwind readnone
  %clamped = tail call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %capped, <2 x float> %low) nounwind readnone
  ret <2 x float> %clamped
}
109
; clamp(float2 value, float low, float high)
; The scalar bounds are splatted to two lanes and applied per lane as
; max(min(value, high), low).
; Declared readnone (was readonly): nothing here touches memory.
define <2 x float> @_Z5clampDv2_fff(<2 x float> %value, float %low, float %high) nounwind readnone {
  %high2 = tail call <2 x float> @smear_2f(float %high) nounwind readnone
  %low2 = tail call <2 x float> @smear_2f(float %low) nounwind readnone
  %capped = tail call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %value, <2 x float> %high2) nounwind readnone
  %clamped = tail call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %capped, <2 x float> %low2) nounwind readnone
  ret <2 x float> %clamped
}
117
; clamp(float value, float low, float high)
; Scalar max(min(value, high), low) built from compare + select.
; Both compares are ordered, so a NaN operand makes the compare false and the
; second select operand is taken (e.g. a NaN %value yields %high at the first
; step).
; Declared readnone (was readonly): pure arithmetic, no memory access, same
; as the scalar @_Z4fminff.
define float @_Z5clampfff(float %value, float %low, float %high) nounwind readnone {
  %below_high = fcmp olt float %value, %high
  %capped = select i1 %below_high, float %value, float %high
  %above_low = fcmp ogt float %capped, %low
  %clamped = select i1 %above_low, float %capped, float %low
  ret float %clamped
}
125
126
127;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
128;;;;;;;;;                  FMAX                  ;;;;;;;;;;
129;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
130
; fmax(float4, float4): per-lane maximum via the 4-wide NEON vmax.
; Declared readnone (was readonly): the body is a single readnone intrinsic
; call, and the readnone wrapper @_Z3maxDv4_fS_ forwards to this function.
define <4 x float> @_Z4fmaxDv4_fS_(<4 x float> %v1, <4 x float> %v2) nounwind readnone {
  %max = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %v1, <4 x float> %v2) nounwind readnone
  ret <4 x float> %max
}
135
; fmax(float4, float): the scalar %v2 is splatted to four lanes, then the
; per-lane maximum is taken with the NEON vmax.
; Declared readnone (was readonly): nothing here touches memory, and the
; readnone wrapper @_Z3maxDv4_ff forwards to this function.
define <4 x float> @_Z4fmaxDv4_ff(<4 x float> %v1, float %v2) nounwind readnone {
  %v2x4 = tail call <4 x float> @smear_4f(float %v2) nounwind readnone
  %max = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %v1, <4 x float> %v2x4) nounwind readnone
  ret <4 x float> %max
}
141
; fmax(float3, float3): operands are padded to four lanes (lane 3 is undef),
; the 4-wide NEON vmax is applied, and the pad lane is dropped.
; Declared readnone (was readonly): nothing here touches memory, and the
; readnone wrapper @_Z3maxDv3_fS_ forwards to this function.
define <3 x float> @_Z4fmaxDv3_fS_(<3 x float> %v1, <3 x float> %v2) nounwind readnone {
  %lhs4 = shufflevector <3 x float> %v1, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  %rhs4 = shufflevector <3 x float> %v2, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  %max4 = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %lhs4, <4 x float> %rhs4) nounwind readnone
  %max3 = shufflevector <4 x float> %max4, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x float> %max3
}
149
; fmax(float3, float): %v1 is padded to four lanes (lane 3 is undef), %v2 is
; splatted to four lanes, the NEON vmax is applied, and the pad lane dropped.
; Declared readnone (was readonly): nothing here touches memory, and the
; readnone wrapper @_Z3maxDv3_ff forwards to this function.
define <3 x float> @_Z4fmaxDv3_ff(<3 x float> %v1, float %v2) nounwind readnone {
  %lhs4 = shufflevector <3 x float> %v1, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  %rhs4 = tail call <4 x float> @smear_4f(float %v2) nounwind readnone
  %max4 = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %lhs4, <4 x float> %rhs4) nounwind readnone
  %max3 = shufflevector <4 x float> %max4, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x float> %max3
}
157
; fmax(float2, float2): per-lane maximum via the 2-wide NEON vmax.
; Declared readnone (was readonly): the body is a single readnone intrinsic
; call, and the readnone wrapper @_Z3maxDv2_fS_ forwards to this function.
define <2 x float> @_Z4fmaxDv2_fS_(<2 x float> %v1, <2 x float> %v2) nounwind readnone {
  %max = tail call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %v1, <2 x float> %v2) nounwind readnone
  ret <2 x float> %max
}
162
; fmax(float2, float): the scalar %v2 is splatted to two lanes, then the
; per-lane maximum is taken with the NEON vmax.
; Declared readnone (was readonly): nothing here touches memory, and the
; readnone wrapper @_Z3maxDv2_ff forwards to this function.
define <2 x float> @_Z4fmaxDv2_ff(<2 x float> %v1, float %v2) nounwind readnone {
  %v2x2 = tail call <2 x float> @smear_2f(float %v2) nounwind readnone
  %max = tail call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %v1, <2 x float> %v2x2) nounwind readnone
  ret <2 x float> %max
}
168
; fmax(float, float): scalar maximum via compare + select.
; The compare is ordered, so if either operand is NaN it is false and %v2 is
; returned unchanged.
; Declared readnone (was readonly): pure arithmetic with no memory access,
; matching @_Z4fminff and the readnone wrapper @_Z3maxff that forwards here.
define float @_Z4fmaxff(float %v1, float %v2) nounwind readnone {
  %v1_is_greater = fcmp ogt float %v1, %v2
  %larger = select i1 %v1_is_greater, float %v1, float %v2
  ret float %larger
}
174
175
176;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
177;;;;;;;;;                  FMIN                  ;;;;;;;;;;
178;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
179
; fmin(float4, float4): per-lane minimum via the 4-wide NEON vmin.
; Declared readnone (was readonly): the body is a single readnone intrinsic
; call, and the readnone wrapper @_Z3minDv4_fS_ forwards to this function.
define <4 x float> @_Z4fminDv4_fS_(<4 x float> %v1, <4 x float> %v2) nounwind readnone {
  %min = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %v1, <4 x float> %v2) nounwind readnone
  ret <4 x float> %min
}
184
; fmin(float4, float): the scalar %v2 is splatted to four lanes, then the
; per-lane minimum is taken with the NEON vmin.
; Declared readnone (was readonly): nothing here touches memory, and the
; readnone wrapper @_Z3minDv4_ff forwards to this function.
define <4 x float> @_Z4fminDv4_ff(<4 x float> %v1, float %v2) nounwind readnone {
  %v2x4 = tail call <4 x float> @smear_4f(float %v2) nounwind readnone
  %min = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %v1, <4 x float> %v2x4) nounwind readnone
  ret <4 x float> %min
}
190
; fmin(float3, float3): operands are padded to four lanes (lane 3 is undef),
; the 4-wide NEON vmin is applied, and the pad lane is dropped.
; Declared readnone (was readonly): nothing here touches memory, and the
; readnone wrapper @_Z3minDv3_fS_ forwards to this function.
define <3 x float> @_Z4fminDv3_fS_(<3 x float> %v1, <3 x float> %v2) nounwind readnone {
  %lhs4 = shufflevector <3 x float> %v1, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  %rhs4 = shufflevector <3 x float> %v2, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  %min4 = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %lhs4, <4 x float> %rhs4) nounwind readnone
  %min3 = shufflevector <4 x float> %min4, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x float> %min3
}
198
; fmin(float3, float): %v1 is padded to four lanes (lane 3 is undef), %v2 is
; splatted to four lanes, the NEON vmin is applied, and the pad lane dropped.
; Declared readnone (was readonly): nothing here touches memory, and the
; readnone wrapper @_Z3minDv3_ff forwards to this function.
define <3 x float> @_Z4fminDv3_ff(<3 x float> %v1, float %v2) nounwind readnone {
  %lhs4 = shufflevector <3 x float> %v1, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  %rhs4 = tail call <4 x float> @smear_4f(float %v2) nounwind readnone
  %min4 = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %lhs4, <4 x float> %rhs4) nounwind readnone
  %min3 = shufflevector <4 x float> %min4, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x float> %min3
}
206
; fmin(float2, float2): per-lane minimum via the 2-wide NEON vmin.
; Declared readnone (was readonly): the body is a single readnone intrinsic
; call, and the readnone wrapper @_Z3minDv2_fS_ forwards to this function.
define <2 x float> @_Z4fminDv2_fS_(<2 x float> %v1, <2 x float> %v2) nounwind readnone {
  %min = tail call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %v1, <2 x float> %v2) nounwind readnone
  ret <2 x float> %min
}
211
; fmin(float2, float): the scalar %v2 is splatted to two lanes, then the
; per-lane minimum is taken with the NEON vmin.
; Declared readnone (was readonly): nothing here touches memory, and the
; readnone wrapper @_Z3minDv2_ff forwards to this function.
define <2 x float> @_Z4fminDv2_ff(<2 x float> %v1, float %v2) nounwind readnone {
  %v2x2 = tail call <2 x float> @smear_2f(float %v2) nounwind readnone
  %min = tail call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %v1, <2 x float> %v2x2) nounwind readnone
  ret <2 x float> %min
}
217
; fmin(float, float): scalar minimum via compare + select.
; The compare is ordered, so if either operand is NaN it is false and %v2 is
; returned unchanged.
define float @_Z4fminff(float %v1, float %v2) nounwind readnone {
  %v1_is_smaller = fcmp olt float %v1, %v2
  %smaller = select i1 %v1_is_smaller, float %v1, float %v2
  ret float %smaller
}
223
224
225;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
226;;;;;;;;;                  MAX                   ;;;;;;;;;;
227;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
228
; max(char, char): signed 8-bit maximum via compare + select.
; On a tie the compare is false and %v2 (equal to %v1) is returned.
define signext i8 @_Z3maxcc(i8 signext %v1, i8 signext %v2) nounwind readnone {
  %v1_is_greater = icmp sgt i8 %v1, %v2
  %larger = select i1 %v1_is_greater, i8 %v1, i8 %v2
  ret i8 %larger
}
234
; max(char2, char2): lanes are sign-extended to i32 so the 32-bit signed
; NEON vmax can be used, then truncated back to i8.
define <2 x i8> @_Z3maxDv2_cS_(<2 x i8> %v1, <2 x i8> %v2) nounwind readnone {
  %lhs32 = sext <2 x i8> %v1 to <2 x i32>
  %rhs32 = sext <2 x i8> %v2 to <2 x i32>
  %max32 = tail call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %lhs32, <2 x i32> %rhs32) nounwind readnone
  %max8 = trunc <2 x i32> %max32 to <2 x i8>
  ret <2 x i8> %max8
}
242
; max(char3, char3): sign-extend to i32, pad to four lanes (lane 3 is undef),
; take the signed NEON vmax, then drop the pad lane and truncate to i8.
define <3 x i8> @_Z3maxDv3_cS_(<3 x i8> %v1, <3 x i8> %v2) nounwind readnone {
  %lhs32 = sext <3 x i8> %v1 to <3 x i32>
  %rhs32 = sext <3 x i8> %v2 to <3 x i32>
  %lhs4 = shufflevector <3 x i32> %lhs32, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  %rhs4 = shufflevector <3 x i32> %rhs32, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  %max4 = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %lhs4, <4 x i32> %rhs4) nounwind readnone
  %max3 = shufflevector <4 x i32> %max4, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  %max8 = trunc <3 x i32> %max3 to <3 x i8>
  ret <3 x i8> %max8
}
253
; max(char4, char4): lanes are sign-extended to i32 so the 32-bit signed
; NEON vmax can be used, then truncated back to i8.
define <4 x i8> @_Z3maxDv4_cS_(<4 x i8> %v1, <4 x i8> %v2) nounwind readnone {
  %lhs32 = sext <4 x i8> %v1 to <4 x i32>
  %rhs32 = sext <4 x i8> %v2 to <4 x i32>
  %max32 = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %lhs32, <4 x i32> %rhs32) nounwind readnone
  %max8 = trunc <4 x i32> %max32 to <4 x i8>
  ret <4 x i8> %max8
}
261
; max(short, short): signed 16-bit maximum via compare + select.
; On a tie the compare is false and %v2 (equal to %v1) is returned.
define signext i16 @_Z3maxss(i16 signext %v1, i16 signext %v2) nounwind readnone {
  %v1_is_greater = icmp sgt i16 %v1, %v2
  %larger = select i1 %v1_is_greater, i16 %v1, i16 %v2
  ret i16 %larger
}
267
; max(short2, short2): lanes are sign-extended to i32 so the 32-bit signed
; NEON vmax can be used, then truncated back to i16.
define <2 x i16> @_Z3maxDv2_sS_(<2 x i16> %v1, <2 x i16> %v2) nounwind readnone {
  %lhs32 = sext <2 x i16> %v1 to <2 x i32>
  %rhs32 = sext <2 x i16> %v2 to <2 x i32>
  %max32 = tail call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %lhs32, <2 x i32> %rhs32) nounwind readnone
  %max16 = trunc <2 x i32> %max32 to <2 x i16>
  ret <2 x i16> %max16
}
275
; max(short3, short3): sign-extend to i32, pad to four lanes (lane 3 is
; undef), take the signed NEON vmax, then drop the pad lane and truncate to
; i16.
define <3 x i16> @_Z3maxDv3_sS_(<3 x i16> %v1, <3 x i16> %v2) nounwind readnone {
  %lhs32 = sext <3 x i16> %v1 to <3 x i32>
  %rhs32 = sext <3 x i16> %v2 to <3 x i32>
  %lhs4 = shufflevector <3 x i32> %lhs32, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  %rhs4 = shufflevector <3 x i32> %rhs32, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  %max4 = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %lhs4, <4 x i32> %rhs4) nounwind readnone
  %max3 = shufflevector <4 x i32> %max4, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  %max16 = trunc <3 x i32> %max3 to <3 x i16>
  ret <3 x i16> %max16
}
286
; max(short4, short4): lanes are sign-extended to i32 so the 32-bit signed
; NEON vmax can be used, then truncated back to i16.
define <4 x i16> @_Z3maxDv4_sS_(<4 x i16> %v1, <4 x i16> %v2) nounwind readnone {
  %lhs32 = sext <4 x i16> %v1 to <4 x i32>
  %rhs32 = sext <4 x i16> %v2 to <4 x i32>
  %max32 = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %lhs32, <4 x i32> %rhs32) nounwind readnone
  %max16 = trunc <4 x i32> %max32 to <4 x i16>
  ret <4 x i16> %max16
}
294
; max(int, int): signed 32-bit maximum via compare + select.
; On a tie the compare is false and %v2 (equal to %v1) is returned.
define i32 @_Z3maxii(i32 %v1, i32 %v2) nounwind readnone {
  %v1_is_greater = icmp sgt i32 %v1, %v2
  %larger = select i1 %v1_is_greater, i32 %v1, i32 %v2
  ret i32 %larger
}
300
; max(int2, int2): per-lane signed maximum via the 2-wide NEON vmax.
define <2 x i32> @_Z3maxDv2_iS_(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone {
  %max = tail call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone
  ret <2 x i32> %max
}
305
; max(int3, int3): operands are padded to four lanes (lane 3 is undef), the
; 4-wide signed NEON vmax is applied, and the pad lane is dropped.
define <3 x i32> @_Z3maxDv3_iS_(<3 x i32> %v1, <3 x i32> %v2) nounwind readnone {
  %lhs4 = shufflevector <3 x i32> %v1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  %rhs4 = shufflevector <3 x i32> %v2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  %max4 = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %lhs4, <4 x i32> %rhs4) nounwind readnone
  %max3 = shufflevector <4 x i32> %max4, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x i32> %max3
}
313
; max(int4, int4): per-lane signed maximum via the 4-wide NEON vmax.
define <4 x i32> @_Z3maxDv4_iS_(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone {
  %max = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone
  ret <4 x i32> %max
}
318
; max of two signed 64-bit integers via compare + select.
; On a tie the compare is false and %v2 (equal to %v1) is returned.
define i64 @_Z3maxxx(i64 %v1, i64 %v2) nounwind readnone {
  %v1_is_greater = icmp sgt i64 %v1, %v2
  %larger = select i1 %v1_is_greater, i64 %v1, i64 %v2
  ret i64 %larger
}
324
; TODO: add max() overloads for the signed 64-bit vector types (<2|3|4 x i64>).
326
; max(uchar, uchar): unsigned 8-bit maximum via compare + select.
; On a tie the compare is false and %v2 (equal to %v1) is returned.
define zeroext i8 @_Z3maxhh(i8 zeroext %v1, i8 zeroext %v2) nounwind readnone {
  %v1_is_greater = icmp ugt i8 %v1, %v2
  %larger = select i1 %v1_is_greater, i8 %v1, i8 %v2
  ret i8 %larger
}
332
; max(uchar2, uchar2): lanes are zero-extended to i32 so the 32-bit unsigned
; NEON vmax can be used, then truncated back to i8.
define <2 x i8> @_Z3maxDv2_hS_(<2 x i8> %v1, <2 x i8> %v2) nounwind readnone {
  %lhs32 = zext <2 x i8> %v1 to <2 x i32>
  %rhs32 = zext <2 x i8> %v2 to <2 x i32>
  %max32 = tail call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %lhs32, <2 x i32> %rhs32) nounwind readnone
  %max8 = trunc <2 x i32> %max32 to <2 x i8>
  ret <2 x i8> %max8
}
340
; max(uchar3, uchar3): zero-extend to i32, pad to four lanes (lane 3 is
; undef), take the unsigned NEON vmax, then drop the pad lane and truncate
; to i8.
define <3 x i8> @_Z3maxDv3_hS_(<3 x i8> %v1, <3 x i8> %v2) nounwind readnone {
  %lhs32 = zext <3 x i8> %v1 to <3 x i32>
  %rhs32 = zext <3 x i8> %v2 to <3 x i32>
  %lhs4 = shufflevector <3 x i32> %lhs32, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  %rhs4 = shufflevector <3 x i32> %rhs32, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  %max4 = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %lhs4, <4 x i32> %rhs4) nounwind readnone
  %max3 = shufflevector <4 x i32> %max4, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  %max8 = trunc <3 x i32> %max3 to <3 x i8>
  ret <3 x i8> %max8
}
351
; max(uchar4, uchar4): lanes are zero-extended to i32 so the 32-bit unsigned
; NEON vmax can be used, then truncated back to i8.
define <4 x i8> @_Z3maxDv4_hS_(<4 x i8> %v1, <4 x i8> %v2) nounwind readnone {
  %lhs32 = zext <4 x i8> %v1 to <4 x i32>
  %rhs32 = zext <4 x i8> %v2 to <4 x i32>
  %max32 = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %lhs32, <4 x i32> %rhs32) nounwind readnone
  %max8 = trunc <4 x i32> %max32 to <4 x i8>
  ret <4 x i8> %max8
}
359
; max(ushort, ushort): unsigned 16-bit maximum via compare + select.
; On a tie the compare is false and %v2 (equal to %v1) is returned.
define zeroext i16 @_Z3maxtt(i16 zeroext %v1, i16 zeroext %v2) nounwind readnone {
  %v1_is_greater = icmp ugt i16 %v1, %v2
  %larger = select i1 %v1_is_greater, i16 %v1, i16 %v2
  ret i16 %larger
}
365
; max(ushort2, ushort2): lanes are zero-extended to i32 so the 32-bit
; unsigned NEON vmax can be used, then truncated back to i16.
define <2 x i16> @_Z3maxDv2_tS_(<2 x i16> %v1, <2 x i16> %v2) nounwind readnone {
  %lhs32 = zext <2 x i16> %v1 to <2 x i32>
  %rhs32 = zext <2 x i16> %v2 to <2 x i32>
  %max32 = tail call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %lhs32, <2 x i32> %rhs32) nounwind readnone
  %max16 = trunc <2 x i32> %max32 to <2 x i16>
  ret <2 x i16> %max16
}
373
; max(ushort3, ushort3): zero-extend to i32, pad to four lanes (lane 3 is
; undef), take the unsigned NEON vmax, then drop the pad lane and truncate
; to i16.
define <3 x i16> @_Z3maxDv3_tS_(<3 x i16> %v1, <3 x i16> %v2) nounwind readnone {
  %lhs32 = zext <3 x i16> %v1 to <3 x i32>
  %rhs32 = zext <3 x i16> %v2 to <3 x i32>
  %lhs4 = shufflevector <3 x i32> %lhs32, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  %rhs4 = shufflevector <3 x i32> %rhs32, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  %max4 = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %lhs4, <4 x i32> %rhs4) nounwind readnone
  %max3 = shufflevector <4 x i32> %max4, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  %max16 = trunc <3 x i32> %max3 to <3 x i16>
  ret <3 x i16> %max16
}
384
; max(ushort4, ushort4): lanes are zero-extended to i32 so the 32-bit
; unsigned NEON vmax can be used, then truncated back to i16.
define <4 x i16> @_Z3maxDv4_tS_(<4 x i16> %v1, <4 x i16> %v2) nounwind readnone {
  %lhs32 = zext <4 x i16> %v1 to <4 x i32>
  %rhs32 = zext <4 x i16> %v2 to <4 x i32>
  %max32 = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %lhs32, <4 x i32> %rhs32) nounwind readnone
  %max16 = trunc <4 x i32> %max32 to <4 x i16>
  ret <4 x i16> %max16
}
392
; max(uint, uint): unsigned 32-bit maximum via compare + select.
; On a tie the compare is false and %v2 (equal to %v1) is returned.
define i32 @_Z3maxjj(i32 %v1, i32 %v2) nounwind readnone {
  %v1_is_greater = icmp ugt i32 %v1, %v2
  %larger = select i1 %v1_is_greater, i32 %v1, i32 %v2
  ret i32 %larger
}
398
; max(uint2, uint2): per-lane unsigned maximum via the 2-wide NEON vmax.
define <2 x i32> @_Z3maxDv2_jS_(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone {
  %max = tail call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone
  ret <2 x i32> %max
}
403
; max(uint3, uint3): operands are padded to four lanes (lane 3 is undef),
; the 4-wide unsigned NEON vmax is applied, and the pad lane is dropped.
define <3 x i32> @_Z3maxDv3_jS_(<3 x i32> %v1, <3 x i32> %v2) nounwind readnone {
  %lhs4 = shufflevector <3 x i32> %v1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  %rhs4 = shufflevector <3 x i32> %v2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  %max4 = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %lhs4, <4 x i32> %rhs4) nounwind readnone
  %max3 = shufflevector <4 x i32> %max4, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x i32> %max3
}
411
; max(uint4, uint4): per-lane unsigned maximum via the 4-wide NEON vmax.
define <4 x i32> @_Z3maxDv4_jS_(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone {
  %max = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone
  ret <4 x i32> %max
}
416
; max of two unsigned 64-bit integers via compare + select.
; On a tie the compare is false and %v2 (equal to %v1) is returned.
define i64 @_Z3maxyy(i64 %v1, i64 %v2) nounwind readnone {
  %v1_is_greater = icmp ugt i64 %v1, %v2
  %larger = select i1 %v1_is_greater, i64 %v1, i64 %v2
  ret i64 %larger
}
422
; TODO: add max() overloads for the unsigned 64-bit vector types (<2|3|4 x i64>).
424
; max(float, float): thin forwarder to fmax(float, float).
define float @_Z3maxff(float %v1, float %v2) nounwind readnone {
  %result = tail call float @_Z4fmaxff(float %v1, float %v2)
  ret float %result
}
429
; max(float2, float2): thin forwarder to fmax(float2, float2).
define <2 x float> @_Z3maxDv2_fS_(<2 x float> %v1, <2 x float> %v2) nounwind readnone {
  %result = tail call <2 x float> @_Z4fmaxDv2_fS_(<2 x float> %v1, <2 x float> %v2)
  ret <2 x float> %result
}
434
; max(float2, float): thin forwarder to fmax(float2, float).
define <2 x float> @_Z3maxDv2_ff(<2 x float> %v1, float %v2) nounwind readnone {
  %result = tail call <2 x float> @_Z4fmaxDv2_ff(<2 x float> %v1, float %v2)
  ret <2 x float> %result
}
439
; max(float3, float3): thin forwarder to fmax(float3, float3).
define <3 x float> @_Z3maxDv3_fS_(<3 x float> %v1, <3 x float> %v2) nounwind readnone {
  %result = tail call <3 x float> @_Z4fmaxDv3_fS_(<3 x float> %v1, <3 x float> %v2)
  ret <3 x float> %result
}
444
; max(float3, float): thin forwarder to fmax(float3, float).
define <3 x float> @_Z3maxDv3_ff(<3 x float> %v1, float %v2) nounwind readnone {
  %result = tail call <3 x float> @_Z4fmaxDv3_ff(<3 x float> %v1, float %v2)
  ret <3 x float> %result
}
449
; max(float4, float4): thin forwarder to fmax(float4, float4).
define <4 x float> @_Z3maxDv4_fS_(<4 x float> %v1, <4 x float> %v2) nounwind readnone {
  %result = tail call <4 x float> @_Z4fmaxDv4_fS_(<4 x float> %v1, <4 x float> %v2)
  ret <4 x float> %result
}
454
; max(float4, float): thin forwarder to fmax(float4, float).
define <4 x float> @_Z3maxDv4_ff(<4 x float> %v1, float %v2) nounwind readnone {
  %result = tail call <4 x float> @_Z4fmaxDv4_ff(<4 x float> %v1, float %v2)
  ret <4 x float> %result
}
459
460
461;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
462;;;;;;;;;                  MIN                   ;;;;;;;;;;
463;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
464
; min(char, char): signed 8-bit minimum via compare + select.
; On a tie the compare is false and %v2 (equal to %v1) is returned.
define signext i8 @_Z3mincc(i8 signext %v1, i8 signext %v2) nounwind readnone {
  %v1_is_smaller = icmp slt i8 %v1, %v2
  %smaller = select i1 %v1_is_smaller, i8 %v1, i8 %v2
  ret i8 %smaller
}
470
; min(char2, char2): lanes are sign-extended to i32 so the 32-bit signed
; NEON vmin can be used, then truncated back to i8.
define <2 x i8> @_Z3minDv2_cS_(<2 x i8> %v1, <2 x i8> %v2) nounwind readnone {
  %lhs32 = sext <2 x i8> %v1 to <2 x i32>
  %rhs32 = sext <2 x i8> %v2 to <2 x i32>
  %min32 = tail call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %lhs32, <2 x i32> %rhs32) nounwind readnone
  %min8 = trunc <2 x i32> %min32 to <2 x i8>
  ret <2 x i8> %min8
}
478
; min(char3, char3): sign-extend to i32, pad to four lanes (lane 3 is undef),
; take the signed NEON vmin, then drop the pad lane and truncate to i8.
define <3 x i8> @_Z3minDv3_cS_(<3 x i8> %v1, <3 x i8> %v2) nounwind readnone {
  %lhs32 = sext <3 x i8> %v1 to <3 x i32>
  %rhs32 = sext <3 x i8> %v2 to <3 x i32>
  %lhs4 = shufflevector <3 x i32> %lhs32, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  %rhs4 = shufflevector <3 x i32> %rhs32, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  %min4 = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %lhs4, <4 x i32> %rhs4) nounwind readnone
  %min3 = shufflevector <4 x i32> %min4, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  %min8 = trunc <3 x i32> %min3 to <3 x i8>
  ret <3 x i8> %min8
}
489
; min(char4, char4): lanes are sign-extended to i32 so the 32-bit signed
; NEON vmin can be used, then truncated back to i8.
define <4 x i8> @_Z3minDv4_cS_(<4 x i8> %v1, <4 x i8> %v2) nounwind readnone {
  %lhs32 = sext <4 x i8> %v1 to <4 x i32>
  %rhs32 = sext <4 x i8> %v2 to <4 x i32>
  %min32 = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %lhs32, <4 x i32> %rhs32) nounwind readnone
  %min8 = trunc <4 x i32> %min32 to <4 x i8>
  ret <4 x i8> %min8
}
497
; min(short, short): signed 16-bit minimum via compare + select.
; On a tie the compare is false and %v2 (equal to %v1) is returned.
define signext i16 @_Z3minss(i16 signext %v1, i16 signext %v2) nounwind readnone {
  %v1_is_smaller = icmp slt i16 %v1, %v2
  %smaller = select i1 %v1_is_smaller, i16 %v1, i16 %v2
  ret i16 %smaller
}
503
; min(short2, short2): lanes are sign-extended to i32 so the 32-bit signed
; NEON vmin can be used, then truncated back to i16.
define <2 x i16> @_Z3minDv2_sS_(<2 x i16> %v1, <2 x i16> %v2) nounwind readnone {
  %lhs32 = sext <2 x i16> %v1 to <2 x i32>
  %rhs32 = sext <2 x i16> %v2 to <2 x i32>
  %min32 = tail call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %lhs32, <2 x i32> %rhs32) nounwind readnone
  %min16 = trunc <2 x i32> %min32 to <2 x i16>
  ret <2 x i16> %min16
}
511
; min(short3, short3): sign-extend to i32, pad to four lanes (lane 3 is
; undef), take the signed NEON vmin, then drop the pad lane and truncate to
; i16.
define <3 x i16> @_Z3minDv3_sS_(<3 x i16> %v1, <3 x i16> %v2) nounwind readnone {
  %lhs32 = sext <3 x i16> %v1 to <3 x i32>
  %rhs32 = sext <3 x i16> %v2 to <3 x i32>
  %lhs4 = shufflevector <3 x i32> %lhs32, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  %rhs4 = shufflevector <3 x i32> %rhs32, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  %min4 = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %lhs4, <4 x i32> %rhs4) nounwind readnone
  %min3 = shufflevector <4 x i32> %min4, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  %min16 = trunc <3 x i32> %min3 to <3 x i16>
  ret <3 x i16> %min16
}
522
; min(short4, short4): lanes are sign-extended to i32 so the 32-bit signed
; NEON vmin can be used, then truncated back to i16.
define <4 x i16> @_Z3minDv4_sS_(<4 x i16> %v1, <4 x i16> %v2) nounwind readnone {
  %lhs32 = sext <4 x i16> %v1 to <4 x i32>
  %rhs32 = sext <4 x i16> %v2 to <4 x i32>
  %min32 = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %lhs32, <4 x i32> %rhs32) nounwind readnone
  %min16 = trunc <4 x i32> %min32 to <4 x i16>
  ret <4 x i16> %min16
}
530
; min(int, int): signed 32-bit minimum via compare + select.
; On a tie the compare is false and %v2 (equal to %v1) is returned.
define i32 @_Z3minii(i32 %v1, i32 %v2) nounwind readnone {
  %v1_is_smaller = icmp slt i32 %v1, %v2
  %smaller = select i1 %v1_is_smaller, i32 %v1, i32 %v2
  ret i32 %smaller
}
536
; min(int2, int2): per-lane signed minimum via the 2-wide NEON vmin.
define <2 x i32> @_Z3minDv2_iS_(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone {
  %min = tail call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone
  ret <2 x i32> %min
}
541
; min(int3, int3): operands are padded to four lanes (lane 3 is undef), the
; 4-wide signed NEON vmin is applied, and the pad lane is dropped.
define <3 x i32> @_Z3minDv3_iS_(<3 x i32> %v1, <3 x i32> %v2) nounwind readnone {
  %lhs4 = shufflevector <3 x i32> %v1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  %rhs4 = shufflevector <3 x i32> %v2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  %min4 = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %lhs4, <4 x i32> %rhs4) nounwind readnone
  %min3 = shufflevector <4 x i32> %min4, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x i32> %min3
}
549
; min(int4, int4): per-lane signed minimum via the 4-wide NEON vmin.
define <4 x i32> @_Z3minDv4_iS_(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone {
  %min = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone
  ret <4 x i32> %min
}
554
; min of two signed 64-bit integers via compare + select.
; On a tie the compare is false and %v2 (equal to %v1) is returned.
define i64 @_Z3minxx(i64 %v1, i64 %v2) nounwind readnone {
  %v1_is_smaller = icmp slt i64 %v1, %v2
  %smaller = select i1 %v1_is_smaller, i64 %v1, i64 %v2
  ret i64 %smaller
}
560
; TODO: add min() overloads for the signed 64-bit vector types (<2|3|4 x i64>).
562
; min(uchar, uchar): unsigned 8-bit minimum via compare + select.
; On a tie the compare is false and %v2 (equal to %v1) is returned.
define zeroext i8 @_Z3minhh(i8 zeroext %v1, i8 zeroext %v2) nounwind readnone {
  %v1_is_smaller = icmp ult i8 %v1, %v2
  %smaller = select i1 %v1_is_smaller, i8 %v1, i8 %v2
  ret i8 %smaller
}
568
; min(uchar2, uchar2): lanes are zero-extended to i32 so the 32-bit unsigned
; NEON vmin can be used, then truncated back to i8.
define <2 x i8> @_Z3minDv2_hS_(<2 x i8> %v1, <2 x i8> %v2) nounwind readnone {
  %lhs32 = zext <2 x i8> %v1 to <2 x i32>
  %rhs32 = zext <2 x i8> %v2 to <2 x i32>
  %min32 = tail call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %lhs32, <2 x i32> %rhs32) nounwind readnone
  %min8 = trunc <2 x i32> %min32 to <2 x i8>
  ret <2 x i8> %min8
}
576
; min(uchar3, uchar3): zero-extend to i32, pad to four lanes (lane 3 is
; undef), take the unsigned NEON vmin, then drop the pad lane and truncate
; to i8.
define <3 x i8> @_Z3minDv3_hS_(<3 x i8> %v1, <3 x i8> %v2) nounwind readnone {
  %lhs32 = zext <3 x i8> %v1 to <3 x i32>
  %rhs32 = zext <3 x i8> %v2 to <3 x i32>
  %lhs4 = shufflevector <3 x i32> %lhs32, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  %rhs4 = shufflevector <3 x i32> %rhs32, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  %min4 = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %lhs4, <4 x i32> %rhs4) nounwind readnone
  %min3 = shufflevector <4 x i32> %min4, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  %min8 = trunc <3 x i32> %min3 to <3 x i8>
  ret <3 x i8> %min8
}
587
; min(uchar4, uchar4): lanes are zero-extended to i32 so the 32-bit unsigned
; NEON vmin can be used, then truncated back to i8.
define <4 x i8> @_Z3minDv4_hS_(<4 x i8> %v1, <4 x i8> %v2) nounwind readnone {
  %lhs32 = zext <4 x i8> %v1 to <4 x i32>
  %rhs32 = zext <4 x i8> %v2 to <4 x i32>
  %min32 = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %lhs32, <4 x i32> %rhs32) nounwind readnone
  %min8 = trunc <4 x i32> %min32 to <4 x i8>
  ret <4 x i8> %min8
}
595
; min(ushort, ushort): unsigned 16-bit minimum via compare + select.
; On a tie the compare is false and %v2 (equal to %v1) is returned.
define zeroext i16 @_Z3mintt(i16 zeroext %v1, i16 zeroext %v2) nounwind readnone {
  %v1_is_smaller = icmp ult i16 %v1, %v2
  %smaller = select i1 %v1_is_smaller, i16 %v1, i16 %v2
  ret i16 %smaller
}
601
; min(ushort2, ushort2): lanes are zero-extended to i32 so the 32-bit
; unsigned NEON vmin can be used, then truncated back to i16.
define <2 x i16> @_Z3minDv2_tS_(<2 x i16> %v1, <2 x i16> %v2) nounwind readnone {
  %lhs32 = zext <2 x i16> %v1 to <2 x i32>
  %rhs32 = zext <2 x i16> %v2 to <2 x i32>
  %min32 = tail call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %lhs32, <2 x i32> %rhs32) nounwind readnone
  %min16 = trunc <2 x i32> %min32 to <2 x i16>
  ret <2 x i16> %min16
}
609
; min(ushort3, ushort3): zero-extend to i32, pad to four lanes (lane 3 is
; undef), take the unsigned NEON vmin, then drop the pad lane and truncate
; to i16.
define <3 x i16> @_Z3minDv3_tS_(<3 x i16> %v1, <3 x i16> %v2) nounwind readnone {
  %lhs32 = zext <3 x i16> %v1 to <3 x i32>
  %rhs32 = zext <3 x i16> %v2 to <3 x i32>
  %lhs4 = shufflevector <3 x i32> %lhs32, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  %rhs4 = shufflevector <3 x i32> %rhs32, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  %min4 = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %lhs4, <4 x i32> %rhs4) nounwind readnone
  %min3 = shufflevector <4 x i32> %min4, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  %min16 = trunc <3 x i32> %min3 to <3 x i16>
  ret <3 x i16> %min16
}
620
; min(ushort4, ushort4): lanes are zero-extended to i32 so the 32-bit
; unsigned NEON vmin can be used, then truncated back to i16.
define <4 x i16> @_Z3minDv4_tS_(<4 x i16> %v1, <4 x i16> %v2) nounwind readnone {
  %lhs32 = zext <4 x i16> %v1 to <4 x i32>
  %rhs32 = zext <4 x i16> %v2 to <4 x i32>
  %min32 = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %lhs32, <4 x i32> %rhs32) nounwind readnone
  %min16 = trunc <4 x i32> %min32 to <4 x i16>
  ret <4 x i16> %min16
}
628
; min(uint, uint): unsigned 32-bit minimum via compare + select.
; On a tie the compare is false and %v2 (equal to %v1) is returned.
define i32 @_Z3minjj(i32 %v1, i32 %v2) nounwind readnone {
  %v1_is_smaller = icmp ult i32 %v1, %v2
  %smaller = select i1 %v1_is_smaller, i32 %v1, i32 %v2
  ret i32 %smaller
}
634
; min(uint2, uint2): per-lane unsigned minimum via the 2-wide NEON vmin.
define <2 x i32> @_Z3minDv2_jS_(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone {
  %min = tail call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone
  ret <2 x i32> %min
}
639
; min(uint3, uint3): operands are padded to four lanes (lane 3 is undef),
; the 4-wide unsigned NEON vmin is applied, and the pad lane is dropped.
define <3 x i32> @_Z3minDv3_jS_(<3 x i32> %v1, <3 x i32> %v2) nounwind readnone {
  %lhs4 = shufflevector <3 x i32> %v1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  %rhs4 = shufflevector <3 x i32> %v2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  %min4 = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %lhs4, <4 x i32> %rhs4) nounwind readnone
  %min3 = shufflevector <4 x i32> %min4, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x i32> %min3
}
647
; min(uint4, uint4): per-lane unsigned minimum via the 4-wide NEON vmin.
define <4 x i32> @_Z3minDv4_jS_(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone {
  %min = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone
  ret <4 x i32> %min
}
652
; min of two unsigned 64-bit integers via compare + select.
; On a tie the compare is false and %v2 (equal to %v1) is returned.
define i64 @_Z3minyy(i64 %v1, i64 %v2) nounwind readnone {
  %v1_is_smaller = icmp ult i64 %v1, %v2
  %smaller = select i1 %v1_is_smaller, i64 %v1, i64 %v2
  ret i64 %smaller
}
658
; TODO: add min() overloads for the unsigned 64-bit vector types (<2|3|4 x i64>).
660
; min(float, float): thin forwarder to fmin(float, float).
define float @_Z3minff(float %v1, float %v2) nounwind readnone {
  %result = tail call float @_Z4fminff(float %v1, float %v2)
  ret float %result
}
665
; min(float2, float2): thin forwarder to fmin(float2, float2).
define <2 x float> @_Z3minDv2_fS_(<2 x float> %v1, <2 x float> %v2) nounwind readnone {
  %result = tail call <2 x float> @_Z4fminDv2_fS_(<2 x float> %v1, <2 x float> %v2)
  ret <2 x float> %result
}
670
; min(float2, float): thin forwarder to fmin(float2, float).
define <2 x float> @_Z3minDv2_ff(<2 x float> %v1, float %v2) nounwind readnone {
  %result = tail call <2 x float> @_Z4fminDv2_ff(<2 x float> %v1, float %v2)
  ret <2 x float> %result
}
675
; min(float3, float3): thin forwarder to fmin(float3, float3).
define <3 x float> @_Z3minDv3_fS_(<3 x float> %v1, <3 x float> %v2) nounwind readnone {
  %result = tail call <3 x float> @_Z4fminDv3_fS_(<3 x float> %v1, <3 x float> %v2)
  ret <3 x float> %result
}
680
; min(float3, float): thin forwarder to fmin(float3, float).
define <3 x float> @_Z3minDv3_ff(<3 x float> %v1, float %v2) nounwind readnone {
  %result = tail call <3 x float> @_Z4fminDv3_ff(<3 x float> %v1, float %v2)
  ret <3 x float> %result
}
685
; min(<4 x float>, <4 x float>): thin wrapper that forwards both operands,
; unchanged, to the matching fmin overload and returns its result.
define <4 x float> @_Z3minDv4_fS_(<4 x float> %v1, <4 x float> %v2) nounwind readnone {
entry:
  %lo = tail call <4 x float> @_Z4fminDv4_fS_(<4 x float> %v1, <4 x float> %v2)
  ret <4 x float> %lo
}
690
; min(<4 x float>, float): thin wrapper that forwards the vector and the
; scalar bound, unchanged, to the matching fmin overload.
define <4 x float> @_Z3minDv4_ff(<4 x float> %v1, float %v2) nounwind readnone {
entry:
  %lo = tail call <4 x float> @_Z4fminDv4_ff(<4 x float> %v1, float %v2)
  ret <4 x float> %lo
}
695
696
697;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
698;;;;;;;;;                  YUV                   ;;;;;;;;;;
699;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
700
; Constants used by @_Z18rsYuvToRGBA_uchar4hhh. Everything is kept in 8.8
; fixed point: results are shifted right by 8 at the very end.
;
; Per-lane multipliers for the (U - 128) term.
@yuv_U = internal constant <4 x i32> <i32 0, i32 -100, i32 516, i32 0>, align 16
; Per-lane multipliers for the (V - 128) term.
@yuv_V = internal constant <4 x i32> <i32 409, i32 -208, i32 0, i32 0>, align 16
; Lower clamp bound (all lanes 0).
@yuv_0 = internal constant <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
; Upper clamp bound: 65535 before the final ">> 8", i.e. 255 afterwards.
@yuv_255 = internal constant <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>, align 16


; rsYuvToRGBA_uchar4(i8 Y, i8 U, i8 V) -> <4 x i8>
;
; Per lane, computes
;     298 * (Y - 16) + (U - 128) * yuv_U[lane] + (V - 128) * yuv_V[lane]
; clamps the sum to [0, 65535] with the signed NEON max/min intrinsics,
; shifts right by 8 and truncates each lane to i8.
;
; Assumes @smear_4i32 broadcasts its scalar argument to all four lanes; it is
; defined outside this block -- TODO confirm.
;
; NOTE(review): yuv_U and yuv_V are both 0 in lane 3, so (given the broadcast
; assumption above) lane 3 of the result carries only the clamped luma term.
; If callers expect a constant opaque alpha in that lane, this routine does
; not produce it -- confirm the intended contract.
; NOTE(review): no rounding term is added before the ">> 8" -- confirm that
; truncation is intended.
define <4 x i8> @_Z18rsYuvToRGBA_uchar4hhh(i8 %pY, i8 %pU, i8 %pV) nounwind readnone alwaysinline {
  ; Widen the unsigned 8-bit inputs so the bias subtraction cannot wrap.
  %_sy = zext i8 %pY to i32
  %_su = zext i8 %pU to i32
  %_sv = zext i8 %pV to i32

  ; Remove the luma/chroma offsets and pre-scale luma by 298.
  %_sy2 = add i32 -16, %_sy
  %_sy3 = mul i32 298, %_sy2
  %_su2 = add i32 -128, %_su
  %_sv2 = add i32 -128, %_sv
  %_y = tail call <4 x i32> @smear_4i32(i32 %_sy3) nounwind readnone
  %_u = tail call <4 x i32> @smear_4i32(i32 %_su2) nounwind readnone
  %_v = tail call <4 x i32> @smear_4i32(i32 %_sv2) nounwind readnone

  ; The constant tables are defined with align 16, so the loads may state the
  ; full alignment instead of the weaker align 8.
  %mu = load <4 x i32>* @yuv_U, align 16
  %mv = load <4 x i32>* @yuv_V, align 16
  %_u2 = mul <4 x i32> %_u, %mu
  %_v2 = mul <4 x i32> %_v, %mv
  %_y2 = add <4 x i32> %_y, %_u2
  %_y3 = add <4 x i32> %_y2, %_v2

  ; Disabled alternative: a single saturating narrowing shift in place of the
  ; explicit clamp + shift + trunc below.
  ;  %r1 = tail call <4 x i16> @llvm.arm.neon.vqshiftnsu.v4i16(<4 x i32> %_y3, <4 x i32> <i32 8, i32 8, i32 8, i32 8>) nounwind readnone
  ;  %r2 = trunc <4 x i16> %r1 to <4 x i8>
  ;  ret <4 x i8> %r2

  ; Clamp to [0, 65535] (signed compare), then drop the 8 fractional bits.
  %c0 = load <4 x i32>* @yuv_0, align 16
  %c255 = load <4 x i32>* @yuv_255, align 16
  %r1 = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %_y3, <4 x i32> %c0) nounwind readnone
  %r2 = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %r1, <4 x i32> %c255) nounwind readnone
  %r3 = lshr <4 x i32> %r2, <i32 8, i32 8, i32 8, i32 8>
  %r4 = trunc <4 x i32> %r3 to <4 x i8>
  ret <4 x i8> %r4
}
739
740;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
741;;;;;;;;;              half_RECIP              ;;;;;;;;;;
742;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
743
; half_recip(float): approximate reciprocal of %v using the NEON
; reciprocal-estimate intrinsic (vrecpe). No refinement step is applied.
;
; The scalar is placed in lane 0 of a <2 x float> (lane 1 stays undef), the
; 2-lane intrinsic is applied, and lane 0 of the result is returned.
;
; Marked nounwind readnone like the vector overloads: the body only calls an
; intrinsic that is itself invoked as nounwind readnone.
define float @_Z10half_recipf(float %v) nounwind readnone {
  %1 = insertelement <2 x float> undef, float %v, i32 0
  %2 = tail call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %1) nounwind readnone
  %3 = extractelement <2 x float> %2, i32 0
  ret float %3
}
750
; half_recip on <2 x float>: lane-wise approximate reciprocal via the NEON
; reciprocal-estimate intrinsic (vrecpe); no refinement step is applied.
;
; NOTE(review): the symbol's length prefix (10) covers only "half_recip", and
; "Dv2_h" is the Itanium encoding for a 2-vector of unsigned char, yet the
; parameter is <2 x float>. The expected name for half_recip(float2) would
; presumably be _Z10half_recipDv2_f -- confirm callers can resolve this symbol.
define <2 x float> @_Z10half_recip2Dv2_h(<2 x float> %v) nounwind readnone {
entry:
  %recip = tail call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %v) nounwind readnone
  ret <2 x float> %recip
}
755
; half_recip on <3 x float>: widens the operand to 4 lanes, applies the 4-lane
; NEON reciprocal-estimate intrinsic (vrecpe), and returns lanes 0..2.
;
; NOTE(review): "Dv3_h" in the symbol encodes a 3-vector of unsigned char while
; the parameter is <3 x float>; the name looks mis-mangled -- confirm callers
; can resolve it.
define <3 x float> @_Z10half_recip3Dv3_h(<3 x float> %v) nounwind readnone {
entry:
  ; Mask index 3 selects from the undef operand, so lane 3 is undef padding.
  %wide = shufflevector <3 x float> %v, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %recip4 = tail call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %wide) nounwind readnone
  %recip3 = shufflevector <4 x float> %recip4, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x float> %recip3
}
762
; half_recip on <4 x float>: lane-wise approximate reciprocal via the 4-lane
; NEON reciprocal-estimate intrinsic (vrecpe); no refinement step is applied.
;
; NOTE(review): "Dv4_h" in the symbol encodes a 4-vector of unsigned char while
; the parameter is <4 x float>; the name looks mis-mangled -- confirm callers
; can resolve it.
define <4 x float> @_Z10half_recip4Dv4_h(<4 x float> %v) nounwind readnone {
entry:
  %recip = tail call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %v) nounwind readnone
  ret <4 x float> %recip
}
767
768;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
769;;;;;;;;;              half_SQRT               ;;;;;;;;;;
770;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
771
; half_sqrt(float): approximate square root of %v, built from two NEON
; estimates: vrsqrte gives ~1/sqrt(v), then vrecpe of that gives ~sqrt(v).
; No refinement step is applied to either estimate.
;
; The scalar is placed in lane 0 of a <2 x float> (lane 1 stays undef) and
; lane 0 of the final result is returned.
;
; Marked nounwind readnone like the vector overloads: the body only calls
; intrinsics that are themselves invoked as nounwind readnone.
define float @_Z9half_sqrtf(float %v) nounwind readnone {
  %1 = insertelement <2 x float> undef, float %v, i32 0
  %2 = tail call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %1) nounwind readnone
  %3 = tail call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %2) nounwind readnone
  %4 = extractelement <2 x float> %3, i32 0
  ret float %4
}
779
; half_sqrt on <2 x float>: lane-wise approximate square root, computed as the
; reciprocal estimate (vrecpe) of the reciprocal-square-root estimate (vrsqrte).
;
; NOTE(review): "Dv2_h" in the symbol encodes a 2-vector of unsigned char while
; the parameter is <2 x float>; the name looks mis-mangled -- confirm callers
; can resolve it.
define <2 x float> @_Z9half_sqrt2Dv2_h(<2 x float> %v) nounwind readnone {
entry:
  %inv_root = tail call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %v) nounwind readnone
  %root = tail call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %inv_root) nounwind readnone
  ret <2 x float> %root
}
785
; half_sqrt on <3 x float>: widens the operand to 4 lanes, takes the
; reciprocal-square-root estimate (vrsqrte) followed by the reciprocal
; estimate (vrecpe), and returns lanes 0..2 of the result.
;
; NOTE(review): "Dv3_h" in the symbol encodes a 3-vector of unsigned char while
; the parameter is <3 x float>; the name looks mis-mangled -- confirm callers
; can resolve it.
define <3 x float> @_Z9half_sqrt3Dv3_h(<3 x float> %v) nounwind readnone {
entry:
  ; Mask index 3 selects from the undef operand, so lane 3 is undef padding.
  %wide = shufflevector <3 x float> %v, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %inv_root4 = tail call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %wide) nounwind readnone
  %root4 = tail call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %inv_root4) nounwind readnone
  %root3 = shufflevector <4 x float> %root4, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x float> %root3
}
793
; half_sqrt on <4 x float>: lane-wise approximate square root, computed as the
; reciprocal estimate (vrecpe) of the reciprocal-square-root estimate (vrsqrte).
;
; NOTE(review): "Dv4_h" in the symbol encodes a 4-vector of unsigned char while
; the parameter is <4 x float>; the name looks mis-mangled -- confirm callers
; can resolve it.
define <4 x float> @_Z9half_sqrt4Dv4_h(<4 x float> %v) nounwind readnone {
entry:
  %inv_root = tail call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %v) nounwind readnone
  %root = tail call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %inv_root) nounwind readnone
  ret <4 x float> %root
}
799
800
801;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
802;;;;;;;;;              half_RSQRT              ;;;;;;;;;;
803;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
804
; half_rsqrt(float): approximate 1/sqrt(%v) using the NEON
; reciprocal-square-root-estimate intrinsic (vrsqrte). No refinement step is
; applied.
;
; The scalar is placed in lane 0 of a <2 x float> (lane 1 stays undef), the
; 2-lane intrinsic is applied, and lane 0 of the result is returned.
;
; Marked nounwind readnone like the vector overloads: the body only calls an
; intrinsic that is itself invoked as nounwind readnone.
define float @_Z10half_rsqrtf(float %v) nounwind readnone {
  %1 = insertelement <2 x float> undef, float %v, i32 0
  %2 = tail call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %1) nounwind readnone
  %3 = extractelement <2 x float> %2, i32 0
  ret float %3
}
811
; half_rsqrt on <2 x float>: lane-wise approximate 1/sqrt via the NEON
; reciprocal-square-root-estimate intrinsic (vrsqrte); no refinement step.
;
; NOTE(review): "Dv2_h" in the symbol encodes a 2-vector of unsigned char while
; the parameter is <2 x float>; the name looks mis-mangled -- confirm callers
; can resolve it.
define <2 x float> @_Z10half_rsqrt2Dv2_h(<2 x float> %v) nounwind readnone {
entry:
  %inv_root = tail call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %v) nounwind readnone
  ret <2 x float> %inv_root
}
816
; half_rsqrt on <3 x float>: widens the operand to 4 lanes, applies the 4-lane
; NEON reciprocal-square-root-estimate intrinsic (vrsqrte), and returns
; lanes 0..2 of the result.
;
; NOTE(review): "Dv3_h" in the symbol encodes a 3-vector of unsigned char while
; the parameter is <3 x float>; the name looks mis-mangled -- confirm callers
; can resolve it.
define <3 x float> @_Z10half_rsqrt3Dv3_h(<3 x float> %v) nounwind readnone {
entry:
  ; Mask index 3 selects from the undef operand, so lane 3 is undef padding.
  %wide = shufflevector <3 x float> %v, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %inv_root4 = tail call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %wide) nounwind readnone
  %inv_root3 = shufflevector <4 x float> %inv_root4, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x float> %inv_root3
}
823
; half_rsqrt on <4 x float>: lane-wise approximate 1/sqrt via the 4-lane NEON
; reciprocal-square-root-estimate intrinsic (vrsqrte); no refinement step.
;
; NOTE(review): "Dv4_h" in the symbol encodes a 4-vector of unsigned char while
; the parameter is <4 x float>; the name looks mis-mangled -- confirm callers
; can resolve it.
define <4 x float> @_Z10half_rsqrt4Dv4_h(<4 x float> %v) nounwind readnone {
entry:
  %inv_root = tail call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %v) nounwind readnone
  ret <4 x float> %inv_root
}
828
829