neon.ll - OpenGrok cross reference for /frameworks/rs/driver/runtime/arch/neon.ll

Lines Matching refs:x
8 declare <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float>, <2 x float>) nounwind readnone
9 declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>) nounwind readnone
10 declare <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
11 declare <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
12 declare <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
13 declare <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
14 declare <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
15 declare <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
17 declare <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float>, <2 x float>) nounwind readnone
18 declare <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float>, <4 x float>) nounwind readnone
19 declare <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
20 declare <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
21 declare <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
22 declare <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
23 declare <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
24 declare <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
26 declare <8 x i8>  @llvm.arm.neon.vqshiftns.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
27 declare <4 x i16> @llvm.arm.neon.vqshiftns.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
28 declare <2 x i32> @llvm.arm.neon.vqshiftns.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
30 declare <8 x i8>  @llvm.arm.neon.vqshiftnu.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
31 declare <4 x i16> @llvm.arm.neon.vqshiftnu.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
32 declare <2 x i32> @llvm.arm.neon.vqshiftnu.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
34 declare <8 x i8>  @llvm.arm.neon.vqshiftnsu.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
35 declare <4 x i16> @llvm.arm.neon.vqshiftnsu.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
36 declare <2 x i32> @llvm.arm.neon.vqshiftnsu.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
38 declare <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float>) nounwind readnone
39 declare <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float>) nounwind readnone
41 declare <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float>) nounwind readnone
42 declare <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float>) nounwind readnone
44 declare <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float>, <2 x float>) nounwind readnone
45 declare <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float>, <4 x float>) nounwind readnone
47 declare <2 x float> @llvm.arm.neon.vrsqrts.v2f32(<2 x float>, <2 x float>) nounwind readnone
48 declare <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float>, <4 x float>) nounwind readnone
54 define internal <4 x float> @smear_4f(float %in) nounwind readnone alwaysinline {
55   %1 = insertelement <4 x float> undef, float %in, i32 0
56   %2 = insertelement <4 x float> %1, float %in, i32 1
57   %3 = insertelement <4 x float> %2, float %in, i32 2
58   %4 = insertelement <4 x float> %3, float %in, i32 3
59   ret <4 x float> %4
62 define internal <4 x i32> @smear_4i(i32 %in) nounwind readnone alwaysinline {
63   %1 = insertelement <4 x i32> undef, i32 %in, i32 0
64   %2 = insertelement <4 x i32> %1, i32 %in, i32 1
65   %3 = insertelement <4 x i32> %2, i32 %in, i32 2
66   %4 = insertelement <4 x i32> %3, i32 %in, i32 3
67   ret <4 x i32> %4
70 define internal <4 x i16> @smear_4s(i16 %in) nounwind readnone alwaysinline {
71   %1 = insertelement <4 x i16> undef, i16 %in, i32 0
72   %2 = insertelement <4 x i16> %1, i16 %in, i32 1
73   %3 = insertelement <4 x i16> %2, i16 %in, i32 2
74   %4 = insertelement <4 x i16> %3, i16 %in, i32 3
75   ret <4 x i16> %4
80 define internal <2 x float> @smear_2f(float %in) nounwind readnone alwaysinline {
81   %1 = insertelement <2 x float> undef, float %in, i32 0
82   %2 = insertelement <2 x float> %1, float %in, i32 1
83   ret <2 x float> %2
86 define internal <2 x i32> @smear_2i(i32 %in) nounwind readnone alwaysinline {
87   %1 = insertelement <2 x i32> undef, i32 %in, i32 0
88   %2 = insertelement <2 x i32> %1, i32 %in, i32 1
89   ret <2 x i32> %2
92 define internal <2 x i16> @smear_2s(i16 %in) nounwind readnone alwaysinline {
93   %1 = insertelement <2 x i16> undef, i16 %in, i32 0
94   %2 = insertelement <2 x i16> %1, i16 %in, i32 1
95   ret <2 x i16> %2
99 define internal <4 x i32> @smear_4i32(i32 %in) nounwind readnone alwaysinline {
100   %1 = insertelement <4 x i32> undef, i32 %in, i32 0
101   %2 = insertelement <4 x i32> %1, i32 %in, i32 1
102   %3 = insertelement <4 x i32> %2, i32 %in, i32 2
103   %4 = insertelement <4 x i32> %3, i32 %in, i32 3
104   ret <4 x i32> %4
112 define <4 x float> @_Z5clampDv4_fS_S_(<4 x float> %value, <4 x float> %low, <4 x float> %high) noun…
113 …%1 = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %value, <4 x float> %high) nounw…
114 …%2 = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %1, <4 x float> %low) nounwind r…
115   ret <4 x float> %2
118 define <4 x float> @_Z5clampDv4_fff(<4 x float> %value, float %low, float %high) nounwind readonly {
119   %_high = tail call <4 x float> @smear_4f(float %high) nounwind readnone
120   %_low = tail call <4 x float> @smear_4f(float %low) nounwind readnone
121 …%out = tail call <4 x float> @_Z5clampDv4_fS_S_(<4 x float> %value, <4 x float> %_low, <4 x float>…
122   ret <4 x float> %out
125 define <3 x float> @_Z5clampDv3_fS_S_(<3 x float> %value, <3 x float> %low, <3 x float> %high) noun…
126 …%_value = shufflevector <3 x float> %value, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32…
127   %_low = shufflevector <3 x float> %low, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
128 …%_high = shufflevector <3 x float> %high, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
129 …%a = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %_value, <4 x float> %_high) nou…
130 …%b = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %a, <4 x float> %_low) nounwind …
131   %c = shufflevector <4 x float> %b, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
132   ret <3 x float> %c
135 define <3 x float> @_Z5clampDv3_fff(<3 x float> %value, float %low, float %high) nounwind readonly {
136 …%_value = shufflevector <3 x float> %value, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32…
137   %_high = tail call <4 x float> @smear_4f(float %high) nounwind readnone
138   %_low = tail call <4 x float> @smear_4f(float %low) nounwind readnone
139 …%a = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %_value, <4 x float> %_high) nou…
140 …%b = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %a, <4 x float> %_low) nounwind …
141   %c = shufflevector <4 x float> %b, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
142   ret <3 x float> %c
145 define <2 x float> @_Z5clampDv2_fS_S_(<2 x float> %value, <2 x float> %low, <2 x float> %high) noun…
146 …%1 = tail call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %value, <2 x float> %high) nounw…
147 …%2 = tail call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %1, <2 x float> %low) nounwind r…
148   ret <2 x float> %2
151 define <2 x float> @_Z5clampDv2_fff(<2 x float> %value, float %low, float %high) nounwind readonly {
152   %_high = tail call <2 x float> @smear_2f(float %high) nounwind readnone
153   %_low = tail call <2 x float> @smear_2f(float %low) nounwind readnone
154 …%a = tail call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %value, <2 x float> %_high) noun…
155 …%b = tail call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %a, <2 x float> %_low) nounwind …
156   ret <2 x float> %b
169 define <4 x i32> @_Z5clampDv4_iS_S_(<4 x i32> %value, <4 x i32> %low, <4 x i32> %high) nounwind rea…
170 …%1 = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %value, <4 x i32> %high) nounwind re…
171 …%2 = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %1, <4 x i32> %low) nounwind readnone
172   ret <4 x i32> %2
175 define <4 x i32> @_Z5clampDv4_iii(<4 x i32> %value, i32 %low, i32 %high) nounwind readonly {
176   %_high = tail call <4 x i32> @smear_4i(i32 %high) nounwind readnone
177   %_low = tail call <4 x i32> @smear_4i(i32 %low) nounwind readnone
178 …%1 = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %value, <4 x i32> %_high) nounwind r…
179 …%2 = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %1, <4 x i32> %_low) nounwind readno…
180   ret <4 x i32> %2
183 define <3 x i32> @_Z5clampDv3_iS_S_(<3 x i32> %value, <3 x i32> %low, <3 x i32> %high) nounwind rea…
184   %_value = shufflevector <3 x i32> %value, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
185   %_low = shufflevector <3 x i32> %low, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
186   %_high = shufflevector <3 x i32> %high, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
187 …%a = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %_value, <4 x i32> %_high) nounwind …
188 …%b = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %a, <4 x i32> %_low) nounwind readno…
189   %c = shufflevector <4 x i32> %b, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
190   ret <3 x i32> %c
193 define <3 x i32> @_Z5clampDv3_iii(<3 x i32> %value, i32 %low, i32 %high) nounwind readonly {
194   %_value = shufflevector <3 x i32> %value, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
195   %_high = tail call <4 x i32> @smear_4i(i32 %high) nounwind readnone
196   %_low = tail call <4 x i32> @smear_4i(i32 %low) nounwind readnone
197 …%a = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %_value, <4 x i32> %_high) nounwind …
198 …%b = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %a, <4 x i32> %_low) nounwind readno…
199   %c = shufflevector <4 x i32> %b, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
200   ret <3 x i32> %c
203 define <2 x i32> @_Z5clampDv2_iS_S_(<2 x i32> %value, <2 x i32> %low, <2 x i32> %high) nounwind rea…
204 …%1 = tail call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %value, <2 x i32> %high) nounwind re…
205 …%2 = tail call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %1, <2 x i32> %low) nounwind readnone
206   ret <2 x i32> %2
209 define <2 x i32> @_Z5clampDv2_iii(<2 x i32> %value, i32 %low, i32 %high) nounwind readonly {
210   %_high = tail call <2 x i32> @smear_2i(i32 %high) nounwind readnone
211   %_low = tail call <2 x i32> @smear_2i(i32 %low) nounwind readnone
212 …%a = tail call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %value, <2 x i32> %_high) nounwind r…
213 …%b = tail call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %a, <2 x i32> %_low) nounwind readno…
214   ret <2 x i32> %b
219 define <4 x i32> @_Z5clampDv4_jS_S_(<4 x i32> %value, <4 x i32> %low, <4 x i32> %high) nounwind rea…
220 …%1 = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %value, <4 x i32> %high) nounwind re…
221 …%2 = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %1, <4 x i32> %low) nounwind readnone
222   ret <4 x i32> %2
225 define <4 x i32> @_Z5clampDv4_jjj(<4 x i32> %value, i32 %low, i32 %high) nounwind readonly {
226   %_high = tail call <4 x i32> @smear_4i(i32 %high) nounwind readnone
227   %_low = tail call <4 x i32> @smear_4i(i32 %low) nounwind readnone
228 …%1 = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %value, <4 x i32> %_high) nounwind r…
229 …%2 = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %1, <4 x i32> %_low) nounwind readno…
230   ret <4 x i32> %2
233 define <3 x i32> @_Z5clampDv3_jS_S_(<3 x i32> %value, <3 x i32> %low, <3 x i32> %high) nounwind rea…
234   %_value = shufflevector <3 x i32> %value, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
235   %_low = shufflevector <3 x i32> %low, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
236   %_high = shufflevector <3 x i32> %high, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
237 …%a = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %_value, <4 x i32> %_high) nounwind …
238 …%b = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %a, <4 x i32> %_low) nounwind readno…
239   %c = shufflevector <4 x i32> %b, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
240   ret <3 x i32> %c
243 define <3 x i32> @_Z5clampDv3_jjj(<3 x i32> %value, i32 %low, i32 %high) nounwind readonly {
244   %_value = shufflevector <3 x i32> %value, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
245   %_high = tail call <4 x i32> @smear_4i(i32 %high) nounwind readnone
246   %_low = tail call <4 x i32> @smear_4i(i32 %low) nounwind readnone
247 …%a = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %_value, <4 x i32> %_high) nounwind …
248 …%b = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %a, <4 x i32> %_low) nounwind readno…
249   %c = shufflevector <4 x i32> %b, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
250   ret <3 x i32> %c
253 define <2 x i32> @_Z5clampDv2_jS_S_(<2 x i32> %value, <2 x i32> %low, <2 x i32> %high) nounwind rea…
254 …%1 = tail call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %value, <2 x i32> %high) nounwind re…
255 …%2 = tail call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %1, <2 x i32> %low) nounwind readnone
256   ret <2 x i32> %2
259 define <2 x i32> @_Z5clampDv2_jjj(<2 x i32> %value, i32 %low, i32 %high) nounwind readonly {
260   %_high = tail call <2 x i32> @smear_2i(i32 %high) nounwind readnone
261   %_low = tail call <2 x i32> @smear_2i(i32 %low) nounwind readnone
262 …%a = tail call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %value, <2 x i32> %_high) nounwind r…
263 …%b = tail call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %a, <2 x i32> %_low) nounwind readno…
264   ret <2 x i32> %b
272 define <4 x float> @_Z4fmaxDv4_fS_(<4 x float> %v1, <4 x float> %v2) nounwind readonly {
273 …%1 = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %v1, <4 x float> %v2) nounwind r…
274   ret <4 x float> %1
277 define <4 x float> @_Z4fmaxDv4_ff(<4 x float> %v1, float %v2) nounwind readonly {
278   %1 = tail call <4 x float> @smear_4f(float %v2) nounwind readnone
279 …%2 = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %v1, <4 x float> %1) nounwind re…
280   ret <4 x float> %2
283 define <3 x float> @_Z4fmaxDv3_fS_(<3 x float> %v1, <3 x float> %v2) nounwind readonly {
284   %1 = shufflevector <3 x float> %v1, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
285   %2 = shufflevector <3 x float> %v2, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
286 …%3 = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %1, <4 x float> %2) nounwind rea…
287   %4 = shufflevector <4 x float> %3, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
288   ret <3 x float> %4
291 define <3 x float> @_Z4fmaxDv3_ff(<3 x float> %v1, float %v2) nounwind readonly {
292   %1 = shufflevector <3 x float> %v1, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
293   %2 = tail call <4 x float> @smear_4f(float %v2) nounwind readnone
294 …%3 = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %1, <4 x float> %2) nounwind rea…
295   %c = shufflevector <4 x float> %3, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
296   ret <3 x float> %c
299 define <2 x float> @_Z4fmaxDv2_fS_(<2 x float> %v1, <2 x float> %v2) nounwind readonly {
300 …%1 = tail call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %v1, <2 x float> %v2) nounwind r…
301   ret <2 x float> %1
304 define <2 x float> @_Z4fmaxDv2_ff(<2 x float> %v1, float %v2) nounwind readonly {
305   %1 = tail call <2 x float> @smear_2f(float %v2) nounwind readnone
306 …%2 = tail call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %v1, <2 x float> %1) nounwind re…
307   ret <2 x float> %2
321 define <4 x float> @_Z4fminDv4_fS_(<4 x float> %v1, <4 x float> %v2) nounwind readonly {
322 …%1 = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %v1, <4 x float> %v2) nounwind r…
323   ret <4 x float> %1
326 define <4 x float> @_Z4fminDv4_ff(<4 x float> %v1, float %v2) nounwind readonly {
327   %1 = tail call <4 x float> @smear_4f(float %v2) nounwind readnone
328 …%2 = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %v1, <4 x float> %1) nounwind re…
329   ret <4 x float> %2
332 define <3 x float> @_Z4fminDv3_fS_(<3 x float> %v1, <3 x float> %v2) nounwind readonly {
333   %1 = shufflevector <3 x float> %v1, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
334   %2 = shufflevector <3 x float> %v2, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
335 …%3 = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %1, <4 x float> %2) nounwind rea…
336   %4 = shufflevector <4 x float> %3, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
337   ret <3 x float> %4
340 define <3 x float> @_Z4fminDv3_ff(<3 x float> %v1, float %v2) nounwind readonly {
341   %1 = shufflevector <3 x float> %v1, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
342   %2 = tail call <4 x float> @smear_4f(float %v2) nounwind readnone
343 …%3 = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %1, <4 x float> %2) nounwind rea…
344   %c = shufflevector <4 x float> %3, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
345   ret <3 x float> %c
348 define <2 x float> @_Z4fminDv2_fS_(<2 x float> %v1, <2 x float> %v2) nounwind readonly {
349 …%1 = tail call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %v1, <2 x float> %v2) nounwind r…
350   ret <2 x float> %1
353 define <2 x float> @_Z4fminDv2_ff(<2 x float> %v1, float %v2) nounwind readonly {
354   %1 = tail call <2 x float> @smear_2f(float %v2) nounwind readnone
355 …%2 = tail call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %v1, <2 x float> %1) nounwind re…
356   ret <2 x float> %2
376 define <2 x i8> @_Z3maxDv2_cS_(<2 x i8> %v1, <2 x i8> %v2) nounwind readnone {
377   %1 = sext <2 x i8> %v1 to <2 x i32>
378   %2 = sext <2 x i8> %v2 to <2 x i32>
379   %3 = tail call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %1, <2 x i32> %2) nounwind readnone
380   %4 = trunc <2 x i32> %3 to <2 x i8>
381   ret <2 x i8> %4
384 define <3 x i8> @_Z3maxDv3_cS_(<3 x i8> %v1, <3 x i8> %v2) nounwind readnone {
385   %1 = sext <3 x i8> %v1 to <3 x i32>
386   %2 = sext <3 x i8> %v2 to <3 x i32>
387   %3 = shufflevector <3 x i32> %1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
388   %4 = shufflevector <3 x i32> %2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
389   %5 = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %3, <4 x i32> %4) nounwind readnone
390   %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
391   %7 = trunc <3 x i32> %6 to <3 x i8>
392   ret <3 x i8> %7
395 define <4 x i8> @_Z3maxDv4_cS_(<4 x i8> %v1, <4 x i8> %v2) nounwind readnone {
396   %1 = sext <4 x i8> %v1 to <4 x i32>
397   %2 = sext <4 x i8> %v2 to <4 x i32>
398   %3 = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readnone
399   %4 = trunc <4 x i32> %3 to <4 x i8>
400   ret <4 x i8> %4
409 define <2 x i16> @_Z3maxDv2_sS_(<2 x i16> %v1, <2 x i16> %v2) nounwind readnone {
410   %1 = sext <2 x i16> %v1 to <2 x i32>
411   %2 = sext <2 x i16> %v2 to <2 x i32>
412   %3 = tail call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %1, <2 x i32> %2) nounwind readnone
413   %4 = trunc <2 x i32> %3 to <2 x i16>
414   ret <2 x i16> %4
417 define <3 x i16> @_Z3maxDv3_sS_(<3 x i16> %v1, <3 x i16> %v2) nounwind readnone {
418   %1 = sext <3 x i16> %v1 to <3 x i32>
419   %2 = sext <3 x i16> %v2 to <3 x i32>
420   %3 = shufflevector <3 x i32> %1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
421   %4 = shufflevector <3 x i32> %2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
422   %5 = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %3, <4 x i32> %4) nounwind readnone
423   %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
424   %7 = trunc <3 x i32> %6 to <3 x i16>
425   ret <3 x i16> %7
428 define <4 x i16> @_Z3maxDv4_sS_(<4 x i16> %v1, <4 x i16> %v2) nounwind readnone {
429   %1 = sext <4 x i16> %v1 to <4 x i32>
430   %2 = sext <4 x i16> %v2 to <4 x i32>
431   %3 = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readnone
432   %4 = trunc <4 x i32> %3 to <4 x i16>
433   ret <4 x i16> %4
442 define <2 x i32> @_Z3maxDv2_iS_(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone {
443 …%1 = tail call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone
444   ret <2 x i32> %1
447 define <3 x i32> @_Z3maxDv3_iS_(<3 x i32> %v1, <3 x i32> %v2) nounwind readnone {
448   %1 = shufflevector <3 x i32> %v1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
449   %2 = shufflevector <3 x i32> %v2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
450 …%3 = tail call <4 x i32   > @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readno…
451   %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
452   ret <3 x i32> %4
455 define <4 x i32> @_Z3maxDv4_iS_(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone {
456 …%1 = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone
457   ret <4 x i32> %1
474 define <2 x i8> @_Z3maxDv2_hS_(<2 x i8> %v1, <2 x i8> %v2) nounwind readnone {
475   %1 = zext <2 x i8> %v1 to <2 x i32>
476   %2 = zext <2 x i8> %v2 to <2 x i32>
477   %3 = tail call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %1, <2 x i32> %2) nounwind readnone
478   %4 = trunc <2 x i32> %3 to <2 x i8>
479   ret <2 x i8> %4
482 define <3 x i8> @_Z3maxDv3_hS_(<3 x i8> %v1, <3 x i8> %v2) nounwind readnone {
483   %1 = zext <3 x i8> %v1 to <3 x i32>
484   %2 = zext <3 x i8> %v2 to <3 x i32>
485   %3 = shufflevector <3 x i32> %1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
486   %4 = shufflevector <3 x i32> %2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
487   %5 = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %3, <4 x i32> %4) nounwind readnone
488   %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
489   %7 = trunc <3 x i32> %6 to <3 x i8>
490   ret <3 x i8> %7
493 define <4 x i8> @_Z3maxDv4_hS_(<4 x i8> %v1, <4 x i8> %v2) nounwind readnone {
494   %1 = zext <4 x i8> %v1 to <4 x i32>
495   %2 = zext <4 x i8> %v2 to <4 x i32>
496   %3 = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readnone
497   %4 = trunc <4 x i32> %3 to <4 x i8>
498   ret <4 x i8> %4
507 define <2 x i16> @_Z3maxDv2_tS_(<2 x i16> %v1, <2 x i16> %v2) nounwind readnone {
508   %1 = zext <2 x i16> %v1 to <2 x i32>
509   %2 = zext <2 x i16> %v2 to <2 x i32>
510   %3 = tail call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %1, <2 x i32> %2) nounwind readnone
511   %4 = trunc <2 x i32> %3 to <2 x i16>
512   ret <2 x i16> %4
515 define <3 x i16> @_Z3maxDv3_tS_(<3 x i16> %v1, <3 x i16> %v2) nounwind readnone {
516   %1 = zext <3 x i16> %v1 to <3 x i32>
517   %2 = zext <3 x i16> %v2 to <3 x i32>
518   %3 = shufflevector <3 x i32> %1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
519   %4 = shufflevector <3 x i32> %2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
520   %5 = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %3, <4 x i32> %4) nounwind readnone
521   %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
522   %7 = trunc <3 x i32> %6 to <3 x i16>
523   ret <3 x i16> %7
526 define <4 x i16> @_Z3maxDv4_tS_(<4 x i16> %v1, <4 x i16> %v2) nounwind readnone {
527   %1 = zext <4 x i16> %v1 to <4 x i32>
528   %2 = zext <4 x i16> %v2 to <4 x i32>
529   %3 = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readnone
530   %4 = trunc <4 x i32> %3 to <4 x i16>
531   ret <4 x i16> %4
540 define <2 x i32> @_Z3maxDv2_jS_(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone {
541 …%1 = tail call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone
542   ret <2 x i32> %1
545 define <3 x i32> @_Z3maxDv3_jS_(<3 x i32> %v1, <3 x i32> %v2) nounwind readnone {
546   %1 = shufflevector <3 x i32> %v1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
547   %2 = shufflevector <3 x i32> %v2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
548 …%3 = tail call <4 x i32   > @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readno…
549   %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
550   ret <3 x i32> %4
553 define <4 x i32> @_Z3maxDv4_jS_(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone {
554 …%1 = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone
555   ret <4 x i32> %1
566 define <2 x float> @_Z3maxDv2_fS_(<2 x float> %v1, <2 x float> %v2) nounwind readnone {
567   %1 = tail call <2 x float> @_Z4fmaxDv2_fS_(<2 x float> %v1, <2 x float> %v2)
568   ret <2 x float> %1
571 define <2 x float> @_Z3maxDv2_ff(<2 x float> %v1, float %v2) nounwind readnone {
572   %1 = tail call <2 x float> @_Z4fmaxDv2_ff(<2 x float> %v1, float %v2)
573   ret <2 x float> %1
576 define <3 x float> @_Z3maxDv3_fS_(<3 x float> %v1, <3 x float> %v2) nounwind readnone {
577   %1 = tail call <3 x float> @_Z4fmaxDv3_fS_(<3 x float> %v1, <3 x float> %v2)
578   ret <3 x float> %1
581 define <3 x float> @_Z3maxDv3_ff(<3 x float> %v1, float %v2) nounwind readnone {
582   %1 = tail call <3 x float> @_Z4fmaxDv3_ff(<3 x float> %v1, float %v2)
583   ret <3 x float> %1
586 define <4 x float> @_Z3maxDv4_fS_(<4 x float> %v1, <4 x float> %v2) nounwind readnone {
587   %1 = tail call <4 x float> @_Z4fmaxDv4_fS_(<4 x float> %v1, <4 x float> %v2)
588   ret <4 x float> %1
591 define <4 x float> @_Z3maxDv4_ff(<4 x float> %v1, float %v2) nounwind readnone {
592   %1 = tail call <4 x float> @_Z4fmaxDv4_ff(<4 x float> %v1, float %v2)
593   ret <4 x float> %1
607 define <2 x i8> @_Z3minDv2_cS_(<2 x i8> %v1, <2 x i8> %v2) nounwind readnone {
608   %1 = sext <2 x i8> %v1 to <2 x i32>
609   %2 = sext <2 x i8> %v2 to <2 x i32>
610   %3 = tail call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %1, <2 x i32> %2) nounwind readnone
611   %4 = trunc <2 x i32> %3 to <2 x i8>
612   ret <2 x i8> %4
615 define <3 x i8> @_Z3minDv3_cS_(<3 x i8> %v1, <3 x i8> %v2) nounwind readnone {
616   %1 = sext <3 x i8> %v1 to <3 x i32>
617   %2 = sext <3 x i8> %v2 to <3 x i32>
618   %3 = shufflevector <3 x i32> %1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
619   %4 = shufflevector <3 x i32> %2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
620   %5 = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %3, <4 x i32> %4) nounwind readnone
621   %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
622   %7 = trunc <3 x i32> %6 to <3 x i8>
623   ret <3 x i8> %7
626 define <4 x i8> @_Z3minDv4_cS_(<4 x i8> %v1, <4 x i8> %v2) nounwind readnone {
627   %1 = sext <4 x i8> %v1 to <4 x i32>
628   %2 = sext <4 x i8> %v2 to <4 x i32>
629   %3 = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readnone
630   %4 = trunc <4 x i32> %3 to <4 x i8>
631   ret <4 x i8> %4
640 define <2 x i16> @_Z3minDv2_sS_(<2 x i16> %v1, <2 x i16> %v2) nounwind readnone {
641   %1 = sext <2 x i16> %v1 to <2 x i32>
642   %2 = sext <2 x i16> %v2 to <2 x i32>
643   %3 = tail call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %1, <2 x i32> %2) nounwind readnone
644   %4 = trunc <2 x i32> %3 to <2 x i16>
645   ret <2 x i16> %4
648 define <3 x i16> @_Z3minDv3_sS_(<3 x i16> %v1, <3 x i16> %v2) nounwind readnone {
649   %1 = sext <3 x i16> %v1 to <3 x i32>
650   %2 = sext <3 x i16> %v2 to <3 x i32>
651   %3 = shufflevector <3 x i32> %1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
652   %4 = shufflevector <3 x i32> %2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
653   %5 = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %3, <4 x i32> %4) nounwind readnone
654   %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
655   %7 = trunc <3 x i32> %6 to <3 x i16>
656   ret <3 x i16> %7
659 define <4 x i16> @_Z3minDv4_sS_(<4 x i16> %v1, <4 x i16> %v2) nounwind readnone {
660   %1 = sext <4 x i16> %v1 to <4 x i32>
661   %2 = sext <4 x i16> %v2 to <4 x i32>
662   %3 = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readnone
663   %4 = trunc <4 x i32> %3 to <4 x i16>
664   ret <4 x i16> %4
673 define <2 x i32> @_Z3minDv2_iS_(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone {
674 …%1 = tail call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone
675   ret <2 x i32> %1
678 define <3 x i32> @_Z3minDv3_iS_(<3 x i32> %v1, <3 x i32> %v2) nounwind readnone {
679   %1 = shufflevector <3 x i32> %v1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
680   %2 = shufflevector <3 x i32> %v2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
681 …%3 = tail call <4 x i32   > @llvm.arm.neon.vmins.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readno…
682   %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
683   ret <3 x i32> %4
686 define <4 x i32> @_Z3minDv4_iS_(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone {
687 …%1 = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone
688   ret <4 x i32> %1
705 define <2 x i8> @_Z3minDv2_hS_(<2 x i8> %v1, <2 x i8> %v2) nounwind readnone {
706   %1 = zext <2 x i8> %v1 to <2 x i32>
707   %2 = zext <2 x i8> %v2 to <2 x i32>
708   %3 = tail call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %1, <2 x i32> %2) nounwind readnone
709   %4 = trunc <2 x i32> %3 to <2 x i8>
710   ret <2 x i8> %4
713 define <3 x i8> @_Z3minDv3_hS_(<3 x i8> %v1, <3 x i8> %v2) nounwind readnone {
714   %1 = zext <3 x i8> %v1 to <3 x i32>
715   %2 = zext <3 x i8> %v2 to <3 x i32>
716   %3 = shufflevector <3 x i32> %1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
717   %4 = shufflevector <3 x i32> %2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
718   %5 = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %3, <4 x i32> %4) nounwind readnone
719   %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
720   %7 = trunc <3 x i32> %6 to <3 x i8>
721   ret <3 x i8> %7
724 define <4 x i8> @_Z3minDv4_hS_(<4 x i8> %v1, <4 x i8> %v2) nounwind readnone {
725   %1 = zext <4 x i8> %v1 to <4 x i32>
726   %2 = zext <4 x i8> %v2 to <4 x i32>
727   %3 = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readnone
728   %4 = trunc <4 x i32> %3 to <4 x i8>
729   ret <4 x i8> %4
738 define <2 x i16> @_Z3minDv2_tS_(<2 x i16> %v1, <2 x i16> %v2) nounwind readnone {
739   %1 = zext <2 x i16> %v1 to <2 x i32>
740   %2 = zext <2 x i16> %v2 to <2 x i32>
741   %3 = tail call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %1, <2 x i32> %2) nounwind readnone
742   %4 = trunc <2 x i32> %3 to <2 x i16>
743   ret <2 x i16> %4
; min of two <3 x i16> vectors. Both inputs are zero-extended to i32 lanes and
; widened to four lanes (shuffle index 3 selects lane 0 of the undef operand),
; passed through the vminu.v4i32 intrinsic, then narrowed back to three lanes
; and truncated to i16.
746 define <3 x i16> @_Z3minDv3_tS_(<3 x i16> %v1, <3 x i16> %v2) nounwind readnone {
747   %1 = zext <3 x i16> %v1 to <3 x i32>
748   %2 = zext <3 x i16> %v2 to <3 x i32>
749   %3 = shufflevector <3 x i32> %1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
750   %4 = shufflevector <3 x i32> %2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
751   %5 = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %3, <4 x i32> %4) nounwind readnone
; Keep only lanes 0..2; the fourth lane was padding.
752   %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
753   %7 = trunc <3 x i32> %6 to <3 x i16>
754   ret <3 x i16> %7
; min of two <4 x i16> vectors: zero-extend both to <4 x i32>, apply the
; vminu.v4i32 intrinsic, and truncate the result back to <4 x i16>.
757 define <4 x i16> @_Z3minDv4_tS_(<4 x i16> %v1, <4 x i16> %v2) nounwind readnone {
758   %1 = zext <4 x i16> %v1 to <4 x i32>
759   %2 = zext <4 x i16> %v2 to <4 x i32>
760   %3 = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readnone
761   %4 = trunc <4 x i32> %3 to <4 x i16>
762   ret <4 x i16> %4
; min of two <2 x i32> vectors: a direct call to the vminu.v2i32 intrinsic,
; with no widening or narrowing.
771 define <2 x i32> @_Z3minDv2_jS_(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone {
772 …%1 = tail call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone
773   ret <2 x i32> %1
; min of two <3 x i32> vectors. The inputs are widened to four lanes (shuffle
; index 3 selects lane 0 of the undef operand), passed through the vminu.v4i32
; intrinsic, and the result is narrowed back to lanes 0..2.
776 define <3 x i32> @_Z3minDv3_jS_(<3 x i32> %v1, <3 x i32> %v2) nounwind readnone {
777   %1 = shufflevector <3 x i32> %v1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
778   %2 = shufflevector <3 x i32> %v2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
779 …%3 = tail call <4 x i32   > @llvm.arm.neon.vminu.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readno…
780   %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
781   ret <3 x i32> %4
; min of two <4 x i32> vectors: a direct call to the vminu.v4i32 intrinsic.
784 define <4 x i32> @_Z3minDv4_jS_(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone {
785 …%1 = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone
786   ret <4 x i32> %1
; min(<2 x float>, <2 x float>): forwards both arguments unchanged to
; @_Z4fminDv2_fS_ and returns its result.
797 define <2 x float> @_Z3minDv2_fS_(<2 x float> %v1, <2 x float> %v2) nounwind readnone {
798   %1 = tail call <2 x float> @_Z4fminDv2_fS_(<2 x float> %v1, <2 x float> %v2)
799   ret <2 x float> %1
; min(<2 x float>, float): forwards the vector and the scalar unchanged to
; @_Z4fminDv2_ff and returns its result.
802 define <2 x float> @_Z3minDv2_ff(<2 x float> %v1, float %v2) nounwind readnone {
803   %1 = tail call <2 x float> @_Z4fminDv2_ff(<2 x float> %v1, float %v2)
804   ret <2 x float> %1
; min(<3 x float>, <3 x float>): forwards both arguments unchanged to
; @_Z4fminDv3_fS_ and returns its result.
807 define <3 x float> @_Z3minDv3_fS_(<3 x float> %v1, <3 x float> %v2) nounwind readnone {
808   %1 = tail call <3 x float> @_Z4fminDv3_fS_(<3 x float> %v1, <3 x float> %v2)
809   ret <3 x float> %1
; min(<3 x float>, float): forwards the vector and the scalar unchanged to
; @_Z4fminDv3_ff and returns its result.
812 define <3 x float> @_Z3minDv3_ff(<3 x float> %v1, float %v2) nounwind readnone {
813   %1 = tail call <3 x float> @_Z4fminDv3_ff(<3 x float> %v1, float %v2)
814   ret <3 x float> %1
; min(<4 x float>, <4 x float>): forwards both arguments unchanged to
; @_Z4fminDv4_fS_ and returns its result.
817 define <4 x float> @_Z3minDv4_fS_(<4 x float> %v1, <4 x float> %v2) nounwind readnone {
818   %1 = tail call <4 x float> @_Z4fminDv4_fS_(<4 x float> %v1, <4 x float> %v2)
819   ret <4 x float> %1
; min(<4 x float>, float): forwards the vector and the scalar unchanged to
; @_Z4fminDv4_ff and returns its result.
822 define <4 x float> @_Z3minDv4_ff(<4 x float> %v1, float %v2) nounwind readnone {
823   %1 = tail call <4 x float> @_Z4fminDv4_ff(<4 x float> %v1, float %v2)
824   ret <4 x float> %1
; Constant <4 x i32> tables used by @_Z18rsYuvToRGBA_uchar4hhh:
;   @yuv_U / @yuv_V are the per-lane multipliers applied to the U and V terms;
;   @yuv_0 / @yuv_255 are the lower and upper clamp bounds. @yuv_255 holds
;   65535 per lane because the clamped value is shifted right by 8 afterwards.
832 @yuv_U = internal constant <4 x i32> <i32 0, i32 -100, i32 516, i32 0>, align 16
833 @yuv_V = internal constant <4 x i32> <i32 409, i32 -208, i32 0, i32 0>, align 16
834 @yuv_0 = internal constant <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
835 @yuv_255 = internal constant <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>, align 16
; Builds a <4 x i8> result from three i8 inputs (%pY, %pU, %pV).
; NOTE(review): the lines that compute %_sy3, %_su2 and %_sv2 from the inputs
; are not shown in this listing, so the scalar pre-processing is not documented.
838 define <4 x i8> @_Z18rsYuvToRGBA_uchar4hhh(i8 %pY, i8 %pU, i8 %pV) nounwind readnone alwaysinline {
; Turn each scalar into a <4 x i32> via @smear_4i32 (presumably a broadcast to
; all four lanes, by analogy with @smear_f — confirm against its definition).
847   %_y = tail call <4 x i32> @smear_4i32(i32 %_sy3) nounwind readnone
848   %_u = tail call <4 x i32> @smear_4i32(i32 %_su2) nounwind readnone
849   %_v = tail call <4 x i32> @smear_4i32(i32 %_sv2) nounwind readnone
; Per-lane: y + u * yuv_U + v * yuv_V.
851   %mu = load <4 x i32>, <4 x i32>* @yuv_U, align 8
852   %mv = load <4 x i32>, <4 x i32>* @yuv_V, align 8
853   %_u2 = mul <4 x i32> %_u, %mu
854   %_v2 = mul <4 x i32> %_v, %mv
855   %_y2 = add <4 x i32> %_y, %_u2
856   %_y3 = add <4 x i32> %_y2, %_v2
; The next three lines are a commented-out alternative based on vqshiftnsu.
858 …; %r1 = tail call <4 x i16> @llvm.arm.neon.vqshiftnsu.v4i16(<4 x i32> %_y3, <4 x i32> <i32 8, i32 …
859 ;  %r2 = trunc <4 x i16> %r1 to <4 x i8>
860 ;  ret <4 x i8> %r2
; Active path: clamp each lane to [yuv_0, yuv_255] with signed max/min, shift
; right by 8, then truncate each lane to i8.
862   %c0 = load <4 x i32>, <4 x i32>* @yuv_0, align 8
863   %c255 = load <4 x i32>, <4 x i32>* @yuv_255, align 8
864 …%r1 = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %_y3, <4 x i32> %c0) nounwind readn…
865 …%r2 = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %r1, <4 x i32> %c255) nounwind read…
866   %r3 = lshr <4 x i32> %r2, <i32 8, i32 8, i32 8, i32 8>
867   %r4 = trunc <4 x i32> %r3 to <4 x i8>
868   ret <4 x i8> %r4
; half_recip for <2 x float>: starts from the vrecpe result for %v, then twice
; multiplies the running value by vrecps(running value, %v).
875 define <2 x float> @_Z10half_recipDv2_f(<2 x float> %v) nounwind readnone {
876   %1 = tail call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %v) nounwind readnone
; First vrecps step, applied to the initial estimate %1.
877 …%2 = tail call <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float> %1, <2 x float> %v) nounwind re…
878   %3 = fmul <2 x float> %1, %2
; Second vrecps step, applied to the updated value %3.
879 …%4 = tail call <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float> %3, <2 x float> %v) nounwind re…
880   %5 = fmul <2 x float> %4, %3
881   ret <2 x float> %5
; half_recip for <4 x float>: starts from the vrecpe result for %v, then twice
; multiplies the running value by vrecps(running value, %v).
884 define <4 x float> @_Z10half_recipDv4_f(<4 x float> %v) nounwind readnone {
885   %1 = tail call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %v) nounwind readnone
; First vrecps step, applied to the initial estimate %1.
886 …%2 = tail call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %1, <4 x float> %v) nounwind re…
887   %3 = fmul <4 x float> %1, %2
; Second vrecps step, applied to the updated value %3.
888 …%4 = tail call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %3, <4 x float> %v) nounwind re…
889   %5 = fmul <4 x float> %4, %3
890   ret <4 x float> %5
; half_recip for <3 x float>: widens %v to four lanes (lane 3 comes from the
; undef operand), calls the <4 x float> variant, and returns lanes 0..2.
893 define <3 x float> @_Z10half_recipDv3_f(<3 x float> %v) nounwind readnone {
894   %1 = shufflevector <3 x float> %v, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
895   %2 = tail call <4 x float> @_Z10half_recipDv4_f(<4 x float> %1) nounwind readnone
896   %3 = shufflevector <4 x float> %2, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
897   ret <3 x float> %3
906   %1 = insertelement <2 x float> undef, float %v, i32 0
907   %2 = tail call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %1) nounwind readnone
908   %3 = extractelement <2 x float> %2, i32 0
; half_rsqrt for <2 x float>: returns the vrsqrte.v2f32 result for %v directly;
; no further step is applied to the estimate.
912 define <2 x float> @_Z10half_rsqrtDv2_f(<2 x float> %v) nounwind readnone {
913   %1 = tail call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %v) nounwind readnone
914   ret <2 x float> %1
; half_rsqrt for <3 x float>: widens %v to four lanes (lane 3 comes from the
; undef operand), calls vrsqrte.v4f32, and returns lanes 0..2 of the result.
917 define <3 x float> @_Z10half_rsqrtDv3_f(<3 x float> %v) nounwind readnone {
918   %1 = shufflevector <3 x float> %v, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
919   %2 = tail call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %1) nounwind readnone
920   %3 = shufflevector <4 x float> %2, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
921   ret <3 x float> %3
; half_rsqrt for <4 x float>: returns the vrsqrte.v4f32 result for %v directly;
; no further step is applied to the estimate.
924 define <4 x float> @_Z10half_rsqrtDv4_f(<4 x float> %v) nounwind readnone {
925   %1 = tail call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %v) nounwind readnone
926   ret <4 x float> %1
; NEON vector-load intrinsic used by the 3x3 matrix multiply below.
933 declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly
; Matrix types: each wraps a flat float array of 16, 9 and 4 elements.
935 %struct.rs_matrix4x4 = type { [16 x float] }
936 %struct.rs_matrix3x3 = type { [9 x float] }
937 %struct.rs_matrix2x2 = type { [4 x float] }
; Broadcast helper: returns a <4 x float> with %in inserted into lanes 0..3.
939 define internal <4 x float> @smear_f(float %in) nounwind readnone alwaysinline {
940   %1 = insertelement <4 x float> undef, float %in, i32 0
941   %2 = insertelement <4 x float> %1, float %in, i32 1
942   %3 = insertelement <4 x float> %2, float %in, i32 2
943   %4 = insertelement <4 x float> %3, float %in, i32 3
944   ret <4 x float> %4
; rs_matrix3x3 times a <3 x float> vector; returns <3 x float>.
; Computes x*xm + y*ym + z*zm on four lanes and returns lanes 0..2.
; NOTE(review): the lines that derive %px2, %py2 and %pz2 from %m are not shown
; in this listing, so the element offsets of the three loads are not documented.
948 define <3 x float> @_Z16rsMatrixMultiplyPK12rs_matrix3x3Dv3_f(%struct.rs_matrix3x3* nocapture %m, <…
; Broadcast each input component to all four lanes.
949   %x0 = extractelement <3 x float> %in, i32 0
950   %x = tail call <4 x float> @smear_f(float %x0) nounwind readnone
951   %y0 = extractelement <3 x float> %in, i32 1
952   %y = tail call <4 x float> @smear_f(float %y0) nounwind readnone
953   %z0 = extractelement <3 x float> %in, i32 2
954   %z = tail call <4 x float> @smear_f(float %z0) nounwind readnone
958   %xm = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %px2, i32 4) nounwind
962   %ym = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %py2, i32 4) nounwind
966   %zm2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %pz2, i32 4) nounwind
; Shift the loaded vector down by one lane: lanes 1..3 of %zm2 become lanes
; 0..2; index 4 selects lane 0 of the undef operand, so lane 3 is undefined.
967   %zm = shufflevector <4 x float> %zm2, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
969   %a1 = fmul <4 x float> %x, %xm
970   %a2 = fmul <4 x float> %y, %ym
971   %a3 = fadd <4 x float> %a1, %a2
972   %a4 = fmul <4 x float> %z, %zm
973   %a5 = fadd <4 x float> %a4, %a3
; Drop lane 3, which is not part of the result.
974   %a6 = shufflevector <4 x float> %a5, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
975   ret <3 x float> %a6
; rs_matrix3x3 times a <2 x float> vector; returns <3 x float>.
; Computes x*xm + y*ym on four lanes and returns lanes 0..2.
; NOTE(review): the lines that derive %px and %py from %m are not shown in this
; listing, so the element offsets of the two loads are not documented.
978 define <3 x float> @_Z16rsMatrixMultiplyPK12rs_matrix3x3Dv2_f(%struct.rs_matrix3x3* nocapture %m, <…
; Broadcast each input component to all four lanes.
979   %x0 = extractelement <2 x float> %in, i32 0
980   %x = tail call <4 x float> @smear_f(float %x0) nounwind readnone
981   %y0 = extractelement <2 x float> %in, i32 1
982   %y = tail call <4 x float> @smear_f(float %y0) nounwind readnone
; Four-float loads use align 4 (float alignment), not the vector's natural 16.
985   %px2 = bitcast float* %px to <4 x float>*
986   %xm = load <4 x float>, <4 x float>* %px2, align 4
988   %py2 = bitcast float* %py to <4 x float>*
989   %ym = load <4 x float>, <4 x float>* %py2, align 4
991   %a1 = fmul <4 x float> %x, %xm
992   %a2 = fmul <4 x float> %y, %ym
993   %a3 = fadd <4 x float> %a1, %a2
; Drop lane 3, which is not part of the result.
994   %a4 = shufflevector <4 x float> %a3, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
995   ret <3 x float> %a4
; rs_matrix4x4 times a <4 x float> vector; returns <4 x float>.
; Computes x*xm + y*ym + z*zm + w*wm.
; NOTE(review): the lines that derive %px, %py, %pz and %pw from %m are not
; shown in this listing, so the element offsets of the loads are not documented.
998 define <4 x float> @_Z16rsMatrixMultiplyPK12rs_matrix4x4Dv4_f(%struct.rs_matrix4x4* nocapture %m, <…
; Broadcast each input component to all four lanes.
999   %x0 = extractelement <4 x float> %in, i32 0
1000   %x = tail call <4 x float> @smear_f(float %x0) nounwind readnone
1001   %y0 = extractelement <4 x float> %in, i32 1
1002   %y = tail call <4 x float> @smear_f(float %y0) nounwind readnone
1003   %z0 = extractelement <4 x float> %in, i32 2
1004   %z = tail call <4 x float> @smear_f(float %z0) nounwind readnone
1005   %w0 = extractelement <4 x float> %in, i32 3
1006   %w = tail call <4 x float> @smear_f(float %w0) nounwind readnone
; Four-float loads use align 4 (float alignment), not the vector's natural 16.
1009   %px2 = bitcast float* %px to <4 x float>*
1010   %xm = load <4 x float>, <4 x float>* %px2, align 4
1012   %py2 = bitcast float* %py to <4 x float>*
1013   %ym = load <4 x float>, <4 x float>* %py2, align 4
1015   %pz2 = bitcast float* %pz to <4 x float>*
1016   %zm = load <4 x float>, <4 x float>* %pz2, align 4
1018   %pw2 = bitcast float* %pw to <4 x float>*
1019   %wm = load <4 x float>, <4 x float>* %pw2, align 4
; Accumulate the four scaled vectors.
1021   %a1 = fmul <4 x float> %x, %xm
1022   %a2 = fmul <4 x float> %y, %ym
1023   %a3 = fadd <4 x float> %a1, %a2
1024   %a4 = fmul <4 x float> %z, %zm
1025   %a5 = fadd <4 x float> %a3, %a4
1026   %a6 = fmul <4 x float> %w, %wm
1027   %a7 = fadd <4 x float> %a5, %a6
1028   ret <4 x float> %a7
; rs_matrix4x4 times a <3 x float> vector; returns <4 x float>.
; Computes wm + x*xm + y*ym + z*zm: %wm is added unscaled, i.e. with an
; implicit fourth input component of 1.
; NOTE(review): the lines that derive %px, %py, %pz and %pw from %m are not
; shown in this listing, so the element offsets of the loads are not documented.
1031 define <4 x float> @_Z16rsMatrixMultiplyPK12rs_matrix4x4Dv3_f(%struct.rs_matrix4x4* nocapture %m, <…
; Broadcast each input component to all four lanes.
1032   %x0 = extractelement <3 x float> %in, i32 0
1033   %x = tail call <4 x float> @smear_f(float %x0) nounwind readnone
1034   %y0 = extractelement <3 x float> %in, i32 1
1035   %y = tail call <4 x float> @smear_f(float %y0) nounwind readnone
1036   %z0 = extractelement <3 x float> %in, i32 2
1037   %z = tail call <4 x float> @smear_f(float %z0) nounwind readnone
; Four-float loads use align 4 (float alignment), not the vector's natural 16.
1040   %px2 = bitcast float* %px to <4 x float>*
1041   %xm = load <4 x float>, <4 x float>* %px2, align 4
1043   %py2 = bitcast float* %py to <4 x float>*
1044   %ym = load <4 x float>, <4 x float>* %py2, align 4
1046   %pz2 = bitcast float* %pz to <4 x float>*
1047   %zm = load <4 x float>, <4 x float>* %pz2, align 4
1049   %pw2 = bitcast float* %pw to <4 x float>*
1050   %wm = load <4 x float>, <4 x float>* %pw2, align 4
; Start the sum from %wm, then add the three scaled vectors.
1052   %a1 = fmul <4 x float> %x, %xm
1053   %a2 = fadd <4 x float> %wm, %a1
1054   %a3 = fmul <4 x float> %y, %ym
1055   %a4 = fadd <4 x float> %a2, %a3
1056   %a5 = fmul <4 x float> %z, %zm
1057   %a6 = fadd <4 x float> %a4, %a5
1058   ret <4 x float> %a6
; rs_matrix4x4 times a <2 x float> vector; returns <4 x float>.
; Computes wm + x*xm + y*ym: %wm is added unscaled and no z term is used.
; NOTE(review): the lines that derive %px, %py and %pw from %m are not shown in
; this listing, so the element offsets of the loads are not documented.
1061 define <4 x float> @_Z16rsMatrixMultiplyPK12rs_matrix4x4Dv2_f(%struct.rs_matrix4x4* nocapture %m, <…
; Broadcast each input component to all four lanes.
1062   %x0 = extractelement <2 x float> %in, i32 0
1063   %x = tail call <4 x float> @smear_f(float %x0) nounwind readnone
1064   %y0 = extractelement <2 x float> %in, i32 1
1065   %y = tail call <4 x float> @smear_f(float %y0) nounwind readnone
; Four-float loads use align 4 (float alignment), not the vector's natural 16.
1068   %px2 = bitcast float* %px to <4 x float>*
1069   %xm = load <4 x float>, <4 x float>* %px2, align 4
1071   %py2 = bitcast float* %py to <4 x float>*
1072   %ym = load <4 x float>, <4 x float>* %py2, align 4
1074   %pw2 = bitcast float* %pw to <4 x float>*
1075   %wm = load <4 x float>, <4 x float>* %pw2, align 4
; Start the sum from %wm, then add the two scaled vectors.
1077   %a1 = fmul <4 x float> %x, %xm
1078   %a2 = fadd <4 x float> %wm, %a1
1079   %a3 = fmul <4 x float> %y, %ym
1080   %a4 = fadd <4 x float> %a2, %a3
1081   ret <4 x float> %a4
; Splat <4 x float> constants loaded by @_Z17rsPackColorTo8888Dv4_f: the scale
; factor (255.0), the offset added after scaling (0.5), and the lower clamp
; bound (0.0).
1091 @fc_255.0 = internal constant <4 x float> <float 255.0, float 255.0, float 255.0, float 255.0>, ali…
1092 @fc_0.5 = internal constant <4 x float> <float 0.5, float 0.5, float 0.5, float 0.5>, align 16
1093 @fc_0 = internal constant <4 x float> <float 0.0, float 0.0, float 0.0, float 0.0>, align 16
; Conversion routines between <4 x float> and <4 x i8>, defined elsewhere.
1095 declare <4 x i8> @_Z14convert_uchar4Dv4_f(<4 x float> %in) nounwind readnone
1096 declare <4 x float> @_Z14convert_float4Dv4_h(<4 x i8> %in) nounwind readnone
; Packs a <4 x float> colour into <4 x i8>: computes color * 255.0 + 0.5 per
; lane, passes the result with %f0 and %f255 to @_Z5clampDv4_fS_S_, and
; converts the clamped vector with @_Z14convert_uchar4Dv4_f.
1099 define <4 x i8> @_Z17rsPackColorTo8888Dv4_f(<4 x float> %color) nounwind readnone {
1100     %f255 = load <4 x float>, <4 x float>* @fc_255.0, align 16
1101     %f05 = load <4 x float>, <4 x float>* @fc_0.5, align 16
1102     %f0 = load <4 x float>, <4 x float>* @fc_0, align 16
1103     %v1 = fmul <4 x float> %f255, %color
1104     %v2 = fadd <4 x float> %f05, %v1
1105 …%v3 = tail call <4 x float> @_Z5clampDv4_fS_S_(<4 x float> %v2, <4 x float> %f0, <4 x float> %f255…
1106     %v4 = tail call <4 x i8> @_Z14convert_uchar4Dv4_f(<4 x float> %v3) nounwind readnone
1107     ret <4 x i8> %v4
; Packs a <3 x float> colour: widens it to four lanes, overwrites lane 3 with
; 1.0, and forwards to the <4 x float> variant.
1111 define <4 x i8> @_Z17rsPackColorTo8888Dv3_f(<3 x float> %color) nounwind readnone {
1112     %1 = shufflevector <3 x float> %color, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Lane 3 came from the undef operand above; give it a defined value.
1113     %2 = insertelement <4 x float> %1, float 1.0, i32 3
1114     %3 = tail call <4 x i8> @_Z17rsPackColorTo8888Dv4_f(<4 x float> %2) nounwind readnone
1115     ret <4 x i8> %3
; Packs three scalar floats: builds <r, g, b, 1.0> and forwards it to the
; <4 x float> variant.
1119 define <4 x i8> @_Z17rsPackColorTo8888fff(float %r, float %g, float %b) nounwind readnone {
1120     %1 = insertelement <4 x float> undef, float %r, i32 0
1121     %2 = insertelement <4 x float> %1, float %g, i32 1
1122     %3 = insertelement <4 x float> %2, float %b, i32 2
1123     %4 = insertelement <4 x float> %3, float 1.0, i32 3
1124     %5 = tail call <4 x i8> @_Z17rsPackColorTo8888Dv4_f(<4 x float> %4) nounwind readnone
1125     ret <4 x i8> %5
; Packs four scalar floats: builds <r, g, b, a> and forwards it to the
; <4 x float> variant.
1129 define <4 x i8> @_Z17rsPackColorTo8888ffff(float %r, float %g, float %b, float %a) nounwind readnon…
1130     %1 = insertelement <4 x float> undef, float %r, i32 0
1131     %2 = insertelement <4 x float> %1, float %g, i32 1
1132     %3 = insertelement <4 x float> %2, float %b, i32 2
1133     %4 = insertelement <4 x float> %3, float %a, i32 3
1134     %5 = tail call <4 x i8> @_Z17rsPackColorTo8888Dv4_f(<4 x float> %4) nounwind readnone
1135     ret <4 x i8> %5