// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
// RUN:     -fallow-half-arguments-and-returns -ffp-contract=fast -S -emit-llvm -o - %s \
// RUN: | opt -S -mem2reg \
// RUN: | FileCheck %s

// Test new aarch64 intrinsics and types

#include <arm_neon.h>

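// How to read this file: the RUN lines above compile it to LLVM IR with
// NEON enabled, pipe the IR through `opt -mem2reg` so the checks see
// clean SSA values rather than alloca/load/store traffic, and hand the
// result to FileCheck, which matches each function's IR against the
// CHECK-LABEL/CHECK comments immediately preceding it.
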
// CHECK-LABEL: define <8 x i8> @test_vadd_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK:   [[ADD_I:%.*]] = add <8 x i8> %v1, %v2
// CHECK:   ret <8 x i8> [[ADD_I]]
int8x8_t test_vadd_s8(int8x8_t v1, int8x8_t v2) {
  return vadd_s8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vadd_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK:   [[ADD_I:%.*]] = add <4 x i16> %v1, %v2
// CHECK:   ret <4 x i16> [[ADD_I]]
int16x4_t test_vadd_s16(int16x4_t v1, int16x4_t v2) {
  return vadd_s16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vadd_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK:   [[ADD_I:%.*]] = add <2 x i32> %v1, %v2
// CHECK:   ret <2 x i32> [[ADD_I]]
int32x2_t test_vadd_s32(int32x2_t v1, int32x2_t v2) {
  return vadd_s32(v1, v2);
}

// CHECK-LABEL: define <1 x i64> @test_vadd_s64(<1 x i64> %v1, <1 x i64> %v2) #0 {
// CHECK:   [[ADD_I:%.*]] = add <1 x i64> %v1, %v2
// CHECK:   ret <1 x i64> [[ADD_I]]
int64x1_t test_vadd_s64(int64x1_t v1, int64x1_t v2) {
  return vadd_s64(v1, v2);
}

// CHECK-LABEL: define <2 x float> @test_vadd_f32(<2 x float> %v1, <2 x float> %v2) #0 {
// CHECK:   [[ADD_I:%.*]] = fadd <2 x float> %v1, %v2
// CHECK:   ret <2 x float> [[ADD_I]]
float32x2_t test_vadd_f32(float32x2_t v1, float32x2_t v2) {
  return vadd_f32(v1, v2);
}

// CHECK-LABEL: define <8 x i8> @test_vadd_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK:   [[ADD_I:%.*]] = add <8 x i8> %v1, %v2
// CHECK:   ret <8 x i8> [[ADD_I]]
uint8x8_t test_vadd_u8(uint8x8_t v1, uint8x8_t v2) {
  return vadd_u8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vadd_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK:   [[ADD_I:%.*]] = add <4 x i16> %v1, %v2
// CHECK:   ret <4 x i16> [[ADD_I]]
uint16x4_t test_vadd_u16(uint16x4_t v1, uint16x4_t v2) {
  return vadd_u16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vadd_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK:   [[ADD_I:%.*]] = add <2 x i32> %v1, %v2
// CHECK:   ret <2 x i32> [[ADD_I]]
uint32x2_t test_vadd_u32(uint32x2_t v1, uint32x2_t v2) {
  return vadd_u32(v1, v2);
}

// CHECK-LABEL: define <1 x i64> @test_vadd_u64(<1 x i64> %v1, <1 x i64> %v2) #0 {
// CHECK:   [[ADD_I:%.*]] = add <1 x i64> %v1, %v2
// CHECK:   ret <1 x i64> [[ADD_I]]
uint64x1_t test_vadd_u64(uint64x1_t v1, uint64x1_t v2) {
  return vadd_u64(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vaddq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK:   [[ADD_I:%.*]] = add <16 x i8> %v1, %v2
// CHECK:   ret <16 x i8> [[ADD_I]]
int8x16_t test_vaddq_s8(int8x16_t v1, int8x16_t v2) {
  return vaddq_s8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vaddq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %v1, %v2
// CHECK:   ret <8 x i16> [[ADD_I]]
int16x8_t test_vaddq_s16(int16x8_t v1, int16x8_t v2) {
  return vaddq_s16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vaddq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %v1, %v2
// CHECK:   ret <4 x i32> [[ADD_I]]
int32x4_t test_vaddq_s32(int32x4_t v1, int32x4_t v2) {
  return vaddq_s32(v1, v2);
}

// CHECK-LABEL: define <2 x i64> @test_vaddq_s64(<2 x i64> %v1, <2 x i64> %v2) #0 {
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> %v1, %v2
// CHECK:   ret <2 x i64> [[ADD_I]]
int64x2_t test_vaddq_s64(int64x2_t v1, int64x2_t v2) {
  return vaddq_s64(v1, v2);
}

// CHECK-LABEL: define <4 x float> @test_vaddq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
// CHECK:   [[ADD_I:%.*]] = fadd <4 x float> %v1, %v2
// CHECK:   ret <4 x float> [[ADD_I]]
float32x4_t test_vaddq_f32(float32x4_t v1, float32x4_t v2) {
  return vaddq_f32(v1, v2);
}

// CHECK-LABEL: define <2 x double> @test_vaddq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
// CHECK:   [[ADD_I:%.*]] = fadd <2 x double> %v1, %v2
// CHECK:   ret <2 x double> [[ADD_I]]
float64x2_t test_vaddq_f64(float64x2_t v1, float64x2_t v2) {
  return vaddq_f64(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vaddq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK:   [[ADD_I:%.*]] = add <16 x i8> %v1, %v2
// CHECK:   ret <16 x i8> [[ADD_I]]
uint8x16_t test_vaddq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vaddq_u8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vaddq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %v1, %v2
// CHECK:   ret <8 x i16> [[ADD_I]]
uint16x8_t test_vaddq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vaddq_u16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vaddq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %v1, %v2
// CHECK:   ret <4 x i32> [[ADD_I]]
uint32x4_t test_vaddq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vaddq_u32(v1, v2);
}

// CHECK-LABEL: define <2 x i64> @test_vaddq_u64(<2 x i64> %v1, <2 x i64> %v2) #0 {
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> %v1, %v2
// CHECK:   ret <2 x i64> [[ADD_I]]
uint64x2_t test_vaddq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vaddq_u64(v1, v2);
}

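// Note the pattern above: every vadd/vaddq variant lowers to a single
// generic IR instruction on the matching vector type. Two's-complement
// addition is signedness-agnostic, so one `add` opcode serves both the
// s- and u-typed intrinsics, while the floating-point variants lower to
// `fadd`; no target-specific intrinsic is needed. A minimal caller-side
// sketch (illustrative only, not part of the checked output):
//
//   int8x8_t sum8(int8x8_t a, int8x8_t b) {
//     return vadd_s8(a, b);  // selected to a single ADD v.8b
//   }
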
// CHECK-LABEL: define <8 x i8> @test_vsub_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK:   [[SUB_I:%.*]] = sub <8 x i8> %v1, %v2
// CHECK:   ret <8 x i8> [[SUB_I]]
int8x8_t test_vsub_s8(int8x8_t v1, int8x8_t v2) {
  return vsub_s8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vsub_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK:   [[SUB_I:%.*]] = sub <4 x i16> %v1, %v2
// CHECK:   ret <4 x i16> [[SUB_I]]
int16x4_t test_vsub_s16(int16x4_t v1, int16x4_t v2) {
  return vsub_s16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vsub_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK:   [[SUB_I:%.*]] = sub <2 x i32> %v1, %v2
// CHECK:   ret <2 x i32> [[SUB_I]]
int32x2_t test_vsub_s32(int32x2_t v1, int32x2_t v2) {
  return vsub_s32(v1, v2);
}

// CHECK-LABEL: define <1 x i64> @test_vsub_s64(<1 x i64> %v1, <1 x i64> %v2) #0 {
// CHECK:   [[SUB_I:%.*]] = sub <1 x i64> %v1, %v2
// CHECK:   ret <1 x i64> [[SUB_I]]
int64x1_t test_vsub_s64(int64x1_t v1, int64x1_t v2) {
  return vsub_s64(v1, v2);
}

// CHECK-LABEL: define <2 x float> @test_vsub_f32(<2 x float> %v1, <2 x float> %v2) #0 {
// CHECK:   [[SUB_I:%.*]] = fsub <2 x float> %v1, %v2
// CHECK:   ret <2 x float> [[SUB_I]]
float32x2_t test_vsub_f32(float32x2_t v1, float32x2_t v2) {
  return vsub_f32(v1, v2);
}

// CHECK-LABEL: define <8 x i8> @test_vsub_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK:   [[SUB_I:%.*]] = sub <8 x i8> %v1, %v2
// CHECK:   ret <8 x i8> [[SUB_I]]
uint8x8_t test_vsub_u8(uint8x8_t v1, uint8x8_t v2) {
  return vsub_u8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vsub_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK:   [[SUB_I:%.*]] = sub <4 x i16> %v1, %v2
// CHECK:   ret <4 x i16> [[SUB_I]]
uint16x4_t test_vsub_u16(uint16x4_t v1, uint16x4_t v2) {
  return vsub_u16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vsub_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK:   [[SUB_I:%.*]] = sub <2 x i32> %v1, %v2
// CHECK:   ret <2 x i32> [[SUB_I]]
uint32x2_t test_vsub_u32(uint32x2_t v1, uint32x2_t v2) {
  return vsub_u32(v1, v2);
}

// CHECK-LABEL: define <1 x i64> @test_vsub_u64(<1 x i64> %v1, <1 x i64> %v2) #0 {
// CHECK:   [[SUB_I:%.*]] = sub <1 x i64> %v1, %v2
// CHECK:   ret <1 x i64> [[SUB_I]]
uint64x1_t test_vsub_u64(uint64x1_t v1, uint64x1_t v2) {
  return vsub_u64(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vsubq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK:   [[SUB_I:%.*]] = sub <16 x i8> %v1, %v2
// CHECK:   ret <16 x i8> [[SUB_I]]
int8x16_t test_vsubq_s8(int8x16_t v1, int8x16_t v2) {
  return vsubq_s8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vsubq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %v1, %v2
// CHECK:   ret <8 x i16> [[SUB_I]]
int16x8_t test_vsubq_s16(int16x8_t v1, int16x8_t v2) {
  return vsubq_s16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vsubq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %v1, %v2
// CHECK:   ret <4 x i32> [[SUB_I]]
int32x4_t test_vsubq_s32(int32x4_t v1, int32x4_t v2) {
  return vsubq_s32(v1, v2);
}

// CHECK-LABEL: define <2 x i64> @test_vsubq_s64(<2 x i64> %v1, <2 x i64> %v2) #0 {
// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> %v1, %v2
// CHECK:   ret <2 x i64> [[SUB_I]]
int64x2_t test_vsubq_s64(int64x2_t v1, int64x2_t v2) {
  return vsubq_s64(v1, v2);
}

// CHECK-LABEL: define <4 x float> @test_vsubq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
// CHECK:   [[SUB_I:%.*]] = fsub <4 x float> %v1, %v2
// CHECK:   ret <4 x float> [[SUB_I]]
float32x4_t test_vsubq_f32(float32x4_t v1, float32x4_t v2) {
  return vsubq_f32(v1, v2);
}

// CHECK-LABEL: define <2 x double> @test_vsubq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
// CHECK:   [[SUB_I:%.*]] = fsub <2 x double> %v1, %v2
// CHECK:   ret <2 x double> [[SUB_I]]
float64x2_t test_vsubq_f64(float64x2_t v1, float64x2_t v2) {
  return vsubq_f64(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vsubq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK:   [[SUB_I:%.*]] = sub <16 x i8> %v1, %v2
// CHECK:   ret <16 x i8> [[SUB_I]]
uint8x16_t test_vsubq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vsubq_u8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vsubq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %v1, %v2
// CHECK:   ret <8 x i16> [[SUB_I]]
uint16x8_t test_vsubq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vsubq_u16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vsubq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %v1, %v2
// CHECK:   ret <4 x i32> [[SUB_I]]
uint32x4_t test_vsubq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vsubq_u32(v1, v2);
}

// CHECK-LABEL: define <2 x i64> @test_vsubq_u64(<2 x i64> %v1, <2 x i64> %v2) #0 {
// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> %v1, %v2
// CHECK:   ret <2 x i64> [[SUB_I]]
uint64x2_t test_vsubq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vsubq_u64(v1, v2);
}

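// Subtraction mirrors addition exactly: the integer vsub/vsubq variants
// lower to a plain `sub` (again one signedness-agnostic opcode for both
// s- and u-types) and the floating-point variants to `fsub`.
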
// CHECK-LABEL: define <8 x i8> @test_vmul_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <8 x i8> %v1, %v2
// CHECK:   ret <8 x i8> [[MUL_I]]
int8x8_t test_vmul_s8(int8x8_t v1, int8x8_t v2) {
  return vmul_s8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vmul_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <4 x i16> %v1, %v2
// CHECK:   ret <4 x i16> [[MUL_I]]
int16x4_t test_vmul_s16(int16x4_t v1, int16x4_t v2) {
  return vmul_s16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vmul_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <2 x i32> %v1, %v2
// CHECK:   ret <2 x i32> [[MUL_I]]
int32x2_t test_vmul_s32(int32x2_t v1, int32x2_t v2) {
  return vmul_s32(v1, v2);
}

// CHECK-LABEL: define <2 x float> @test_vmul_f32(<2 x float> %v1, <2 x float> %v2) #0 {
// CHECK:   [[MUL_I:%.*]] = fmul <2 x float> %v1, %v2
// CHECK:   ret <2 x float> [[MUL_I]]
float32x2_t test_vmul_f32(float32x2_t v1, float32x2_t v2) {
  return vmul_f32(v1, v2);
}

// CHECK-LABEL: define <8 x i8> @test_vmul_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <8 x i8> %v1, %v2
// CHECK:   ret <8 x i8> [[MUL_I]]
uint8x8_t test_vmul_u8(uint8x8_t v1, uint8x8_t v2) {
  return vmul_u8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vmul_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <4 x i16> %v1, %v2
// CHECK:   ret <4 x i16> [[MUL_I]]
uint16x4_t test_vmul_u16(uint16x4_t v1, uint16x4_t v2) {
  return vmul_u16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vmul_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <2 x i32> %v1, %v2
// CHECK:   ret <2 x i32> [[MUL_I]]
uint32x2_t test_vmul_u32(uint32x2_t v1, uint32x2_t v2) {
  return vmul_u32(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vmulq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <16 x i8> %v1, %v2
// CHECK:   ret <16 x i8> [[MUL_I]]
int8x16_t test_vmulq_s8(int8x16_t v1, int8x16_t v2) {
  return vmulq_s8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vmulq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <8 x i16> %v1, %v2
// CHECK:   ret <8 x i16> [[MUL_I]]
int16x8_t test_vmulq_s16(int16x8_t v1, int16x8_t v2) {
  return vmulq_s16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vmulq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <4 x i32> %v1, %v2
// CHECK:   ret <4 x i32> [[MUL_I]]
int32x4_t test_vmulq_s32(int32x4_t v1, int32x4_t v2) {
  return vmulq_s32(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vmulq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <16 x i8> %v1, %v2
// CHECK:   ret <16 x i8> [[MUL_I]]
uint8x16_t test_vmulq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vmulq_u8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vmulq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <8 x i16> %v1, %v2
// CHECK:   ret <8 x i16> [[MUL_I]]
uint16x8_t test_vmulq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vmulq_u16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vmulq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <4 x i32> %v1, %v2
// CHECK:   ret <4 x i32> [[MUL_I]]
uint32x4_t test_vmulq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vmulq_u32(v1, v2);
}

// CHECK-LABEL: define <4 x float> @test_vmulq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
// CHECK:   [[MUL_I:%.*]] = fmul <4 x float> %v1, %v2
// CHECK:   ret <4 x float> [[MUL_I]]
float32x4_t test_vmulq_f32(float32x4_t v1, float32x4_t v2) {
  return vmulq_f32(v1, v2);
}

// CHECK-LABEL: define <2 x double> @test_vmulq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
// CHECK:   [[MUL_I:%.*]] = fmul <2 x double> %v1, %v2
// CHECK:   ret <2 x double> [[MUL_I]]
float64x2_t test_vmulq_f64(float64x2_t v1, float64x2_t v2) {
  return vmulq_f64(v1, v2);
}

// CHECK-LABEL: define <8 x i8> @test_vmul_p8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK:   [[VMUL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.pmul.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4
// CHECK:   ret <8 x i8> [[VMUL_V_I]]
poly8x8_t test_vmul_p8(poly8x8_t v1, poly8x8_t v2) {
  //  test_vmul_p8
  return vmul_p8(v1, v2);
  //  pmul {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}

// CHECK-LABEL: define <16 x i8> @test_vmulq_p8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK:   [[VMULQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.pmul.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4
// CHECK:   ret <16 x i8> [[VMULQ_V_I]]
poly8x16_t test_vmulq_p8(poly8x16_t v1, poly8x16_t v2) {
  // test_vmulq_p8
  return vmulq_p8(v1, v2);
  // pmul {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}

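// Polynomial multiply is the exception in this block: a product over
// GF(2) is carry-less, so it cannot be expressed with the generic IR
// `mul`, and codegen instead emits @llvm.aarch64.neon.pmul.*, which is
// selected to the PMUL instruction. A scalar model of one lane, for
// intuition only (this helper is an illustration, not part of the
// test):
//
//   uint8_t pmul_lane(uint8_t a, uint8_t b) {
//     uint8_t r = 0;
//     for (int i = 0; i < 8; ++i)
//       if (b & (1u << i))
//         r ^= (uint8_t)(a << i);  // XOR in place of a carrying add
//     return r;  // low 8 bits of the polynomial product
//   }
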
// CHECK-LABEL: define <8 x i8> @test_vmla_s8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = add <8 x i8> %v1, [[MUL_I]]
// CHECK:   ret <8 x i8> [[ADD_I]]
int8x8_t test_vmla_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) {
  return vmla_s8(v1, v2, v3);
}

// CHECK-LABEL: define <8 x i8> @test_vmla_s16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = add <4 x i16> %v1, [[MUL_I]]
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[ADD_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
int8x8_t test_vmla_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) {
  return vmla_s16(v1, v2, v3);
}

// CHECK-LABEL: define <2 x i32> @test_vmla_s32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = add <2 x i32> %v1, [[MUL_I]]
// CHECK:   ret <2 x i32> [[ADD_I]]
int32x2_t test_vmla_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) {
  return vmla_s32(v1, v2, v3);
}

// CHECK-LABEL: define <2 x float> @test_vmla_f32(<2 x float> %v1, <2 x float> %v2, <2 x float> %v3) #0 {
// CHECK:   [[MUL_I:%.*]] = fmul <2 x float> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = fadd <2 x float> %v1, [[MUL_I]]
// CHECK:   ret <2 x float> [[ADD_I]]
float32x2_t test_vmla_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
  return vmla_f32(v1, v2, v3);
}

// CHECK-LABEL: define <8 x i8> @test_vmla_u8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = add <8 x i8> %v1, [[MUL_I]]
// CHECK:   ret <8 x i8> [[ADD_I]]
uint8x8_t test_vmla_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
  return vmla_u8(v1, v2, v3);
}

// CHECK-LABEL: define <4 x i16> @test_vmla_u16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = add <4 x i16> %v1, [[MUL_I]]
// CHECK:   ret <4 x i16> [[ADD_I]]
uint16x4_t test_vmla_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
  return vmla_u16(v1, v2, v3);
}

// CHECK-LABEL: define <2 x i32> @test_vmla_u32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = add <2 x i32> %v1, [[MUL_I]]
// CHECK:   ret <2 x i32> [[ADD_I]]
uint32x2_t test_vmla_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
  return vmla_u32(v1, v2, v3);
}

// CHECK-LABEL: define <16 x i8> @test_vmlaq_s8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = add <16 x i8> %v1, [[MUL_I]]
// CHECK:   ret <16 x i8> [[ADD_I]]
int8x16_t test_vmlaq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) {
  return vmlaq_s8(v1, v2, v3);
}

// CHECK-LABEL: define <8 x i16> @test_vmlaq_s16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %v1, [[MUL_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
int16x8_t test_vmlaq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) {
  return vmlaq_s16(v1, v2, v3);
}

// CHECK-LABEL: define <4 x i32> @test_vmlaq_s32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %v1, [[MUL_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
int32x4_t test_vmlaq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) {
  return vmlaq_s32(v1, v2, v3);
}

// CHECK-LABEL: define <4 x float> @test_vmlaq_f32(<4 x float> %v1, <4 x float> %v2, <4 x float> %v3) #0 {
// CHECK:   [[MUL_I:%.*]] = fmul <4 x float> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = fadd <4 x float> %v1, [[MUL_I]]
// CHECK:   ret <4 x float> [[ADD_I]]
float32x4_t test_vmlaq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
  return vmlaq_f32(v1, v2, v3);
}

// CHECK-LABEL: define <16 x i8> @test_vmlaq_u8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = add <16 x i8> %v1, [[MUL_I]]
// CHECK:   ret <16 x i8> [[ADD_I]]
uint8x16_t test_vmlaq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
  return vmlaq_u8(v1, v2, v3);
}

// CHECK-LABEL: define <8 x i16> @test_vmlaq_u16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %v1, [[MUL_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
uint16x8_t test_vmlaq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
  return vmlaq_u16(v1, v2, v3);
}

// CHECK-LABEL: define <4 x i32> @test_vmlaq_u32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %v1, [[MUL_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
uint32x4_t test_vmlaq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) {
  return vmlaq_u32(v1, v2, v3);
}

// CHECK-LABEL: define <2 x double> @test_vmlaq_f64(<2 x double> %v1, <2 x double> %v2, <2 x double> %v3) #0 {
// CHECK:   [[MUL_I:%.*]] = fmul <2 x double> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = fadd <2 x double> %v1, [[MUL_I]]
// CHECK:   ret <2 x double> [[ADD_I]]
float64x2_t test_vmlaq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
  return vmlaq_f64(v1, v2, v3);
}

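// vmla ("multiply-accumulate") has no dedicated intrinsic in IR: the
// frontend emits a separate `mul`/`fmul` of the last two operands
// followed by an `add`/`fadd` of the first, and the backend is free to
// fuse that pair into MLA (or, for the float forms, FMLA, which
// -ffp-contract=fast permits). One quirk worth flagging: test_vmla_s16
// above is declared to return int8x8_t while computing an int16x4_t
// value; Clang's lax integer vector conversions accept that, and it
// surfaces in the IR as the trailing bitcast to <8 x i8>.
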
// CHECK-LABEL: define <8 x i8> @test_vmls_s8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = sub <8 x i8> %v1, [[MUL_I]]
// CHECK:   ret <8 x i8> [[SUB_I]]
int8x8_t test_vmls_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) {
  return vmls_s8(v1, v2, v3);
}

// CHECK-LABEL: define <8 x i8> @test_vmls_s16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = sub <4 x i16> %v1, [[MUL_I]]
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SUB_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
int8x8_t test_vmls_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) {
  return vmls_s16(v1, v2, v3);
}

// CHECK-LABEL: define <2 x i32> @test_vmls_s32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = sub <2 x i32> %v1, [[MUL_I]]
// CHECK:   ret <2 x i32> [[SUB_I]]
int32x2_t test_vmls_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) {
  return vmls_s32(v1, v2, v3);
}

// CHECK-LABEL: define <2 x float> @test_vmls_f32(<2 x float> %v1, <2 x float> %v2, <2 x float> %v3) #0 {
// CHECK:   [[MUL_I:%.*]] = fmul <2 x float> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = fsub <2 x float> %v1, [[MUL_I]]
// CHECK:   ret <2 x float> [[SUB_I]]
float32x2_t test_vmls_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
  return vmls_f32(v1, v2, v3);
}

// CHECK-LABEL: define <8 x i8> @test_vmls_u8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = sub <8 x i8> %v1, [[MUL_I]]
// CHECK:   ret <8 x i8> [[SUB_I]]
uint8x8_t test_vmls_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
  return vmls_u8(v1, v2, v3);
}

// CHECK-LABEL: define <4 x i16> @test_vmls_u16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = sub <4 x i16> %v1, [[MUL_I]]
// CHECK:   ret <4 x i16> [[SUB_I]]
uint16x4_t test_vmls_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
  return vmls_u16(v1, v2, v3);
}

// CHECK-LABEL: define <2 x i32> @test_vmls_u32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = sub <2 x i32> %v1, [[MUL_I]]
// CHECK:   ret <2 x i32> [[SUB_I]]
uint32x2_t test_vmls_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
  return vmls_u32(v1, v2, v3);
}

// CHECK-LABEL: define <16 x i8> @test_vmlsq_s8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = sub <16 x i8> %v1, [[MUL_I]]
// CHECK:   ret <16 x i8> [[SUB_I]]
int8x16_t test_vmlsq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) {
  return vmlsq_s8(v1, v2, v3);
}

// CHECK-LABEL: define <8 x i16> @test_vmlsq_s16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %v1, [[MUL_I]]
// CHECK:   ret <8 x i16> [[SUB_I]]
int16x8_t test_vmlsq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) {
  return vmlsq_s16(v1, v2, v3);
}

// CHECK-LABEL: define <4 x i32> @test_vmlsq_s32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %v1, [[MUL_I]]
// CHECK:   ret <4 x i32> [[SUB_I]]
int32x4_t test_vmlsq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) {
  return vmlsq_s32(v1, v2, v3);
}

// CHECK-LABEL: define <4 x float> @test_vmlsq_f32(<4 x float> %v1, <4 x float> %v2, <4 x float> %v3) #0 {
// CHECK:   [[MUL_I:%.*]] = fmul <4 x float> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = fsub <4 x float> %v1, [[MUL_I]]
// CHECK:   ret <4 x float> [[SUB_I]]
float32x4_t test_vmlsq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
  return vmlsq_f32(v1, v2, v3);
}

// CHECK-LABEL: define <16 x i8> @test_vmlsq_u8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = sub <16 x i8> %v1, [[MUL_I]]
// CHECK:   ret <16 x i8> [[SUB_I]]
uint8x16_t test_vmlsq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
  return vmlsq_u8(v1, v2, v3);
}

// CHECK-LABEL: define <8 x i16> @test_vmlsq_u16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %v1, [[MUL_I]]
// CHECK:   ret <8 x i16> [[SUB_I]]
uint16x8_t test_vmlsq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
  return vmlsq_u16(v1, v2, v3);
}

// CHECK-LABEL: define <4 x i32> @test_vmlsq_u32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %v1, [[MUL_I]]
// CHECK:   ret <4 x i32> [[SUB_I]]
uint32x4_t test_vmlsq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) {
  return vmlsq_u32(v1, v2, v3);
}

// CHECK-LABEL: define <2 x double> @test_vmlsq_f64(<2 x double> %v1, <2 x double> %v2, <2 x double> %v3) #0 {
// CHECK:   [[MUL_I:%.*]] = fmul <2 x double> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = fsub <2 x double> %v1, [[MUL_I]]
// CHECK:   ret <2 x double> [[SUB_I]]
float64x2_t test_vmlsq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
  return vmlsq_f64(v1, v2, v3);
}

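// vmls is the same shape with the accumulator on the other side of a
// subtraction: `mul`/`fmul` of the last two operands, then `sub`/`fsub`
// from the first, i.e. v1 - v2 * v3 per lane (test_vmls_s16 shows the
// same lax int8x8_t return conversion as its vmla counterpart).
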
// CHECK-LABEL: define <2 x float> @test_vfma_f32(<2 x float> %v1, <2 x float> %v2, <2 x float> %v3) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x float> %v3 to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
// CHECK:   [[TMP6:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x float> [[TMP3]]) #4
// CHECK:   ret <2 x float> [[TMP6]]
float32x2_t test_vfma_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
  return vfma_f32(v1, v2, v3);
}

// CHECK-LABEL: define <4 x float> @test_vfmaq_f32(<4 x float> %v1, <4 x float> %v2, <4 x float> %v3) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x float> %v3 to <16 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
// CHECK:   [[TMP6:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x float> [[TMP3]]) #4
// CHECK:   ret <4 x float> [[TMP6]]
float32x4_t test_vfmaq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
  return vfmaq_f32(v1, v2, v3);
}

// CHECK-LABEL: define <2 x double> @test_vfmaq_f64(<2 x double> %v1, <2 x double> %v2, <2 x double> %v3) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x double> %v3 to <16 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
// CHECK:   [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[TMP4]], <2 x double> [[TMP5]], <2 x double> [[TMP3]]) #4
// CHECK:   ret <2 x double> [[TMP6]]
float64x2_t test_vfmaq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
  return vfmaq_f64(v1, v2, v3);
}

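// vfma maps directly onto the target-independent @llvm.fma.* intrinsic.
// Note the operand order: vfma(v1, v2, v3) computes v2 * v3 + v1, so v1
// is passed as llvm.fma's third (addend) operand. The round-trip
// bitcasts through <8 x i8>/<16 x i8> are generic NEON builtin plumbing
// and fold away to nothing during instruction selection.
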
// CHECK-LABEL: define <2 x float> @test_vfms_f32(<2 x float> %v1, <2 x float> %v2, <2 x float> %v3) #0 {
// CHECK:   [[SUB_I:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v2
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> [[SUB_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x float> %v3 to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
// CHECK:   [[TMP6:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x float> [[TMP3]]) #4
// CHECK:   ret <2 x float> [[TMP6]]
float32x2_t test_vfms_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
  return vfms_f32(v1, v2, v3);
}

// CHECK-LABEL: define <4 x float> @test_vfmsq_f32(<4 x float> %v1, <4 x float> %v2, <4 x float> %v3) #0 {
// CHECK:   [[SUB_I:%.*]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v2
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> [[SUB_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x float> %v3 to <16 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
// CHECK:   [[TMP6:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x float> [[TMP3]]) #4
// CHECK:   ret <4 x float> [[TMP6]]
float32x4_t test_vfmsq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
  return vfmsq_f32(v1, v2, v3);
}

// CHECK-LABEL: define <2 x double> @test_vfmsq_f64(<2 x double> %v1, <2 x double> %v2, <2 x double> %v3) #0 {
// CHECK:   [[SUB_I:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %v2
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> [[SUB_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x double> %v3 to <16 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
// CHECK:   [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[TMP4]], <2 x double> [[TMP5]], <2 x double> [[TMP3]]) #4
// CHECK:   ret <2 x double> [[TMP6]]
float64x2_t test_vfmsq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
  return vfmsq_f64(v1, v2, v3);
}

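// vfms reuses the same @llvm.fma.* call after negating the second
// operand, computing (-v2) * v3 + v1, which equals v1 - v2 * v3. The
// negation is spelled `fsub <-0.0, ...>, %v2` because subtracting from
// negative zero was the canonical IEEE-correct negation idiom in IR
// that predates a dedicated `fneg` instruction.
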
// CHECK-LABEL: define <2 x double> @test_vdivq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
// CHECK:   [[DIV_I:%.*]] = fdiv <2 x double> %v1, %v2
// CHECK:   ret <2 x double> [[DIV_I]]
float64x2_t test_vdivq_f64(float64x2_t v1, float64x2_t v2) {
  return vdivq_f64(v1, v2);
}

// CHECK-LABEL: define <4 x float> @test_vdivq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
// CHECK:   [[DIV_I:%.*]] = fdiv <4 x float> %v1, %v2
// CHECK:   ret <4 x float> [[DIV_I]]
float32x4_t test_vdivq_f32(float32x4_t v1, float32x4_t v2) {
  return vdivq_f32(v1, v2);
}

// CHECK-LABEL: define <2 x float> @test_vdiv_f32(<2 x float> %v1, <2 x float> %v2) #0 {
// CHECK:   [[DIV_I:%.*]] = fdiv <2 x float> %v1, %v2
// CHECK:   ret <2 x float> [[DIV_I]]
float32x2_t test_vdiv_f32(float32x2_t v1, float32x2_t v2) {
  return vdiv_f32(v1, v2);
}

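// Vector division exists only for floating point: vdiv/vdivq lower to a
// plain `fdiv`, which AArch64 can select to its vector FDIV. NEON has
// no integer vector divide, so no _s/_u variants appear here.
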
// CHECK-LABEL: define <8 x i8> @test_vaba_s8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) #0 {
// CHECK:   [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %v2, <8 x i8> %v3) #4
// CHECK:   [[ADD_I:%.*]] = add <8 x i8> %v1, [[VABD_I_I]]
// CHECK:   ret <8 x i8> [[ADD_I]]
int8x8_t test_vaba_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) {
  return vaba_s8(v1, v2, v3);
}

// CHECK-LABEL: define <4 x i16> @test_vaba_s16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
// CHECK:   [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I_I]], <4 x i16> [[VABD1_I_I]]) #4
// CHECK:   [[ADD_I:%.*]] = add <4 x i16> %v1, [[VABD2_I_I]]
// CHECK:   ret <4 x i16> [[ADD_I]]
int16x4_t test_vaba_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) {
  return vaba_s16(v1, v2, v3);
}

// CHECK-LABEL: define <2 x i32> @test_vaba_s32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v3 to <8 x i8>
// CHECK:   [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I_I]], <2 x i32> [[VABD1_I_I]]) #4
// CHECK:   [[ADD_I:%.*]] = add <2 x i32> %v1, [[VABD2_I_I]]
// CHECK:   ret <2 x i32> [[ADD_I]]
int32x2_t test_vaba_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) {
  return vaba_s32(v1, v2, v3);
}

// CHECK-LABEL: define <8 x i8> @test_vaba_u8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) #0 {
// CHECK:   [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %v2, <8 x i8> %v3) #4
// CHECK:   [[ADD_I:%.*]] = add <8 x i8> %v1, [[VABD_I_I]]
// CHECK:   ret <8 x i8> [[ADD_I]]
uint8x8_t test_vaba_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
  return vaba_u8(v1, v2, v3);
}

// CHECK-LABEL: define <4 x i16> @test_vaba_u16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
// CHECK:   [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I_I]], <4 x i16> [[VABD1_I_I]]) #4
// CHECK:   [[ADD_I:%.*]] = add <4 x i16> %v1, [[VABD2_I_I]]
// CHECK:   ret <4 x i16> [[ADD_I]]
uint16x4_t test_vaba_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
  return vaba_u16(v1, v2, v3);
}

// CHECK-LABEL: define <2 x i32> @test_vaba_u32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v3 to <8 x i8>
// CHECK:   [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[VABD_I_I]], <2 x i32> [[VABD1_I_I]]) #4
// CHECK:   [[ADD_I:%.*]] = add <2 x i32> %v1, [[VABD2_I_I]]
// CHECK:   ret <2 x i32> [[ADD_I]]
uint32x2_t test_vaba_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
  return vaba_u32(v1, v2, v3);
}

// CHECK-LABEL: define <16 x i8> @test_vabaq_s8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) #0 {
// CHECK:   [[VABD_I_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %v2, <16 x i8> %v3) #4
// CHECK:   [[ADD_I:%.*]] = add <16 x i8> %v1, [[VABD_I_I]]
// CHECK:   ret <16 x i8> [[ADD_I]]
int8x16_t test_vabaq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) {
  return vabaq_s8(v1, v2, v3);
}

// CHECK-LABEL: define <8 x i16> @test_vabaq_s16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
// CHECK:   [[VABD_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VABD1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VABD2_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> [[VABD_I_I]], <8 x i16> [[VABD1_I_I]]) #4
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %v1, [[VABD2_I_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
int16x8_t test_vabaq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) {
  return vabaq_s16(v1, v2, v3);
}

// CHECK-LABEL: define <4 x i32> @test_vabaq_s32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v3 to <16 x i8>
// CHECK:   [[VABD_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VABD1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VABD2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> [[VABD_I_I]], <4 x i32> [[VABD1_I_I]]) #4
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %v1, [[VABD2_I_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
int32x4_t test_vabaq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) {
  return vabaq_s32(v1, v2, v3);
}

// CHECK-LABEL: define <16 x i8> @test_vabaq_u8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) #0 {
// CHECK:   [[VABD_I_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %v2, <16 x i8> %v3) #4
// CHECK:   [[ADD_I:%.*]] = add <16 x i8> %v1, [[VABD_I_I]]
// CHECK:   ret <16 x i8> [[ADD_I]]
uint8x16_t test_vabaq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
  return vabaq_u8(v1, v2, v3);
}

// CHECK-LABEL: define <8 x i16> @test_vabaq_u16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
// CHECK:   [[VABD_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VABD1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VABD2_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> [[VABD_I_I]], <8 x i16> [[VABD1_I_I]]) #4
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %v1, [[VABD2_I_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
uint16x8_t test_vabaq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
  return vabaq_u16(v1, v2, v3);
}

// CHECK-LABEL: define <4 x i32> @test_vabaq_u32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v3 to <16 x i8>
// CHECK:   [[VABD_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VABD1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VABD2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> [[VABD_I_I]], <4 x i32> [[VABD1_I_I]]) #4
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %v1, [[VABD2_I_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
uint32x4_t test_vabaq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) {
  return vabaq_u32(v1, v2, v3);
}

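// vaba ("absolute difference and accumulate") combines a sabd/uabd
// intrinsic on the last two operands with a generic `add` of the first;
// the backend can then fold the pair into a single SABA/UABA. As with
// vfma above, the bitcasts around the wider element types are builtin
// calling-convention plumbing with no semantic content.
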
// CHECK-LABEL: define <8 x i8> @test_vabd_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK:   [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4
// CHECK:   ret <8 x i8> [[VABD_I]]
int8x8_t test_vabd_s8(int8x8_t v1, int8x8_t v2) {
  return vabd_s8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vabd_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK:   [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VABD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I]], <4 x i16> [[VABD1_I]]) #4
// CHECK:   ret <4 x i16> [[VABD2_I]]
int16x4_t test_vabd_s16(int16x4_t v1, int16x4_t v2) {
  return vabd_s16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vabd_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK:   [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VABD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I]], <2 x i32> [[VABD1_I]]) #4
// CHECK:   ret <2 x i32> [[VABD2_I]]
int32x2_t test_vabd_s32(int32x2_t v1, int32x2_t v2) {
  return vabd_s32(v1, v2);
}

// CHECK-LABEL: define <8 x i8> @test_vabd_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK:   [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4
// CHECK:   ret <8 x i8> [[VABD_I]]
uint8x8_t test_vabd_u8(uint8x8_t v1, uint8x8_t v2) {
  return vabd_u8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vabd_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK:   [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VABD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I]], <4 x i16> [[VABD1_I]]) #4
// CHECK:   ret <4 x i16> [[VABD2_I]]
uint16x4_t test_vabd_u16(uint16x4_t v1, uint16x4_t v2) {
  return vabd_u16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vabd_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK:   [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VABD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[VABD_I]], <2 x i32> [[VABD1_I]]) #4
// CHECK:   ret <2 x i32> [[VABD2_I]]
uint32x2_t test_vabd_u32(uint32x2_t v1, uint32x2_t v2) {
  return vabd_u32(v1, v2);
}

// CHECK-LABEL: define <2 x float> @test_vabd_f32(<2 x float> %v1, <2 x float> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
// CHECK:   [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK:   [[VABD2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fabd.v2f32(<2 x float> [[VABD_I]], <2 x float> [[VABD1_I]]) #4
// CHECK:   ret <2 x float> [[VABD2_I]]
float32x2_t test_vabd_f32(float32x2_t v1, float32x2_t v2) {
  return vabd_f32(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vabdq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK:   [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4
// CHECK:   ret <16 x i8> [[VABD_I]]
int8x16_t test_vabdq_s8(int8x16_t v1, int8x16_t v2) {
  return vabdq_s8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vabdq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK:   [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VABD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> [[VABD_I]], <8 x i16> [[VABD1_I]]) #4
// CHECK:   ret <8 x i16> [[VABD2_I]]
int16x8_t test_vabdq_s16(int16x8_t v1, int16x8_t v2) {
  return vabdq_s16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vabdq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK:   [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VABD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> [[VABD_I]], <4 x i32> [[VABD1_I]]) #4
// CHECK:   ret <4 x i32> [[VABD2_I]]
int32x4_t test_vabdq_s32(int32x4_t v1, int32x4_t v2) {
  return vabdq_s32(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vabdq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK:   [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4
// CHECK:   ret <16 x i8> [[VABD_I]]
uint8x16_t test_vabdq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vabdq_u8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vabdq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK:   [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VABD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> [[VABD_I]], <8 x i16> [[VABD1_I]]) #4
// CHECK:   ret <8 x i16> [[VABD2_I]]
uint16x8_t test_vabdq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vabdq_u16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vabdq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK:   [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VABD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> [[VABD_I]], <4 x i32> [[VABD1_I]]) #4
// CHECK:   ret <4 x i32> [[VABD2_I]]
uint32x4_t test_vabdq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vabdq_u32(v1, v2);
}

// CHECK-LABEL: define <4 x float> @test_vabdq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
// CHECK:   [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK:   [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK:   [[VABD2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fabd.v4f32(<4 x float> [[VABD_I]], <4 x float> [[VABD1_I]]) #4
// CHECK:   ret <4 x float> [[VABD2_I]]
float32x4_t test_vabdq_f32(float32x4_t v1, float32x4_t v2) {
  return vabdq_f32(v1, v2);
}

// CHECK-LABEL: define <2 x double> @test_vabdq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
// CHECK:   [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CHECK:   [[VABD2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fabd.v2f64(<2 x double> [[VABD_I]], <2 x double> [[VABD1_I]]) #4
// CHECK:   ret <2 x double> [[VABD2_I]]
float64x2_t test_vabdq_f64(float64x2_t v1, float64x2_t v2) {
  return vabdq_f64(v1, v2);
}

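// vabd itself lowers to @llvm.aarch64.neon.sabd/uabd for integers and
// @llvm.aarch64.neon.fabd for floating point; there is no generic IR
// opcode for absolute difference, so every element type goes through a
// target intrinsic. The vbsl ("bitwise select") tests that follow take
// the opposite approach: the frontend open-codes the select as
// (v1 & v2) | (~v1 & v3) using plain and/xor/or, and the backend
// re-recognizes that pattern as the BSL instruction.
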
1017 // CHECK-LABEL: define <8 x i8> @test_vbsl_s8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) #0 {
1018 // CHECK:   [[VBSL_I:%.*]] = and <8 x i8> %v1, %v2
1019 // CHECK:   [[TMP0:%.*]] = xor <8 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
1020 // CHECK:   [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %v3
1021 // CHECK:   [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]]
1022 // CHECK:   ret <8 x i8> [[VBSL2_I]]
test_vbsl_s8(uint8x8_t v1,int8x8_t v2,int8x8_t v3)1023 int8x8_t test_vbsl_s8(uint8x8_t v1, int8x8_t v2, int8x8_t v3) {
1024   return vbsl_s8(v1, v2, v3);
1025 }

// CHECK-LABEL: define <8 x i8> @test_vbsl_s16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
// CHECK:   [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK:   [[VBSL3_I:%.*]] = and <4 x i16> [[VBSL_I]], [[VBSL1_I]]
// CHECK:   [[TMP3:%.*]] = xor <4 x i16> [[VBSL_I]], <i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK:   [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], [[VBSL2_I]]
// CHECK:   [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]]
// CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[VBSL5_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP4]]
int8x8_t test_vbsl_s16(uint16x4_t v1, int16x4_t v2, int16x4_t v3) {
  return vbsl_s16(v1, v2, v3);
}

// CHECK-LABEL: define <2 x i32> @test_vbsl_s32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %v3 to <8 x i8>
// CHECK:   [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK:   [[VBSL3_I:%.*]] = and <2 x i32> [[VBSL_I]], [[VBSL1_I]]
// CHECK:   [[TMP3:%.*]] = xor <2 x i32> [[VBSL_I]], <i32 -1, i32 -1>
// CHECK:   [[VBSL4_I:%.*]] = and <2 x i32> [[TMP3]], [[VBSL2_I]]
// CHECK:   [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]]
// CHECK:   ret <2 x i32> [[VBSL5_I]]
int32x2_t test_vbsl_s32(uint32x2_t v1, int32x2_t v2, int32x2_t v3) {
  return vbsl_s32(v1, v2, v3);
}

// CHECK-LABEL: define <1 x i64> @test_vbsl_s64(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %v2 to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <1 x i64> %v3 to <8 x i8>
// CHECK:   [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
// CHECK:   [[VBSL3_I:%.*]] = and <1 x i64> [[VBSL_I]], [[VBSL1_I]]
// CHECK:   [[TMP3:%.*]] = xor <1 x i64> [[VBSL_I]], <i64 -1>
// CHECK:   [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], [[VBSL2_I]]
// CHECK:   [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]]
// CHECK:   ret <1 x i64> [[VBSL5_I]]
int64x1_t test_vbsl_s64(uint64x1_t v1, int64x1_t v2, int64x1_t v3) {
  return vbsl_s64(v1, v2, v3);
}

// CHECK-LABEL: define <8 x i8> @test_vbsl_u8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) #0 {
// CHECK:   [[VBSL_I:%.*]] = and <8 x i8> %v1, %v2
// CHECK:   [[TMP0:%.*]] = xor <8 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK:   [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %v3
// CHECK:   [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]]
// CHECK:   ret <8 x i8> [[VBSL2_I]]
uint8x8_t test_vbsl_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
  return vbsl_u8(v1, v2, v3);
}

// CHECK-LABEL: define <4 x i16> @test_vbsl_u16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
// CHECK:   [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK:   [[VBSL3_I:%.*]] = and <4 x i16> [[VBSL_I]], [[VBSL1_I]]
// CHECK:   [[TMP3:%.*]] = xor <4 x i16> [[VBSL_I]], <i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK:   [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], [[VBSL2_I]]
// CHECK:   [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]]
// CHECK:   ret <4 x i16> [[VBSL5_I]]
uint16x4_t test_vbsl_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
  return vbsl_u16(v1, v2, v3);
}

// CHECK-LABEL: define <2 x i32> @test_vbsl_u32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %v3 to <8 x i8>
// CHECK:   [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK:   [[VBSL3_I:%.*]] = and <2 x i32> [[VBSL_I]], [[VBSL1_I]]
// CHECK:   [[TMP3:%.*]] = xor <2 x i32> [[VBSL_I]], <i32 -1, i32 -1>
// CHECK:   [[VBSL4_I:%.*]] = and <2 x i32> [[TMP3]], [[VBSL2_I]]
// CHECK:   [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]]
// CHECK:   ret <2 x i32> [[VBSL5_I]]
uint32x2_t test_vbsl_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
  return vbsl_u32(v1, v2, v3);
}

// CHECK-LABEL: define <1 x i64> @test_vbsl_u64(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %v2 to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <1 x i64> %v3 to <8 x i8>
// CHECK:   [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
// CHECK:   [[VBSL3_I:%.*]] = and <1 x i64> [[VBSL_I]], [[VBSL1_I]]
// CHECK:   [[TMP3:%.*]] = xor <1 x i64> [[VBSL_I]], <i64 -1>
// CHECK:   [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], [[VBSL2_I]]
// CHECK:   [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]]
// CHECK:   ret <1 x i64> [[VBSL5_I]]
uint64x1_t test_vbsl_u64(uint64x1_t v1, uint64x1_t v2, uint64x1_t v3) {
  return vbsl_u64(v1, v2, v3);
}

// CHECK-LABEL: define <2 x float> @test_vbsl_f32(<2 x float> %v1, <2 x float> %v2, <2 x float> %v3) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %v1 to <2 x i32>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <2 x float> %v3 to <8 x i8>
// CHECK:   [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK:   [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
// CHECK:   [[VBSL3_I:%.*]] = and <2 x i32> [[VBSL_I]], [[VBSL1_I]]
// CHECK:   [[TMP4:%.*]] = xor <2 x i32> [[VBSL_I]], <i32 -1, i32 -1>
// CHECK:   [[VBSL4_I:%.*]] = and <2 x i32> [[TMP4]], [[VBSL2_I]]
// CHECK:   [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]]
// CHECK:   [[TMP5:%.*]] = bitcast <2 x i32> [[VBSL5_I]] to <2 x float>
// CHECK:   ret <2 x float> [[TMP5]]
float32x2_t test_vbsl_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
  return vbsl_f32(v1, v2, v3);
}

// CHECK-LABEL: define <1 x double> @test_vbsl_f64(<1 x i64> %v1, <1 x double> %v2, <1 x double> %v3) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %v2 to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <1 x double> %v3 to <8 x i8>
// CHECK:   [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
// CHECK:   [[VBSL3_I:%.*]] = and <1 x i64> [[VBSL_I]], [[VBSL1_I]]
// CHECK:   [[TMP3:%.*]] = xor <1 x i64> [[VBSL_I]], <i64 -1>
// CHECK:   [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], [[VBSL2_I]]
// CHECK:   [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]]
// CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[VBSL5_I]] to <1 x double>
// CHECK:   ret <1 x double> [[TMP4]]
float64x1_t test_vbsl_f64(uint64x1_t v1, float64x1_t v2, float64x1_t v3) {
  return vbsl_f64(v1, v2, v3);
}
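
// There is no floating-point "select bits" operation, so the float and
// double variants reinterpret their inputs as integer vectors, select
// bitwise, and reinterpret the result -- the bitcast/and/or sequence
// checked above. A hedged scalar model using a union for the bit
// reinterpretation (illustrative helper only, not part of the checked
// test):
static inline float scalar_bsl_f32(uint32_t mask, float b, float c) {
  union { float f; uint32_t u; } ub = { b }, uc = { c }, r;
  r.u = (mask & ub.u) | (~mask & uc.u); // select bits, not lanes
  return r.f;
}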

// CHECK-LABEL: define <8 x i8> @test_vbsl_p8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) #0 {
// CHECK:   [[VBSL_I:%.*]] = and <8 x i8> %v1, %v2
// CHECK:   [[TMP0:%.*]] = xor <8 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK:   [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %v3
// CHECK:   [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]]
// CHECK:   ret <8 x i8> [[VBSL2_I]]
poly8x8_t test_vbsl_p8(uint8x8_t v1, poly8x8_t v2, poly8x8_t v3) {
  return vbsl_p8(v1, v2, v3);
}

// CHECK-LABEL: define <4 x i16> @test_vbsl_p16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
// CHECK:   [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK:   [[VBSL3_I:%.*]] = and <4 x i16> [[VBSL_I]], [[VBSL1_I]]
// CHECK:   [[TMP3:%.*]] = xor <4 x i16> [[VBSL_I]], <i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK:   [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], [[VBSL2_I]]
// CHECK:   [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]]
// CHECK:   ret <4 x i16> [[VBSL5_I]]
poly16x4_t test_vbsl_p16(uint16x4_t v1, poly16x4_t v2, poly16x4_t v3) {
  return vbsl_p16(v1, v2, v3);
}

// CHECK-LABEL: define <16 x i8> @test_vbslq_s8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) #0 {
// CHECK:   [[VBSL_I:%.*]] = and <16 x i8> %v1, %v2
// CHECK:   [[TMP0:%.*]] = xor <16 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK:   [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %v3
// CHECK:   [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]]
// CHECK:   ret <16 x i8> [[VBSL2_I]]
int8x16_t test_vbslq_s8(uint8x16_t v1, int8x16_t v2, int8x16_t v3) {
  return vbslq_s8(v1, v2, v3);
}

// CHECK-LABEL: define <8 x i16> @test_vbslq_s16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
// CHECK:   [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
// CHECK:   [[VBSL3_I:%.*]] = and <8 x i16> [[VBSL_I]], [[VBSL1_I]]
// CHECK:   [[TMP3:%.*]] = xor <8 x i16> [[VBSL_I]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK:   [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], [[VBSL2_I]]
// CHECK:   [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]]
// CHECK:   ret <8 x i16> [[VBSL5_I]]
int16x8_t test_vbslq_s16(uint16x8_t v1, int16x8_t v2, int16x8_t v3) {
  return vbslq_s16(v1, v2, v3);
}

// CHECK-LABEL: define <4 x i32> @test_vbslq_s32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %v3 to <16 x i8>
// CHECK:   [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK:   [[VBSL3_I:%.*]] = and <4 x i32> [[VBSL_I]], [[VBSL1_I]]
// CHECK:   [[TMP3:%.*]] = xor <4 x i32> [[VBSL_I]], <i32 -1, i32 -1, i32 -1, i32 -1>
// CHECK:   [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], [[VBSL2_I]]
// CHECK:   [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]]
// CHECK:   ret <4 x i32> [[VBSL5_I]]
int32x4_t test_vbslq_s32(uint32x4_t v1, int32x4_t v2, int32x4_t v3) {
  return vbslq_s32(v1, v2, v3);
}

// CHECK-LABEL: define <2 x i64> @test_vbslq_s64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i64> %v3 to <16 x i8>
// CHECK:   [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
// CHECK:   [[VBSL3_I:%.*]] = and <2 x i64> [[VBSL_I]], [[VBSL1_I]]
// CHECK:   [[TMP3:%.*]] = xor <2 x i64> [[VBSL_I]], <i64 -1, i64 -1>
// CHECK:   [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], [[VBSL2_I]]
// CHECK:   [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]]
// CHECK:   ret <2 x i64> [[VBSL5_I]]
int64x2_t test_vbslq_s64(uint64x2_t v1, int64x2_t v2, int64x2_t v3) {
  return vbslq_s64(v1, v2, v3);
}

// CHECK-LABEL: define <16 x i8> @test_vbslq_u8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) #0 {
// CHECK:   [[VBSL_I:%.*]] = and <16 x i8> %v1, %v2
// CHECK:   [[TMP0:%.*]] = xor <16 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK:   [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %v3
// CHECK:   [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]]
// CHECK:   ret <16 x i8> [[VBSL2_I]]
uint8x16_t test_vbslq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
  return vbslq_u8(v1, v2, v3);
}

// CHECK-LABEL: define <8 x i16> @test_vbslq_u16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
// CHECK:   [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
// CHECK:   [[VBSL3_I:%.*]] = and <8 x i16> [[VBSL_I]], [[VBSL1_I]]
// CHECK:   [[TMP3:%.*]] = xor <8 x i16> [[VBSL_I]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK:   [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], [[VBSL2_I]]
// CHECK:   [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]]
// CHECK:   ret <8 x i16> [[VBSL5_I]]
uint16x8_t test_vbslq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
  return vbslq_u16(v1, v2, v3);
}

// CHECK-LABEL: define <4 x i32> @test_vbslq_u32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %v3 to <16 x i8>
// CHECK:   [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK:   [[VBSL3_I:%.*]] = and <4 x i32> [[VBSL_I]], [[VBSL1_I]]
// CHECK:   [[TMP3:%.*]] = xor <4 x i32> [[VBSL_I]], <i32 -1, i32 -1, i32 -1, i32 -1>
// CHECK:   [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], [[VBSL2_I]]
// CHECK:   [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]]
// CHECK:   ret <4 x i32> [[VBSL5_I]]
uint32x4_t test_vbslq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) {
  return vbslq_u32(v1, v2, v3);
}

// CHECK-LABEL: define <2 x i64> @test_vbslq_u64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i64> %v3 to <16 x i8>
// CHECK:   [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
// CHECK:   [[VBSL3_I:%.*]] = and <2 x i64> [[VBSL_I]], [[VBSL1_I]]
// CHECK:   [[TMP3:%.*]] = xor <2 x i64> [[VBSL_I]], <i64 -1, i64 -1>
// CHECK:   [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], [[VBSL2_I]]
// CHECK:   [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]]
// CHECK:   ret <2 x i64> [[VBSL5_I]]
uint64x2_t test_vbslq_u64(uint64x2_t v1, uint64x2_t v2, uint64x2_t v3) {
  return vbslq_u64(v1, v2, v3);
}

// CHECK-LABEL: define <4 x float> @test_vbslq_f32(<4 x i32> %v1, <4 x float> %v2, <4 x float> %v3) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x float> %v3 to <16 x i8>
// CHECK:   [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK:   [[VBSL3_I:%.*]] = and <4 x i32> [[VBSL_I]], [[VBSL1_I]]
// CHECK:   [[TMP3:%.*]] = xor <4 x i32> [[VBSL_I]], <i32 -1, i32 -1, i32 -1, i32 -1>
// CHECK:   [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], [[VBSL2_I]]
// CHECK:   [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]]
// CHECK:   [[TMP4:%.*]] = bitcast <4 x i32> [[VBSL5_I]] to <4 x float>
// CHECK:   ret <4 x float> [[TMP4]]
float32x4_t test_vbslq_f32(uint32x4_t v1, float32x4_t v2, float32x4_t v3) {
  return vbslq_f32(v1, v2, v3);
}

// CHECK-LABEL: define <16 x i8> @test_vbslq_p8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) #0 {
// CHECK:   [[VBSL_I:%.*]] = and <16 x i8> %v1, %v2
// CHECK:   [[TMP0:%.*]] = xor <16 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK:   [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %v3
// CHECK:   [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]]
// CHECK:   ret <16 x i8> [[VBSL2_I]]
poly8x16_t test_vbslq_p8(uint8x16_t v1, poly8x16_t v2, poly8x16_t v3) {
  return vbslq_p8(v1, v2, v3);
}

// CHECK-LABEL: define <8 x i16> @test_vbslq_p16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
// CHECK:   [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
// CHECK:   [[VBSL3_I:%.*]] = and <8 x i16> [[VBSL_I]], [[VBSL1_I]]
// CHECK:   [[TMP3:%.*]] = xor <8 x i16> [[VBSL_I]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK:   [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], [[VBSL2_I]]
// CHECK:   [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]]
// CHECK:   ret <8 x i16> [[VBSL5_I]]
poly16x8_t test_vbslq_p16(uint16x8_t v1, poly16x8_t v2, poly16x8_t v3) {
  return vbslq_p16(v1, v2, v3);
}

// CHECK-LABEL: define <2 x double> @test_vbslq_f64(<2 x i64> %v1, <2 x double> %v2, <2 x double> %v3) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x double> %v3 to <16 x i8>
// CHECK:   [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
// CHECK:   [[VBSL3_I:%.*]] = and <2 x i64> [[VBSL_I]], [[VBSL1_I]]
// CHECK:   [[TMP3:%.*]] = xor <2 x i64> [[VBSL_I]], <i64 -1, i64 -1>
// CHECK:   [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], [[VBSL2_I]]
// CHECK:   [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]]
// CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[VBSL5_I]] to <2 x double>
// CHECK:   ret <2 x double> [[TMP4]]
float64x2_t test_vbslq_f64(uint64x2_t v1, float64x2_t v2, float64x2_t v3) {
  return vbslq_f64(v1, v2, v3);
}

// CHECK-LABEL: define <2 x float> @test_vrecps_f32(<2 x float> %v1, <2 x float> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
// CHECK:   [[VRECPS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[VRECPS_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK:   [[VRECPS_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frecps.v2f32(<2 x float> [[VRECPS_V_I]], <2 x float> [[VRECPS_V1_I]]) #4
// CHECK:   [[VRECPS_V3_I:%.*]] = bitcast <2 x float> [[VRECPS_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VRECPS_V3_I]] to <2 x float>
// CHECK:   ret <2 x float> [[TMP2]]
float32x2_t test_vrecps_f32(float32x2_t v1, float32x2_t v2) {
  return vrecps_f32(v1, v2);
}
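
// vrecps computes one Newton-Raphson correction term, 2.0 - (v1 * v2).
// It is intended to be paired with the low-precision vrecpe estimate to
// approximate a reciprocal; a hedged sketch of that common idiom
// (illustrative helper only, not part of the checked test):
static inline float32x2_t approx_recip_f32(float32x2_t x) {
  float32x2_t e = vrecpe_f32(x);      // coarse initial estimate of 1/x
  e = vmul_f32(e, vrecps_f32(x, e));  // first refinement step
  e = vmul_f32(e, vrecps_f32(x, e));  // second step, near full precision
  return e;
}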

// CHECK-LABEL: define <4 x float> @test_vrecpsq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
// CHECK:   [[VRECPSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK:   [[VRECPSQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK:   [[VRECPSQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frecps.v4f32(<4 x float> [[VRECPSQ_V_I]], <4 x float> [[VRECPSQ_V1_I]]) #4
// CHECK:   [[VRECPSQ_V3_I:%.*]] = bitcast <4 x float> [[VRECPSQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VRECPSQ_V3_I]] to <4 x float>
// CHECK:   ret <4 x float> [[TMP2]]
float32x4_t test_vrecpsq_f32(float32x4_t v1, float32x4_t v2) {
  return vrecpsq_f32(v1, v2);
}

// CHECK-LABEL: define <2 x double> @test_vrecpsq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
// CHECK:   [[VRECPSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[VRECPSQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CHECK:   [[VRECPSQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frecps.v2f64(<2 x double> [[VRECPSQ_V_I]], <2 x double> [[VRECPSQ_V1_I]]) #4
// CHECK:   [[VRECPSQ_V3_I:%.*]] = bitcast <2 x double> [[VRECPSQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VRECPSQ_V3_I]] to <2 x double>
// CHECK:   ret <2 x double> [[TMP2]]
float64x2_t test_vrecpsq_f64(float64x2_t v1, float64x2_t v2) {
  return vrecpsq_f64(v1, v2);
}

// CHECK-LABEL: define <2 x float> @test_vrsqrts_f32(<2 x float> %v1, <2 x float> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
// CHECK:   [[VRSQRTS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[VRSQRTS_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK:   [[VRSQRTS_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frsqrts.v2f32(<2 x float> [[VRSQRTS_V_I]], <2 x float> [[VRSQRTS_V1_I]]) #4
// CHECK:   [[VRSQRTS_V3_I:%.*]] = bitcast <2 x float> [[VRSQRTS_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VRSQRTS_V3_I]] to <2 x float>
// CHECK:   ret <2 x float> [[TMP2]]
float32x2_t test_vrsqrts_f32(float32x2_t v1, float32x2_t v2) {
  return vrsqrts_f32(v1, v2);
}
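
// vrsqrts is the reciprocal-square-root counterpart of vrecps: it
// computes the correction term (3.0 - v1 * v2) / 2.0 for refining a
// vrsqrte estimate. A hedged sketch of the usual refinement idiom
// (illustrative helper only, not part of the checked test):
static inline float32x2_t approx_rsqrt_f32(float32x2_t x) {
  float32x2_t e = vrsqrte_f32(x);                   // coarse estimate
  e = vmul_f32(e, vrsqrts_f32(vmul_f32(x, e), e));  // refinement step
  e = vmul_f32(e, vrsqrts_f32(vmul_f32(x, e), e));  // and once more
  return e;
}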

// CHECK-LABEL: define <4 x float> @test_vrsqrtsq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
// CHECK:   [[VRSQRTSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK:   [[VRSQRTSQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK:   [[VRSQRTSQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frsqrts.v4f32(<4 x float> [[VRSQRTSQ_V_I]], <4 x float> [[VRSQRTSQ_V1_I]]) #4
// CHECK:   [[VRSQRTSQ_V3_I:%.*]] = bitcast <4 x float> [[VRSQRTSQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VRSQRTSQ_V3_I]] to <4 x float>
// CHECK:   ret <4 x float> [[TMP2]]
float32x4_t test_vrsqrtsq_f32(float32x4_t v1, float32x4_t v2) {
  return vrsqrtsq_f32(v1, v2);
}

// CHECK-LABEL: define <2 x double> @test_vrsqrtsq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
// CHECK:   [[VRSQRTSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[VRSQRTSQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CHECK:   [[VRSQRTSQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frsqrts.v2f64(<2 x double> [[VRSQRTSQ_V_I]], <2 x double> [[VRSQRTSQ_V1_I]]) #4
// CHECK:   [[VRSQRTSQ_V3_I:%.*]] = bitcast <2 x double> [[VRSQRTSQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VRSQRTSQ_V3_I]] to <2 x double>
// CHECK:   ret <2 x double> [[TMP2]]
float64x2_t test_vrsqrtsq_f64(float64x2_t v1, float64x2_t v2) {
  return vrsqrtsq_f64(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vcage_f32(<2 x float> %v1, <2 x float> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
// CHECK:   [[VCAGE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[VCAGE_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK:   [[VCAGE_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facge.v2i32.v2f32(<2 x float> [[VCAGE_V_I]], <2 x float> [[VCAGE_V1_I]]) #4
// CHECK:   ret <2 x i32> [[VCAGE_V2_I]]
uint32x2_t test_vcage_f32(float32x2_t v1, float32x2_t v2) {
  return vcage_f32(v1, v2);
}
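
// The vcage/vcagt family compares absolute values and produces an
// all-ones or all-zeros mask per lane. A hedged scalar model of one
// vcage_f32 lane, using a union to clear the sign bit so no <math.h>
// dependency is needed (illustrative helper only, not part of the
// checked test):
static inline uint32_t scalar_cage_f32(float a, float b) {
  union { float f; uint32_t u; } ua = { a }, ub = { b };
  ua.u &= 0x7FFFFFFFu; // clear sign bit -> |a|
  ub.u &= 0x7FFFFFFFu; // clear sign bit -> |b|
  return ua.f >= ub.f ? 0xFFFFFFFFu : 0u; // NaN lanes compare false
}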

// CHECK-LABEL: define <1 x i64> @test_vcage_f64(<1 x double> %a, <1 x double> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK:   [[VCAGE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK:   [[VCAGE_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
// CHECK:   [[VCAGE_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facge.v1i64.v1f64(<1 x double> [[VCAGE_V_I]], <1 x double> [[VCAGE_V1_I]]) #4
// CHECK:   ret <1 x i64> [[VCAGE_V2_I]]
uint64x1_t test_vcage_f64(float64x1_t a, float64x1_t b) {
  return vcage_f64(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vcageq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
// CHECK:   [[VCAGEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK:   [[VCAGEQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK:   [[VCAGEQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facge.v4i32.v4f32(<4 x float> [[VCAGEQ_V_I]], <4 x float> [[VCAGEQ_V1_I]]) #4
// CHECK:   ret <4 x i32> [[VCAGEQ_V2_I]]
uint32x4_t test_vcageq_f32(float32x4_t v1, float32x4_t v2) {
  return vcageq_f32(v1, v2);
}

// CHECK-LABEL: define <2 x i64> @test_vcageq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
// CHECK:   [[VCAGEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[VCAGEQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CHECK:   [[VCAGEQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facge.v2i64.v2f64(<2 x double> [[VCAGEQ_V_I]], <2 x double> [[VCAGEQ_V1_I]]) #4
// CHECK:   ret <2 x i64> [[VCAGEQ_V2_I]]
uint64x2_t test_vcageq_f64(float64x2_t v1, float64x2_t v2) {
  return vcageq_f64(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vcagt_f32(<2 x float> %v1, <2 x float> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
// CHECK:   [[VCAGT_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[VCAGT_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK:   [[VCAGT_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facgt.v2i32.v2f32(<2 x float> [[VCAGT_V_I]], <2 x float> [[VCAGT_V1_I]]) #4
// CHECK:   ret <2 x i32> [[VCAGT_V2_I]]
uint32x2_t test_vcagt_f32(float32x2_t v1, float32x2_t v2) {
  return vcagt_f32(v1, v2);
}

// CHECK-LABEL: define <1 x i64> @test_vcagt_f64(<1 x double> %a, <1 x double> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK:   [[VCAGT_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK:   [[VCAGT_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
// CHECK:   [[VCAGT_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facgt.v1i64.v1f64(<1 x double> [[VCAGT_V_I]], <1 x double> [[VCAGT_V1_I]]) #4
// CHECK:   ret <1 x i64> [[VCAGT_V2_I]]
uint64x1_t test_vcagt_f64(float64x1_t a, float64x1_t b) {
  return vcagt_f64(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vcagtq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
// CHECK:   [[VCAGTQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK:   [[VCAGTQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK:   [[VCAGTQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facgt.v4i32.v4f32(<4 x float> [[VCAGTQ_V_I]], <4 x float> [[VCAGTQ_V1_I]]) #4
// CHECK:   ret <4 x i32> [[VCAGTQ_V2_I]]
uint32x4_t test_vcagtq_f32(float32x4_t v1, float32x4_t v2) {
  return vcagtq_f32(v1, v2);
}

// CHECK-LABEL: define <2 x i64> @test_vcagtq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
// CHECK:   [[VCAGTQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[VCAGTQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CHECK:   [[VCAGTQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facgt.v2i64.v2f64(<2 x double> [[VCAGTQ_V_I]], <2 x double> [[VCAGTQ_V1_I]]) #4
// CHECK:   ret <2 x i64> [[VCAGTQ_V2_I]]
uint64x2_t test_vcagtq_f64(float64x2_t v1, float64x2_t v2) {
  return vcagtq_f64(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vcale_f32(<2 x float> %v1, <2 x float> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
// CHECK:   [[VCALE_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK:   [[VCALE_V1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[VCALE_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facge.v2i32.v2f32(<2 x float> [[VCALE_V_I]], <2 x float> [[VCALE_V1_I]]) #4
// CHECK:   ret <2 x i32> [[VCALE_V2_I]]
uint32x2_t test_vcale_f32(float32x2_t v1, float32x2_t v2) {
  return vcale_f32(v1, v2);
  // Using registers other than v0 and v1 is possible, but would be odd.
}
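
// Note the operand order in the IR above: there is no separate
// "absolute less-or-equal" intrinsic, so vcale_f32(v1, v2) is emitted
// as facge with the arguments swapped, i.e. vcage_f32(v2, v1).
// vcalt below is likewise facgt with swapped operands.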

// CHECK-LABEL: define <1 x i64> @test_vcale_f64(<1 x double> %a, <1 x double> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK:   [[VCALE_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
// CHECK:   [[VCALE_V1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK:   [[VCALE_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facge.v1i64.v1f64(<1 x double> [[VCALE_V_I]], <1 x double> [[VCALE_V1_I]]) #4
// CHECK:   ret <1 x i64> [[VCALE_V2_I]]
uint64x1_t test_vcale_f64(float64x1_t a, float64x1_t b) {
  return vcale_f64(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vcaleq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
// CHECK:   [[VCALEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK:   [[VCALEQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK:   [[VCALEQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facge.v4i32.v4f32(<4 x float> [[VCALEQ_V_I]], <4 x float> [[VCALEQ_V1_I]]) #4
// CHECK:   ret <4 x i32> [[VCALEQ_V2_I]]
uint32x4_t test_vcaleq_f32(float32x4_t v1, float32x4_t v2) {
  return vcaleq_f32(v1, v2);
  // Using registers other than v0 and v1 is possible, but would be odd.
}

// CHECK-LABEL: define <2 x i64> @test_vcaleq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
// CHECK:   [[VCALEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CHECK:   [[VCALEQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[VCALEQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facge.v2i64.v2f64(<2 x double> [[VCALEQ_V_I]], <2 x double> [[VCALEQ_V1_I]]) #4
// CHECK:   ret <2 x i64> [[VCALEQ_V2_I]]
uint64x2_t test_vcaleq_f64(float64x2_t v1, float64x2_t v2) {
  return vcaleq_f64(v1, v2);
  // Using registers other than v0 and v1 is possible, but would be odd.
}

// CHECK-LABEL: define <2 x i32> @test_vcalt_f32(<2 x float> %v1, <2 x float> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
// CHECK:   [[VCALT_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK:   [[VCALT_V1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[VCALT_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facgt.v2i32.v2f32(<2 x float> [[VCALT_V_I]], <2 x float> [[VCALT_V1_I]]) #4
// CHECK:   ret <2 x i32> [[VCALT_V2_I]]
uint32x2_t test_vcalt_f32(float32x2_t v1, float32x2_t v2) {
  return vcalt_f32(v1, v2);
  // Using registers other than v0 and v1 is possible, but would be odd.
}

// CHECK-LABEL: define <1 x i64> @test_vcalt_f64(<1 x double> %a, <1 x double> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK:   [[VCALT_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
// CHECK:   [[VCALT_V1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK:   [[VCALT_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facgt.v1i64.v1f64(<1 x double> [[VCALT_V_I]], <1 x double> [[VCALT_V1_I]]) #4
// CHECK:   ret <1 x i64> [[VCALT_V2_I]]
uint64x1_t test_vcalt_f64(float64x1_t a, float64x1_t b) {
  return vcalt_f64(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vcaltq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
// CHECK:   [[VCALTQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK:   [[VCALTQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK:   [[VCALTQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facgt.v4i32.v4f32(<4 x float> [[VCALTQ_V_I]], <4 x float> [[VCALTQ_V1_I]]) #4
// CHECK:   ret <4 x i32> [[VCALTQ_V2_I]]
uint32x4_t test_vcaltq_f32(float32x4_t v1, float32x4_t v2) {
  return vcaltq_f32(v1, v2);
  // Using registers other than v0 and v1 is possible, but would be odd.
}

// CHECK-LABEL: define <2 x i64> @test_vcaltq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
// CHECK:   [[VCALTQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CHECK:   [[VCALTQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[VCALTQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facgt.v2i64.v2f64(<2 x double> [[VCALTQ_V_I]], <2 x double> [[VCALTQ_V1_I]]) #4
// CHECK:   ret <2 x i64> [[VCALTQ_V2_I]]
uint64x2_t test_vcaltq_f64(float64x2_t v1, float64x2_t v2) {
  return vcaltq_f64(v1, v2);
  // Using registers other than v0 and v1 is possible, but would be odd.
}

// CHECK-LABEL: define <8 x i8> @test_vtst_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = and <8 x i8> %v1, %v2
// CHECK:   [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
// CHECK:   [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
// CHECK:   ret <8 x i8> [[VTST_I]]
uint8x8_t test_vtst_s8(int8x8_t v1, int8x8_t v2) {
  return vtst_s8(v1, v2);
}
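
// vtst performs a lanewise "test bits": a lane becomes all-ones when
// v1 & v2 is nonzero there -- the and/icmp-ne/sext sequence checked
// above. A hedged scalar model of one 8-bit lane (illustrative helper
// only, not part of the checked test):
static inline uint8_t scalar_tst_u8(uint8_t a, uint8_t b) {
  return (a & b) != 0 ? 0xFFu : 0x00u;
}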

// CHECK-LABEL: define <4 x i16> @test_vtst_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[TMP4:%.*]] = and <4 x i16> [[TMP2]], [[TMP3]]
// CHECK:   [[TMP5:%.*]] = icmp ne <4 x i16> [[TMP4]], zeroinitializer
// CHECK:   [[VTST_I:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i16>
// CHECK:   ret <4 x i16> [[VTST_I]]
uint16x4_t test_vtst_s16(int16x4_t v1, int16x4_t v2) {
  return vtst_s16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vtst_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[TMP4:%.*]] = and <2 x i32> [[TMP2]], [[TMP3]]
// CHECK:   [[TMP5:%.*]] = icmp ne <2 x i32> [[TMP4]], zeroinitializer
// CHECK:   [[VTST_I:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i32>
// CHECK:   ret <2 x i32> [[VTST_I]]
uint32x2_t test_vtst_s32(int32x2_t v1, int32x2_t v2) {
  return vtst_s32(v1, v2);
}

// CHECK-LABEL: define <8 x i8> @test_vtst_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = and <8 x i8> %v1, %v2
// CHECK:   [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
// CHECK:   [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
// CHECK:   ret <8 x i8> [[VTST_I]]
uint8x8_t test_vtst_u8(uint8x8_t v1, uint8x8_t v2) {
  return vtst_u8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vtst_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[TMP4:%.*]] = and <4 x i16> [[TMP2]], [[TMP3]]
// CHECK:   [[TMP5:%.*]] = icmp ne <4 x i16> [[TMP4]], zeroinitializer
// CHECK:   [[VTST_I:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i16>
// CHECK:   ret <4 x i16> [[VTST_I]]
uint16x4_t test_vtst_u16(uint16x4_t v1, uint16x4_t v2) {
  return vtst_u16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vtst_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[TMP4:%.*]] = and <2 x i32> [[TMP2]], [[TMP3]]
// CHECK:   [[TMP5:%.*]] = icmp ne <2 x i32> [[TMP4]], zeroinitializer
// CHECK:   [[VTST_I:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i32>
// CHECK:   ret <2 x i32> [[VTST_I]]
uint32x2_t test_vtst_u32(uint32x2_t v1, uint32x2_t v2) {
  return vtst_u32(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vtstq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = and <16 x i8> %v1, %v2
// CHECK:   [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
// CHECK:   [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
// CHECK:   ret <16 x i8> [[VTST_I]]
uint8x16_t test_vtstq_s8(int8x16_t v1, int8x16_t v2) {
  return vtstq_s8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vtstq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[TMP4:%.*]] = and <8 x i16> [[TMP2]], [[TMP3]]
// CHECK:   [[TMP5:%.*]] = icmp ne <8 x i16> [[TMP4]], zeroinitializer
// CHECK:   [[VTST_I:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i16>
// CHECK:   ret <8 x i16> [[VTST_I]]
uint16x8_t test_vtstq_s16(int16x8_t v1, int16x8_t v2) {
  return vtstq_s16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vtstq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[TMP4:%.*]] = and <4 x i32> [[TMP2]], [[TMP3]]
// CHECK:   [[TMP5:%.*]] = icmp ne <4 x i32> [[TMP4]], zeroinitializer
// CHECK:   [[VTST_I:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i32>
// CHECK:   ret <4 x i32> [[VTST_I]]
uint32x4_t test_vtstq_s32(int32x4_t v1, int32x4_t v2) {
  return vtstq_s32(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vtstq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = and <16 x i8> %v1, %v2
// CHECK:   [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
// CHECK:   [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
// CHECK:   ret <16 x i8> [[VTST_I]]
uint8x16_t test_vtstq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vtstq_u8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vtstq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[TMP4:%.*]] = and <8 x i16> [[TMP2]], [[TMP3]]
// CHECK:   [[TMP5:%.*]] = icmp ne <8 x i16> [[TMP4]], zeroinitializer
// CHECK:   [[VTST_I:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i16>
// CHECK:   ret <8 x i16> [[VTST_I]]
uint16x8_t test_vtstq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vtstq_u16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vtstq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[TMP4:%.*]] = and <4 x i32> [[TMP2]], [[TMP3]]
// CHECK:   [[TMP5:%.*]] = icmp ne <4 x i32> [[TMP4]], zeroinitializer
// CHECK:   [[VTST_I:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i32>
// CHECK:   ret <4 x i32> [[VTST_I]]
uint32x4_t test_vtstq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vtstq_u32(v1, v2);
}

// CHECK-LABEL: define <2 x i64> @test_vtstq_s64(<2 x i64> %v1, <2 x i64> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[TMP4:%.*]] = and <2 x i64> [[TMP2]], [[TMP3]]
// CHECK:   [[TMP5:%.*]] = icmp ne <2 x i64> [[TMP4]], zeroinitializer
// CHECK:   [[VTST_I:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i64>
// CHECK:   ret <2 x i64> [[VTST_I]]
uint64x2_t test_vtstq_s64(int64x2_t v1, int64x2_t v2) {
  return vtstq_s64(v1, v2);
}

// CHECK-LABEL: define <2 x i64> @test_vtstq_u64(<2 x i64> %v1, <2 x i64> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[TMP4:%.*]] = and <2 x i64> [[TMP2]], [[TMP3]]
// CHECK:   [[TMP5:%.*]] = icmp ne <2 x i64> [[TMP4]], zeroinitializer
// CHECK:   [[VTST_I:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i64>
// CHECK:   ret <2 x i64> [[VTST_I]]
uint64x2_t test_vtstq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vtstq_u64(v1, v2);
}

// CHECK-LABEL: define <8 x i8> @test_vtst_p8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = and <8 x i8> %v1, %v2
// CHECK:   [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
// CHECK:   [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
// CHECK:   ret <8 x i8> [[VTST_I]]
uint8x8_t test_vtst_p8(poly8x8_t v1, poly8x8_t v2) {
  return vtst_p8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vtst_p16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[TMP4:%.*]] = and <4 x i16> [[TMP2]], [[TMP3]]
// CHECK:   [[TMP5:%.*]] = icmp ne <4 x i16> [[TMP4]], zeroinitializer
// CHECK:   [[VTST_I:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i16>
// CHECK:   ret <4 x i16> [[VTST_I]]
uint16x4_t test_vtst_p16(poly16x4_t v1, poly16x4_t v2) {
  return vtst_p16(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vtstq_p8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = and <16 x i8> %v1, %v2
// CHECK:   [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
// CHECK:   [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
// CHECK:   ret <16 x i8> [[VTST_I]]
uint8x16_t test_vtstq_p8(poly8x16_t v1, poly8x16_t v2) {
  return vtstq_p8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vtstq_p16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[TMP4:%.*]] = and <8 x i16> [[TMP2]], [[TMP3]]
// CHECK:   [[TMP5:%.*]] = icmp ne <8 x i16> [[TMP4]], zeroinitializer
// CHECK:   [[VTST_I:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i16>
// CHECK:   ret <8 x i16> [[VTST_I]]
uint16x8_t test_vtstq_p16(poly16x8_t v1, poly16x8_t v2) {
  return vtstq_p16(v1, v2);
}

// CHECK-LABEL: define <1 x i64> @test_vtst_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[TMP4:%.*]] = and <1 x i64> [[TMP2]], [[TMP3]]
// CHECK:   [[TMP5:%.*]] = icmp ne <1 x i64> [[TMP4]], zeroinitializer
// CHECK:   [[VTST_I:%.*]] = sext <1 x i1> [[TMP5]] to <1 x i64>
// CHECK:   ret <1 x i64> [[VTST_I]]
uint64x1_t test_vtst_s64(int64x1_t a, int64x1_t b) {
  return vtst_s64(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vtst_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[TMP4:%.*]] = and <1 x i64> [[TMP2]], [[TMP3]]
// CHECK:   [[TMP5:%.*]] = icmp ne <1 x i64> [[TMP4]], zeroinitializer
// CHECK:   [[VTST_I:%.*]] = sext <1 x i1> [[TMP5]] to <1 x i64>
// CHECK:   ret <1 x i64> [[VTST_I]]
uint64x1_t test_vtst_u64(uint64x1_t a, uint64x1_t b) {
  return vtst_u64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vceq_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp eq <8 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vceq_s8(int8x8_t v1, int8x8_t v2) {
  return vceq_s8(v1, v2);
}
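
// vceq lowers to a plain icmp eq followed by a sign extension, so each
// equal lane becomes all-ones and each unequal lane all-zeros. A hedged
// scalar model of one 8-bit lane (illustrative helper only, not part of
// the checked test):
static inline uint8_t scalar_ceq_u8(uint8_t a, uint8_t b) {
  return a == b ? 0xFFu : 0x00u;
}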

// CHECK-LABEL: define <4 x i16> @test_vceq_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp eq <4 x i16> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vceq_s16(int16x4_t v1, int16x4_t v2) {
  return vceq_s16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vceq_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp eq <2 x i32> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vceq_s32(int32x2_t v1, int32x2_t v2) {
  return vceq_s32(v1, v2);
}

// CHECK-LABEL: define <1 x i64> @test_vceq_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp eq <1 x i64> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vceq_s64(int64x1_t a, int64x1_t b) {
  return vceq_s64(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vceq_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp eq <1 x i64> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vceq_u64(uint64x1_t a, uint64x1_t b) {
  return vceq_u64(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vceq_f32(<2 x float> %v1, <2 x float> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = fcmp oeq <2 x float> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vceq_f32(float32x2_t v1, float32x2_t v2) {
  return vceq_f32(v1, v2);
}
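
// The floating-point variants use fcmp oeq, an *ordered* comparison:
// any lane containing a NaN compares unequal and yields an all-zeros
// mask, matching the IEEE rule that NaN != NaN.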

// CHECK-LABEL: define <1 x i64> @test_vceq_f64(<1 x double> %a, <1 x double> %b) #0 {
// CHECK:   [[CMP_I:%.*]] = fcmp oeq <1 x double> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vceq_f64(float64x1_t a, float64x1_t b) {
  return vceq_f64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vceq_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp eq <8 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vceq_u8(uint8x8_t v1, uint8x8_t v2) {
  return vceq_u8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vceq_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp eq <4 x i16> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vceq_u16(uint16x4_t v1, uint16x4_t v2) {
  return vceq_u16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vceq_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp eq <2 x i32> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vceq_u32(uint32x2_t v1, uint32x2_t v2) {
  return vceq_u32(v1, v2);
}

// CHECK-LABEL: define <8 x i8> @test_vceq_p8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp eq <8 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vceq_p8(poly8x8_t v1, poly8x8_t v2) {
  return vceq_p8(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vceqq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp eq <16 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK:   ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vceqq_s8(int8x16_t v1, int8x16_t v2) {
  return vceqq_s8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vceqq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp eq <8 x i16> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vceqq_s16(int16x8_t v1, int16x8_t v2) {
  return vceqq_s16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vceqq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp eq <4 x i32> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vceqq_s32(int32x4_t v1, int32x4_t v2) {
  return vceqq_s32(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vceqq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = fcmp oeq <4 x float> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vceqq_f32(float32x4_t v1, float32x4_t v2) {
  return vceqq_f32(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vceqq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp eq <16 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK:   ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vceqq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vceqq_u8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vceqq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
1996 // CHECK:   [[CMP_I:%.*]] = icmp eq <8 x i16> %v1, %v2
1997 // CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
1998 // CHECK:   ret <8 x i16> [[SEXT_I]]
test_vceqq_u16(uint16x8_t v1,uint16x8_t v2)1999 uint16x8_t test_vceqq_u16(uint16x8_t v1, uint16x8_t v2) {
2000   return vceqq_u16(v1, v2);
2001 }
2002 
2003 // CHECK-LABEL: define <4 x i32> @test_vceqq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
2004 // CHECK:   [[CMP_I:%.*]] = icmp eq <4 x i32> %v1, %v2
2005 // CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
2006 // CHECK:   ret <4 x i32> [[SEXT_I]]
test_vceqq_u32(uint32x4_t v1,uint32x4_t v2)2007 uint32x4_t test_vceqq_u32(uint32x4_t v1, uint32x4_t v2) {
2008   return vceqq_u32(v1, v2);
2009 }
2010 
2011 // CHECK-LABEL: define <16 x i8> @test_vceqq_p8(<16 x i8> %v1, <16 x i8> %v2) #0 {
2012 // CHECK:   [[CMP_I:%.*]] = icmp eq <16 x i8> %v1, %v2
2013 // CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
2014 // CHECK:   ret <16 x i8> [[SEXT_I]]
test_vceqq_p8(poly8x16_t v1,poly8x16_t v2)2015 uint8x16_t test_vceqq_p8(poly8x16_t v1, poly8x16_t v2) {
2016   return vceqq_p8(v1, v2);
2017 }
2018 
2019 
2020 // CHECK-LABEL: define <2 x i64> @test_vceqq_s64(<2 x i64> %v1, <2 x i64> %v2) #0 {
2021 // CHECK:   [[CMP_I:%.*]] = icmp eq <2 x i64> %v1, %v2
2022 // CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
2023 // CHECK:   ret <2 x i64> [[SEXT_I]]
test_vceqq_s64(int64x2_t v1,int64x2_t v2)2024 uint64x2_t test_vceqq_s64(int64x2_t v1, int64x2_t v2) {
2025   return vceqq_s64(v1, v2);
2026 }
2027 
2028 // CHECK-LABEL: define <2 x i64> @test_vceqq_u64(<2 x i64> %v1, <2 x i64> %v2) #0 {
2029 // CHECK:   [[CMP_I:%.*]] = icmp eq <2 x i64> %v1, %v2
2030 // CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
2031 // CHECK:   ret <2 x i64> [[SEXT_I]]
test_vceqq_u64(uint64x2_t v1,uint64x2_t v2)2032 uint64x2_t test_vceqq_u64(uint64x2_t v1, uint64x2_t v2) {
2033   return vceqq_u64(v1, v2);
2034 }
2035 
2036 // CHECK-LABEL: define <2 x i64> @test_vceqq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
2037 // CHECK:   [[CMP_I:%.*]] = fcmp oeq <2 x double> %v1, %v2
2038 // CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
2039 // CHECK:   ret <2 x i64> [[SEXT_I]]
test_vceqq_f64(float64x2_t v1,float64x2_t v2)2040 uint64x2_t test_vceqq_f64(float64x2_t v1, float64x2_t v2) {
2041   return vceqq_f64(v1, v2);
2042 }
2043 // CHECK-LABEL: define <8 x i8> @test_vcge_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
2044 // CHECK:   [[CMP_I:%.*]] = icmp sge <8 x i8> %v1, %v2
2045 // CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
2046 // CHECK:   ret <8 x i8> [[SEXT_I]]
test_vcge_s8(int8x8_t v1,int8x8_t v2)2047 uint8x8_t test_vcge_s8(int8x8_t v1, int8x8_t v2) {
2048   return vcge_s8(v1, v2);
2049 }
2050 
2051 // CHECK-LABEL: define <4 x i16> @test_vcge_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
2052 // CHECK:   [[CMP_I:%.*]] = icmp sge <4 x i16> %v1, %v2
2053 // CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
2054 // CHECK:   ret <4 x i16> [[SEXT_I]]
test_vcge_s16(int16x4_t v1,int16x4_t v2)2055 uint16x4_t test_vcge_s16(int16x4_t v1, int16x4_t v2) {
2056   return vcge_s16(v1, v2);
2057 }
2058 
2059 // CHECK-LABEL: define <2 x i32> @test_vcge_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
2060 // CHECK:   [[CMP_I:%.*]] = icmp sge <2 x i32> %v1, %v2
2061 // CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
2062 // CHECK:   ret <2 x i32> [[SEXT_I]]
test_vcge_s32(int32x2_t v1,int32x2_t v2)2063 uint32x2_t test_vcge_s32(int32x2_t v1, int32x2_t v2) {
2064   return vcge_s32(v1, v2);
2065 }
2066 
2067 // CHECK-LABEL: define <1 x i64> @test_vcge_s64(<1 x i64> %a, <1 x i64> %b) #0 {
2068 // CHECK:   [[CMP_I:%.*]] = icmp sge <1 x i64> %a, %b
2069 // CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
2070 // CHECK:   ret <1 x i64> [[SEXT_I]]
test_vcge_s64(int64x1_t a,int64x1_t b)2071 uint64x1_t test_vcge_s64(int64x1_t a, int64x1_t b) {
2072   return vcge_s64(a, b);
2073 }
2074 
2075 // CHECK-LABEL: define <1 x i64> @test_vcge_u64(<1 x i64> %a, <1 x i64> %b) #0 {
2076 // CHECK:   [[CMP_I:%.*]] = icmp uge <1 x i64> %a, %b
2077 // CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
2078 // CHECK:   ret <1 x i64> [[SEXT_I]]
test_vcge_u64(uint64x1_t a,uint64x1_t b)2079 uint64x1_t test_vcge_u64(uint64x1_t a, uint64x1_t b) {
2080   return vcge_u64(a, b);
2081 }
2082 
2083 // CHECK-LABEL: define <2 x i32> @test_vcge_f32(<2 x float> %v1, <2 x float> %v2) #0 {
2084 // CHECK:   [[CMP_I:%.*]] = fcmp oge <2 x float> %v1, %v2
2085 // CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
2086 // CHECK:   ret <2 x i32> [[SEXT_I]]
test_vcge_f32(float32x2_t v1,float32x2_t v2)2087 uint32x2_t test_vcge_f32(float32x2_t v1, float32x2_t v2) {
2088   return vcge_f32(v1, v2);
2089 }
2090 
2091 // CHECK-LABEL: define <1 x i64> @test_vcge_f64(<1 x double> %a, <1 x double> %b) #0 {
2092 // CHECK:   [[CMP_I:%.*]] = fcmp oge <1 x double> %a, %b
2093 // CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
2094 // CHECK:   ret <1 x i64> [[SEXT_I]]
test_vcge_f64(float64x1_t a,float64x1_t b)2095 uint64x1_t test_vcge_f64(float64x1_t a, float64x1_t b) {
2096   return vcge_f64(a, b);
2097 }
2098 
2099 // CHECK-LABEL: define <8 x i8> @test_vcge_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
2100 // CHECK:   [[CMP_I:%.*]] = icmp uge <8 x i8> %v1, %v2
2101 // CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
2102 // CHECK:   ret <8 x i8> [[SEXT_I]]
test_vcge_u8(uint8x8_t v1,uint8x8_t v2)2103 uint8x8_t test_vcge_u8(uint8x8_t v1, uint8x8_t v2) {
2104   return vcge_u8(v1, v2);
2105 }
2106 
2107 // CHECK-LABEL: define <4 x i16> @test_vcge_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
2108 // CHECK:   [[CMP_I:%.*]] = icmp uge <4 x i16> %v1, %v2
2109 // CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
2110 // CHECK:   ret <4 x i16> [[SEXT_I]]
test_vcge_u16(uint16x4_t v1,uint16x4_t v2)2111 uint16x4_t test_vcge_u16(uint16x4_t v1, uint16x4_t v2) {
2112   return vcge_u16(v1, v2);
2113 }
2114 
2115 // CHECK-LABEL: define <2 x i32> @test_vcge_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
2116 // CHECK:   [[CMP_I:%.*]] = icmp uge <2 x i32> %v1, %v2
2117 // CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
2118 // CHECK:   ret <2 x i32> [[SEXT_I]]
test_vcge_u32(uint32x2_t v1,uint32x2_t v2)2119 uint32x2_t test_vcge_u32(uint32x2_t v1, uint32x2_t v2) {
2120   return vcge_u32(v1, v2);
2121 }
2122 
2123 // CHECK-LABEL: define <16 x i8> @test_vcgeq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
2124 // CHECK:   [[CMP_I:%.*]] = icmp sge <16 x i8> %v1, %v2
2125 // CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
2126 // CHECK:   ret <16 x i8> [[SEXT_I]]
test_vcgeq_s8(int8x16_t v1,int8x16_t v2)2127 uint8x16_t test_vcgeq_s8(int8x16_t v1, int8x16_t v2) {
2128   return vcgeq_s8(v1, v2);
2129 }
2130 
2131 // CHECK-LABEL: define <8 x i16> @test_vcgeq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
2132 // CHECK:   [[CMP_I:%.*]] = icmp sge <8 x i16> %v1, %v2
2133 // CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
2134 // CHECK:   ret <8 x i16> [[SEXT_I]]
test_vcgeq_s16(int16x8_t v1,int16x8_t v2)2135 uint16x8_t test_vcgeq_s16(int16x8_t v1, int16x8_t v2) {
2136   return vcgeq_s16(v1, v2);
2137 }
2138 
2139 // CHECK-LABEL: define <4 x i32> @test_vcgeq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
2140 // CHECK:   [[CMP_I:%.*]] = icmp sge <4 x i32> %v1, %v2
2141 // CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
2142 // CHECK:   ret <4 x i32> [[SEXT_I]]
test_vcgeq_s32(int32x4_t v1,int32x4_t v2)2143 uint32x4_t test_vcgeq_s32(int32x4_t v1, int32x4_t v2) {
2144   return vcgeq_s32(v1, v2);
2145 }
2146 
2147 // CHECK-LABEL: define <4 x i32> @test_vcgeq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
2148 // CHECK:   [[CMP_I:%.*]] = fcmp oge <4 x float> %v1, %v2
2149 // CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
2150 // CHECK:   ret <4 x i32> [[SEXT_I]]
test_vcgeq_f32(float32x4_t v1,float32x4_t v2)2151 uint32x4_t test_vcgeq_f32(float32x4_t v1, float32x4_t v2) {
2152   return vcgeq_f32(v1, v2);
2153 }
2154 
2155 // CHECK-LABEL: define <16 x i8> @test_vcgeq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
2156 // CHECK:   [[CMP_I:%.*]] = icmp uge <16 x i8> %v1, %v2
2157 // CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
2158 // CHECK:   ret <16 x i8> [[SEXT_I]]
test_vcgeq_u8(uint8x16_t v1,uint8x16_t v2)2159 uint8x16_t test_vcgeq_u8(uint8x16_t v1, uint8x16_t v2) {
2160   return vcgeq_u8(v1, v2);
2161 }
2162 
2163 // CHECK-LABEL: define <8 x i16> @test_vcgeq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
2164 // CHECK:   [[CMP_I:%.*]] = icmp uge <8 x i16> %v1, %v2
2165 // CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
2166 // CHECK:   ret <8 x i16> [[SEXT_I]]
test_vcgeq_u16(uint16x8_t v1,uint16x8_t v2)2167 uint16x8_t test_vcgeq_u16(uint16x8_t v1, uint16x8_t v2) {
2168   return vcgeq_u16(v1, v2);
2169 }
2170 
2171 // CHECK-LABEL: define <4 x i32> @test_vcgeq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
2172 // CHECK:   [[CMP_I:%.*]] = icmp uge <4 x i32> %v1, %v2
2173 // CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
2174 // CHECK:   ret <4 x i32> [[SEXT_I]]
test_vcgeq_u32(uint32x4_t v1,uint32x4_t v2)2175 uint32x4_t test_vcgeq_u32(uint32x4_t v1, uint32x4_t v2) {
2176   return vcgeq_u32(v1, v2);
2177 }
2178 
2179 // CHECK-LABEL: define <2 x i64> @test_vcgeq_s64(<2 x i64> %v1, <2 x i64> %v2) #0 {
2180 // CHECK:   [[CMP_I:%.*]] = icmp sge <2 x i64> %v1, %v2
2181 // CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
2182 // CHECK:   ret <2 x i64> [[SEXT_I]]
test_vcgeq_s64(int64x2_t v1,int64x2_t v2)2183 uint64x2_t test_vcgeq_s64(int64x2_t v1, int64x2_t v2) {
2184   return vcgeq_s64(v1, v2);
2185 }
2186 
2187 // CHECK-LABEL: define <2 x i64> @test_vcgeq_u64(<2 x i64> %v1, <2 x i64> %v2) #0 {
2188 // CHECK:   [[CMP_I:%.*]] = icmp uge <2 x i64> %v1, %v2
2189 // CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
2190 // CHECK:   ret <2 x i64> [[SEXT_I]]
test_vcgeq_u64(uint64x2_t v1,uint64x2_t v2)2191 uint64x2_t test_vcgeq_u64(uint64x2_t v1, uint64x2_t v2) {
2192   return vcgeq_u64(v1, v2);
2193 }
2194 
2195 // CHECK-LABEL: define <2 x i64> @test_vcgeq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
2196 // CHECK:   [[CMP_I:%.*]] = fcmp oge <2 x double> %v1, %v2
2197 // CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
2198 // CHECK:   ret <2 x i64> [[SEXT_I]]
test_vcgeq_f64(float64x2_t v1,float64x2_t v2)2199 uint64x2_t test_vcgeq_f64(float64x2_t v1, float64x2_t v2) {
2200   return vcgeq_f64(v1, v2);
2201 }
2202 
// Notes about vcle:
// The LE condition predicate is implemented as GE with the operands
// reversed, so check for reversed operands in the generated code (see the
// equivalence sketch after this function). Using registers other than
// v0, v1 is possible, but would be odd.
// CHECK-LABEL: define <8 x i8> @test_vcle_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp sle <8 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcle_s8(int8x8_t v1, int8x8_t v2) {
  return vcle_s8(v1, v2);
}
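
// At the IR level checked here the compare is still a straightforward sle;
// only the final assembly swaps the operands to use the GE form. A sketch of
// the operand-swap equivalence the backend relies on (hypothetical helper,
// not part of the checked test):
static inline uint8x8_t sketch_vcle_via_vcge_s8(int8x8_t a, int8x8_t b) {
  return vcge_s8(b, a); /* a <= b yields the same lane mask as b >= a */
}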

// CHECK-LABEL: define <4 x i16> @test_vcle_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp sle <4 x i16> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcle_s16(int16x4_t v1, int16x4_t v2) {
  return vcle_s16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vcle_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp sle <2 x i32> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcle_s32(int32x2_t v1, int32x2_t v2) {
  return vcle_s32(v1, v2);
}

// CHECK-LABEL: define <1 x i64> @test_vcle_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp sle <1 x i64> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcle_s64(int64x1_t a, int64x1_t b) {
  return vcle_s64(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vcle_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp ule <1 x i64> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcle_u64(uint64x1_t a, uint64x1_t b) {
  return vcle_u64(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vcle_f32(<2 x float> %v1, <2 x float> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = fcmp ole <2 x float> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcle_f32(float32x2_t v1, float32x2_t v2) {
  return vcle_f32(v1, v2);
}

// CHECK-LABEL: define <1 x i64> @test_vcle_f64(<1 x double> %a, <1 x double> %b) #0 {
// CHECK:   [[CMP_I:%.*]] = fcmp ole <1 x double> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcle_f64(float64x1_t a, float64x1_t b) {
  return vcle_f64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vcle_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp ule <8 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcle_u8(uint8x8_t v1, uint8x8_t v2) {
  return vcle_u8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vcle_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp ule <4 x i16> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcle_u16(uint16x4_t v1, uint16x4_t v2) {
  return vcle_u16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vcle_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp ule <2 x i32> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcle_u32(uint32x2_t v1, uint32x2_t v2) {
  return vcle_u32(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vcleq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp sle <16 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK:   ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcleq_s8(int8x16_t v1, int8x16_t v2) {
  return vcleq_s8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vcleq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp sle <8 x i16> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcleq_s16(int16x8_t v1, int16x8_t v2) {
  return vcleq_s16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vcleq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp sle <4 x i32> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcleq_s32(int32x4_t v1, int32x4_t v2) {
  return vcleq_s32(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vcleq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = fcmp ole <4 x float> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcleq_f32(float32x4_t v1, float32x4_t v2) {
  return vcleq_f32(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vcleq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp ule <16 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK:   ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcleq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vcleq_u8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vcleq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp ule <8 x i16> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcleq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vcleq_u16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vcleq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp ule <4 x i32> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcleq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vcleq_u32(v1, v2);
}

// CHECK-LABEL: define <2 x i64> @test_vcleq_s64(<2 x i64> %v1, <2 x i64> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp sle <2 x i64> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcleq_s64(int64x2_t v1, int64x2_t v2) {
  return vcleq_s64(v1, v2);
}

// CHECK-LABEL: define <2 x i64> @test_vcleq_u64(<2 x i64> %v1, <2 x i64> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp ule <2 x i64> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcleq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vcleq_u64(v1, v2);
}

// CHECK-LABEL: define <2 x i64> @test_vcleq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = fcmp ole <2 x double> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcleq_f64(float64x2_t v1, float64x2_t v2) {
  return vcleq_f64(v1, v2);
}


// CHECK-LABEL: define <8 x i8> @test_vcgt_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp sgt <8 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcgt_s8(int8x8_t v1, int8x8_t v2) {
  return vcgt_s8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vcgt_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp sgt <4 x i16> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcgt_s16(int16x4_t v1, int16x4_t v2) {
  return vcgt_s16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vcgt_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp sgt <2 x i32> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcgt_s32(int32x2_t v1, int32x2_t v2) {
  return vcgt_s32(v1, v2);
}

// CHECK-LABEL: define <1 x i64> @test_vcgt_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp sgt <1 x i64> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcgt_s64(int64x1_t a, int64x1_t b) {
  return vcgt_s64(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vcgt_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp ugt <1 x i64> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcgt_u64(uint64x1_t a, uint64x1_t b) {
  return vcgt_u64(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vcgt_f32(<2 x float> %v1, <2 x float> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = fcmp ogt <2 x float> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcgt_f32(float32x2_t v1, float32x2_t v2) {
  return vcgt_f32(v1, v2);
}

// CHECK-LABEL: define <1 x i64> @test_vcgt_f64(<1 x double> %a, <1 x double> %b) #0 {
// CHECK:   [[CMP_I:%.*]] = fcmp ogt <1 x double> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcgt_f64(float64x1_t a, float64x1_t b) {
  return vcgt_f64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vcgt_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp ugt <8 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcgt_u8(uint8x8_t v1, uint8x8_t v2) {
  return vcgt_u8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vcgt_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp ugt <4 x i16> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcgt_u16(uint16x4_t v1, uint16x4_t v2) {
  return vcgt_u16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vcgt_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp ugt <2 x i32> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcgt_u32(uint32x2_t v1, uint32x2_t v2) {
  return vcgt_u32(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vcgtq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp sgt <16 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK:   ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcgtq_s8(int8x16_t v1, int8x16_t v2) {
  return vcgtq_s8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vcgtq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp sgt <8 x i16> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcgtq_s16(int16x8_t v1, int16x8_t v2) {
  return vcgtq_s16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vcgtq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp sgt <4 x i32> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgtq_s32(int32x4_t v1, int32x4_t v2) {
  return vcgtq_s32(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vcgtq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = fcmp ogt <4 x float> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgtq_f32(float32x4_t v1, float32x4_t v2) {
  return vcgtq_f32(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vcgtq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp ugt <16 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK:   ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcgtq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vcgtq_u8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vcgtq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp ugt <8 x i16> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcgtq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vcgtq_u16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vcgtq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp ugt <4 x i32> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgtq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vcgtq_u32(v1, v2);
}

// CHECK-LABEL: define <2 x i64> @test_vcgtq_s64(<2 x i64> %v1, <2 x i64> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp sgt <2 x i64> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcgtq_s64(int64x2_t v1, int64x2_t v2) {
  return vcgtq_s64(v1, v2);
}

// CHECK-LABEL: define <2 x i64> @test_vcgtq_u64(<2 x i64> %v1, <2 x i64> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp ugt <2 x i64> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcgtq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vcgtq_u64(v1, v2);
}

// CHECK-LABEL: define <2 x i64> @test_vcgtq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = fcmp ogt <2 x double> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcgtq_f64(float64x2_t v1, float64x2_t v2) {
  return vcgtq_f64(v1, v2);
}


// Notes about vclt:
// The LT condition predicate is implemented as GT with the operands
// reversed, so check for reversed operands in the generated code (see the
// equivalence sketch after this function). Using registers other than
// v0, v1 is possible, but would be odd.

// CHECK-LABEL: define <8 x i8> @test_vclt_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp slt <8 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vclt_s8(int8x8_t v1, int8x8_t v2) {
  return vclt_s8(v1, v2);
}
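
// As with vcle, the IR checked here keeps the straightforward slt; only the
// final assembly swaps the operands to use the GT form. Equivalence sketch
// (hypothetical helper, not part of the checked test):
static inline uint8x8_t sketch_vclt_via_vcgt_s8(int8x8_t a, int8x8_t b) {
  return vcgt_s8(b, a); /* a < b yields the same lane mask as b > a */
}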

// CHECK-LABEL: define <4 x i16> @test_vclt_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp slt <4 x i16> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vclt_s16(int16x4_t v1, int16x4_t v2) {
  return vclt_s16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vclt_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp slt <2 x i32> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vclt_s32(int32x2_t v1, int32x2_t v2) {
  return vclt_s32(v1, v2);
}

// CHECK-LABEL: define <1 x i64> @test_vclt_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp slt <1 x i64> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vclt_s64(int64x1_t a, int64x1_t b) {
  return vclt_s64(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vclt_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp ult <1 x i64> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vclt_u64(uint64x1_t a, uint64x1_t b) {
  return vclt_u64(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vclt_f32(<2 x float> %v1, <2 x float> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = fcmp olt <2 x float> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vclt_f32(float32x2_t v1, float32x2_t v2) {
  return vclt_f32(v1, v2);
}

// CHECK-LABEL: define <1 x i64> @test_vclt_f64(<1 x double> %a, <1 x double> %b) #0 {
// CHECK:   [[CMP_I:%.*]] = fcmp olt <1 x double> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vclt_f64(float64x1_t a, float64x1_t b) {
  return vclt_f64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vclt_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp ult <8 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vclt_u8(uint8x8_t v1, uint8x8_t v2) {
  return vclt_u8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vclt_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp ult <4 x i16> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vclt_u16(uint16x4_t v1, uint16x4_t v2) {
  return vclt_u16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vclt_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp ult <2 x i32> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vclt_u32(uint32x2_t v1, uint32x2_t v2) {
  return vclt_u32(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vcltq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp slt <16 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK:   ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcltq_s8(int8x16_t v1, int8x16_t v2) {
  return vcltq_s8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vcltq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp slt <8 x i16> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcltq_s16(int16x8_t v1, int16x8_t v2) {
  return vcltq_s16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vcltq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp slt <4 x i32> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcltq_s32(int32x4_t v1, int32x4_t v2) {
  return vcltq_s32(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vcltq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = fcmp olt <4 x float> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcltq_f32(float32x4_t v1, float32x4_t v2) {
  return vcltq_f32(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vcltq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp ult <16 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK:   ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcltq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vcltq_u8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vcltq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp ult <8 x i16> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcltq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vcltq_u16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vcltq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp ult <4 x i32> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcltq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vcltq_u32(v1, v2);
}

// CHECK-LABEL: define <2 x i64> @test_vcltq_s64(<2 x i64> %v1, <2 x i64> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp slt <2 x i64> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcltq_s64(int64x2_t v1, int64x2_t v2) {
  return vcltq_s64(v1, v2);
}

// CHECK-LABEL: define <2 x i64> @test_vcltq_u64(<2 x i64> %v1, <2 x i64> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp ult <2 x i64> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcltq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vcltq_u64(v1, v2);
}

// CHECK-LABEL: define <2 x i64> @test_vcltq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = fcmp olt <2 x double> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcltq_f64(float64x2_t v1, float64x2_t v2) {
  return vcltq_f64(v1, v2);
}


// CHECK-LABEL: define <8 x i8> @test_vhadd_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK:   [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4
// CHECK:   ret <8 x i8> [[VHADD_V_I]]
int8x8_t test_vhadd_s8(int8x8_t v1, int8x8_t v2) {
  return vhadd_s8(v1, v2);
}
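
// shadd/uhadd compute a halving add: each lane is (a + b) >> 1, with the sum
// formed in double-width arithmetic so the intermediate cannot overflow. A
// scalar model of one signed i8 lane (hypothetical helper, not part of the
// checked test):
static inline int8_t scalar_vhadd_s8(int8_t a, int8_t b) {
  return (int8_t)(((int16_t)a + (int16_t)b) >> 1); /* arithmetic shift */
}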

// CHECK-LABEL: define <4 x i16> @test_vhadd_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK:   [[VHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16> [[VHADD_V_I]], <4 x i16> [[VHADD_V1_I]]) #4
// CHECK:   [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VHADD_V3_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP2]]
int16x4_t test_vhadd_s16(int16x4_t v1, int16x4_t v2) {
  return vhadd_s16(v1, v2);
}
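
// Note: the no-op bitcast round trips through <8 x i8> (and <16 x i8> for the
// Q forms) in the CHECK lines above and below appear to come from how clang's
// NEON builtin codegen canonicalizes vector operands through byte vectors;
// they carry no value changes and fold away in later optimization passes.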

// CHECK-LABEL: define <2 x i32> @test_vhadd_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK:   [[VHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32> [[VHADD_V_I]], <2 x i32> [[VHADD_V1_I]]) #4
// CHECK:   [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VHADD_V3_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP2]]
int32x2_t test_vhadd_s32(int32x2_t v1, int32x2_t v2) {
  return vhadd_s32(v1, v2);
}

// CHECK-LABEL: define <8 x i8> @test_vhadd_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK:   [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uhadd.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4
// CHECK:   ret <8 x i8> [[VHADD_V_I]]
uint8x8_t test_vhadd_u8(uint8x8_t v1, uint8x8_t v2) {
  return vhadd_u8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vhadd_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK:   [[VHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uhadd.v4i16(<4 x i16> [[VHADD_V_I]], <4 x i16> [[VHADD_V1_I]]) #4
// CHECK:   [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VHADD_V3_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP2]]
uint16x4_t test_vhadd_u16(uint16x4_t v1, uint16x4_t v2) {
  return vhadd_u16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vhadd_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK:   [[VHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uhadd.v2i32(<2 x i32> [[VHADD_V_I]], <2 x i32> [[VHADD_V1_I]]) #4
// CHECK:   [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VHADD_V3_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP2]]
uint32x2_t test_vhadd_u32(uint32x2_t v1, uint32x2_t v2) {
  return vhadd_u32(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vhaddq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK:   [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.shadd.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4
// CHECK:   ret <16 x i8> [[VHADDQ_V_I]]
int8x16_t test_vhaddq_s8(int8x16_t v1, int8x16_t v2) {
  return vhaddq_s8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vhaddq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK:   [[VHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> [[VHADDQ_V_I]], <8 x i16> [[VHADDQ_V1_I]]) #4
// CHECK:   [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VHADDQ_V3_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP2]]
int16x8_t test_vhaddq_s16(int16x8_t v1, int16x8_t v2) {
  return vhaddq_s16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vhaddq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK:   [[VHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.shadd.v4i32(<4 x i32> [[VHADDQ_V_I]], <4 x i32> [[VHADDQ_V1_I]]) #4
// CHECK:   [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VHADDQ_V3_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP2]]
int32x4_t test_vhaddq_s32(int32x4_t v1, int32x4_t v2) {
  return vhaddq_s32(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vhaddq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK:   [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uhadd.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4
// CHECK:   ret <16 x i8> [[VHADDQ_V_I]]
uint8x16_t test_vhaddq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vhaddq_u8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vhaddq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK:   [[VHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> [[VHADDQ_V_I]], <8 x i16> [[VHADDQ_V1_I]]) #4
// CHECK:   [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VHADDQ_V3_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP2]]
uint16x8_t test_vhaddq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vhaddq_u16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vhaddq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK:   [[VHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uhadd.v4i32(<4 x i32> [[VHADDQ_V_I]], <4 x i32> [[VHADDQ_V1_I]]) #4
// CHECK:   [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VHADDQ_V3_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP2]]
uint32x4_t test_vhaddq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vhaddq_u32(v1, v2);
}


// CHECK-LABEL: define <8 x i8> @test_vhsub_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK:   [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.shsub.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4
// CHECK:   ret <8 x i8> [[VHSUB_V_I]]
int8x8_t test_vhsub_s8(int8x8_t v1, int8x8_t v2) {
  return vhsub_s8(v1, v2);
}
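
// shsub/uhsub are the subtracting counterpart: each lane is (a - b) >> 1 with
// the difference formed in double-width arithmetic, so the signed form
// truncates toward negative infinity. Scalar model of one i8 lane
// (hypothetical helper, not part of the checked test):
static inline int8_t scalar_vhsub_s8(int8_t a, int8_t b) {
  return (int8_t)(((int16_t)a - (int16_t)b) >> 1); /* arithmetic shift */
}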

// CHECK-LABEL: define <4 x i16> @test_vhsub_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK:   [[VHSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VHSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.shsub.v4i16(<4 x i16> [[VHSUB_V_I]], <4 x i16> [[VHSUB_V1_I]]) #4
// CHECK:   [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VHSUB_V3_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP2]]
int16x4_t test_vhsub_s16(int16x4_t v1, int16x4_t v2) {
  return vhsub_s16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vhsub_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK:   [[VHSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VHSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.shsub.v2i32(<2 x i32> [[VHSUB_V_I]], <2 x i32> [[VHSUB_V1_I]]) #4
// CHECK:   [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VHSUB_V3_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP2]]
int32x2_t test_vhsub_s32(int32x2_t v1, int32x2_t v2) {
  return vhsub_s32(v1, v2);
}

// CHECK-LABEL: define <8 x i8> @test_vhsub_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK:   [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uhsub.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4
// CHECK:   ret <8 x i8> [[VHSUB_V_I]]
uint8x8_t test_vhsub_u8(uint8x8_t v1, uint8x8_t v2) {
  return vhsub_u8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vhsub_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK:   [[VHSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VHSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uhsub.v4i16(<4 x i16> [[VHSUB_V_I]], <4 x i16> [[VHSUB_V1_I]]) #4
// CHECK:   [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VHSUB_V3_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP2]]
uint16x4_t test_vhsub_u16(uint16x4_t v1, uint16x4_t v2) {
  return vhsub_u16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vhsub_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK:   [[VHSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VHSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uhsub.v2i32(<2 x i32> [[VHSUB_V_I]], <2 x i32> [[VHSUB_V1_I]]) #4
// CHECK:   [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VHSUB_V3_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP2]]
uint32x2_t test_vhsub_u32(uint32x2_t v1, uint32x2_t v2) {
  return vhsub_u32(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vhsubq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK:   [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.shsub.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4
// CHECK:   ret <16 x i8> [[VHSUBQ_V_I]]
int8x16_t test_vhsubq_s8(int8x16_t v1, int8x16_t v2) {
  return vhsubq_s8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vhsubq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK:   [[VHSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VHSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.shsub.v8i16(<8 x i16> [[VHSUBQ_V_I]], <8 x i16> [[VHSUBQ_V1_I]]) #4
// CHECK:   [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VHSUBQ_V3_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP2]]
int16x8_t test_vhsubq_s16(int16x8_t v1, int16x8_t v2) {
  return vhsubq_s16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vhsubq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK:   [[VHSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VHSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.shsub.v4i32(<4 x i32> [[VHSUBQ_V_I]], <4 x i32> [[VHSUBQ_V1_I]]) #4
// CHECK:   [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VHSUBQ_V3_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP2]]
int32x4_t test_vhsubq_s32(int32x4_t v1, int32x4_t v2) {
  return vhsubq_s32(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vhsubq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK:   [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uhsub.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4
// CHECK:   ret <16 x i8> [[VHSUBQ_V_I]]
uint8x16_t test_vhsubq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vhsubq_u8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vhsubq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK:   [[VHSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VHSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uhsub.v8i16(<8 x i16> [[VHSUBQ_V_I]], <8 x i16> [[VHSUBQ_V1_I]]) #4
// CHECK:   [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VHSUBQ_V3_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP2]]
uint16x8_t test_vhsubq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vhsubq_u16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vhsubq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK:   [[VHSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VHSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uhsub.v4i32(<4 x i32> [[VHSUBQ_V_I]], <4 x i32> [[VHSUBQ_V1_I]]) #4
// CHECK:   [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VHSUBQ_V3_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP2]]
uint32x4_t test_vhsubq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vhsubq_u32(v1, v2);
}


// CHECK-LABEL: define <8 x i8> @test_vrhadd_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK:   [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4
// CHECK:   ret <8 x i8> [[VRHADD_V_I]]
int8x8_t test_vrhadd_s8(int8x8_t v1, int8x8_t v2) {
  return vrhadd_s8(v1, v2);
}
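
// srhadd/urhadd are the rounding variant of the halving add: each lane is
// (a + b + 1) >> 1 in double-width arithmetic, i.e. the average rounded
// toward positive infinity. Scalar model of one i8 lane (hypothetical
// helper, not part of the checked test):
static inline int8_t scalar_vrhadd_s8(int8_t a, int8_t b) {
  return (int8_t)(((int16_t)a + (int16_t)b + 1) >> 1); /* rounding average */
}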

// CHECK-LABEL: define <4 x i16> @test_vrhadd_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK:   [[VRHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VRHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.srhadd.v4i16(<4 x i16> [[VRHADD_V_I]], <4 x i16> [[VRHADD_V1_I]]) #4
// CHECK:   [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VRHADD_V3_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP2]]
int16x4_t test_vrhadd_s16(int16x4_t v1, int16x4_t v2) {
  return vrhadd_s16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vrhadd_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK:   [[VRHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VRHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.srhadd.v2i32(<2 x i32> [[VRHADD_V_I]], <2 x i32> [[VRHADD_V1_I]]) #4
// CHECK:   [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VRHADD_V3_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP2]]
int32x2_t test_vrhadd_s32(int32x2_t v1, int32x2_t v2) {
  return vrhadd_s32(v1, v2);
}

// CHECK-LABEL: define <8 x i8> @test_vrhadd_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK:   [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.urhadd.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4
// CHECK:   ret <8 x i8> [[VRHADD_V_I]]
uint8x8_t test_vrhadd_u8(uint8x8_t v1, uint8x8_t v2) {
  return vrhadd_u8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vrhadd_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK:   [[VRHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VRHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.urhadd.v4i16(<4 x i16> [[VRHADD_V_I]], <4 x i16> [[VRHADD_V1_I]]) #4
// CHECK:   [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VRHADD_V3_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP2]]
uint16x4_t test_vrhadd_u16(uint16x4_t v1, uint16x4_t v2) {
  return vrhadd_u16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vrhadd_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK:   [[VRHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VRHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.urhadd.v2i32(<2 x i32> [[VRHADD_V_I]], <2 x i32> [[VRHADD_V1_I]]) #4
// CHECK:   [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VRHADD_V3_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP2]]
uint32x2_t test_vrhadd_u32(uint32x2_t v1, uint32x2_t v2) {
3022   return vrhadd_u32(v1, v2);
3023 }
3024 
3025 // CHECK-LABEL: define <16 x i8> @test_vrhaddq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
3026 // CHECK:   [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.srhadd.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4
3027 // CHECK:   ret <16 x i8> [[VRHADDQ_V_I]]
test_vrhaddq_s8(int8x16_t v1,int8x16_t v2)3028 int8x16_t test_vrhaddq_s8(int8x16_t v1, int8x16_t v2) {
3029   return vrhaddq_s8(v1, v2);
3030 }
3031 
3032 // CHECK-LABEL: define <8 x i16> @test_vrhaddq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
3033 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
3034 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
3035 // CHECK:   [[VRHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
3036 // CHECK:   [[VRHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
3037 // CHECK:   [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> [[VRHADDQ_V_I]], <8 x i16> [[VRHADDQ_V1_I]]) #4
3038 // CHECK:   [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8>
3039 // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VRHADDQ_V3_I]] to <8 x i16>
3040 // CHECK:   ret <8 x i16> [[TMP2]]
test_vrhaddq_s16(int16x8_t v1,int16x8_t v2)3041 int16x8_t test_vrhaddq_s16(int16x8_t v1, int16x8_t v2) {
3042   return vrhaddq_s16(v1, v2);
3043 }
3044 
3045 // CHECK-LABEL: define <4 x i32> @test_vrhaddq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
3046 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
3047 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
3048 // CHECK:   [[VRHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
3049 // CHECK:   [[VRHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
3050 // CHECK:   [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.srhadd.v4i32(<4 x i32> [[VRHADDQ_V_I]], <4 x i32> [[VRHADDQ_V1_I]]) #4
3051 // CHECK:   [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8>
3052 // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VRHADDQ_V3_I]] to <4 x i32>
3053 // CHECK:   ret <4 x i32> [[TMP2]]
test_vrhaddq_s32(int32x4_t v1,int32x4_t v2)3054 int32x4_t test_vrhaddq_s32(int32x4_t v1, int32x4_t v2) {
3055   return vrhaddq_s32(v1, v2);
3056 }
3057 
3058 // CHECK-LABEL: define <16 x i8> @test_vrhaddq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
3059 // CHECK:   [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.urhadd.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4
3060 // CHECK:   ret <16 x i8> [[VRHADDQ_V_I]]
test_vrhaddq_u8(uint8x16_t v1,uint8x16_t v2)3061 uint8x16_t test_vrhaddq_u8(uint8x16_t v1, uint8x16_t v2) {
3062   return vrhaddq_u8(v1, v2);
3063 }
3064 
3065 // CHECK-LABEL: define <8 x i16> @test_vrhaddq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
3066 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
3067 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
3068 // CHECK:   [[VRHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
3069 // CHECK:   [[VRHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
3070 // CHECK:   [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> [[VRHADDQ_V_I]], <8 x i16> [[VRHADDQ_V1_I]]) #4
3071 // CHECK:   [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8>
3072 // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VRHADDQ_V3_I]] to <8 x i16>
3073 // CHECK:   ret <8 x i16> [[TMP2]]
test_vrhaddq_u16(uint16x8_t v1,uint16x8_t v2)3074 uint16x8_t test_vrhaddq_u16(uint16x8_t v1, uint16x8_t v2) {
3075   return vrhaddq_u16(v1, v2);
3076 }
3077 
3078 // CHECK-LABEL: define <4 x i32> @test_vrhaddq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
3079 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
3080 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
3081 // CHECK:   [[VRHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
3082 // CHECK:   [[VRHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
3083 // CHECK:   [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32> [[VRHADDQ_V_I]], <4 x i32> [[VRHADDQ_V1_I]]) #4
3084 // CHECK:   [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8>
3085 // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VRHADDQ_V3_I]] to <4 x i32>
3086 // CHECK:   ret <4 x i32> [[TMP2]]
test_vrhaddq_u32(uint32x4_t v1,uint32x4_t v2)3087 uint32x4_t test_vrhaddq_u32(uint32x4_t v1, uint32x4_t v2) {
3088   return vrhaddq_u32(v1, v2);
3089 }
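
// The vqadd/vqaddq tests below cover saturating addition (sqadd/uqadd):
// results clamp at the limits of the element type instead of wrapping.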
// CHECK-LABEL: define <8 x i8> @test_vqadd_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK:   [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqadd.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK:   ret <8 x i8> [[VQADD_V_I]]
int8x8_t test_vqadd_s8(int8x8_t a, int8x8_t b) {
  return vqadd_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vqadd_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> [[VQADD_V_I]], <4 x i16> [[VQADD_V1_I]]) #4
// CHECK:   [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP2]]
int16x4_t test_vqadd_s16(int16x4_t a, int16x4_t b) {
  return vqadd_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vqadd_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> [[VQADD_V_I]], <2 x i32> [[VQADD_V1_I]]) #4
// CHECK:   [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP2]]
int32x2_t test_vqadd_s32(int32x2_t a, int32x2_t b) {
  return vqadd_s32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vqadd_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqadd.v1i64(<1 x i64> [[VQADD_V_I]], <1 x i64> [[VQADD_V1_I]]) #4
// CHECK:   [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP2]]
int64x1_t test_vqadd_s64(int64x1_t a, int64x1_t b) {
  return vqadd_s64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vqadd_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK:   [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqadd.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK:   ret <8 x i8> [[VQADD_V_I]]
uint8x8_t test_vqadd_u8(uint8x8_t a, uint8x8_t b) {
  return vqadd_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vqadd_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqadd.v4i16(<4 x i16> [[VQADD_V_I]], <4 x i16> [[VQADD_V1_I]]) #4
// CHECK:   [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP2]]
uint16x4_t test_vqadd_u16(uint16x4_t a, uint16x4_t b) {
  return vqadd_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vqadd_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqadd.v2i32(<2 x i32> [[VQADD_V_I]], <2 x i32> [[VQADD_V1_I]]) #4
// CHECK:   [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP2]]
uint32x2_t test_vqadd_u32(uint32x2_t a, uint32x2_t b) {
  return vqadd_u32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vqadd_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqadd.v1i64(<1 x i64> [[VQADD_V_I]], <1 x i64> [[VQADD_V1_I]]) #4
// CHECK:   [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP2]]
uint64x1_t test_vqadd_u64(uint64x1_t a, uint64x1_t b) {
  return vqadd_u64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vqaddq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK:   [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqadd.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK:   ret <16 x i8> [[VQADDQ_V_I]]
int8x16_t test_vqaddq_s8(int8x16_t a, int8x16_t b) {
  return vqaddq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vqaddq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> [[VQADDQ_V_I]], <8 x i16> [[VQADDQ_V1_I]]) #4
// CHECK:   [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP2]]
int16x8_t test_vqaddq_s16(int16x8_t a, int16x8_t b) {
  return vqaddq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vqaddq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQADDQ_V_I]], <4 x i32> [[VQADDQ_V1_I]]) #4
// CHECK:   [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP2]]
int32x4_t test_vqaddq_s32(int32x4_t a, int32x4_t b) {
  return vqaddq_s32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vqaddq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQADDQ_V_I]], <2 x i64> [[VQADDQ_V1_I]]) #4
// CHECK:   [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP2]]
int64x2_t test_vqaddq_s64(int64x2_t a, int64x2_t b) {
  return vqaddq_s64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vqaddq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK:   [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqadd.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK:   ret <16 x i8> [[VQADDQ_V_I]]
uint8x16_t test_vqaddq_u8(uint8x16_t a, uint8x16_t b) {
  return vqaddq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vqaddq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqadd.v8i16(<8 x i16> [[VQADDQ_V_I]], <8 x i16> [[VQADDQ_V1_I]]) #4
// CHECK:   [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP2]]
uint16x8_t test_vqaddq_u16(uint16x8_t a, uint16x8_t b) {
  return vqaddq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vqaddq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqadd.v4i32(<4 x i32> [[VQADDQ_V_I]], <4 x i32> [[VQADDQ_V1_I]]) #4
// CHECK:   [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP2]]
uint32x4_t test_vqaddq_u32(uint32x4_t a, uint32x4_t b) {
  return vqaddq_u32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vqaddq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqadd.v2i64(<2 x i64> [[VQADDQ_V_I]], <2 x i64> [[VQADDQ_V1_I]]) #4
// CHECK:   [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP2]]
uint64x2_t test_vqaddq_u64(uint64x2_t a, uint64x2_t b) {
  return vqaddq_u64(a, b);
}
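
// The vqsub/vqsubq tests below cover saturating subtraction (sqsub/uqsub).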
// CHECK-LABEL: define <8 x i8> @test_vqsub_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK:   [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqsub.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK:   ret <8 x i8> [[VQSUB_V_I]]
int8x8_t test_vqsub_s8(int8x8_t a, int8x8_t b) {
  return vqsub_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vqsub_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> [[VQSUB_V_I]], <4 x i16> [[VQSUB_V1_I]]) #4
// CHECK:   [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP2]]
int16x4_t test_vqsub_s16(int16x4_t a, int16x4_t b) {
  return vqsub_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vqsub_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32> [[VQSUB_V_I]], <2 x i32> [[VQSUB_V1_I]]) #4
// CHECK:   [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP2]]
int32x2_t test_vqsub_s32(int32x2_t a, int32x2_t b) {
  return vqsub_s32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vqsub_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqsub.v1i64(<1 x i64> [[VQSUB_V_I]], <1 x i64> [[VQSUB_V1_I]]) #4
// CHECK:   [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP2]]
int64x1_t test_vqsub_s64(int64x1_t a, int64x1_t b) {
  return vqsub_s64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vqsub_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK:   [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqsub.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK:   ret <8 x i8> [[VQSUB_V_I]]
uint8x8_t test_vqsub_u8(uint8x8_t a, uint8x8_t b) {
  return vqsub_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vqsub_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqsub.v4i16(<4 x i16> [[VQSUB_V_I]], <4 x i16> [[VQSUB_V1_I]]) #4
// CHECK:   [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP2]]
uint16x4_t test_vqsub_u16(uint16x4_t a, uint16x4_t b) {
  return vqsub_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vqsub_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqsub.v2i32(<2 x i32> [[VQSUB_V_I]], <2 x i32> [[VQSUB_V1_I]]) #4
// CHECK:   [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP2]]
uint32x2_t test_vqsub_u32(uint32x2_t a, uint32x2_t b) {
  return vqsub_u32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vqsub_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqsub.v1i64(<1 x i64> [[VQSUB_V_I]], <1 x i64> [[VQSUB_V1_I]]) #4
// CHECK:   [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP2]]
uint64x1_t test_vqsub_u64(uint64x1_t a, uint64x1_t b) {
  return vqsub_u64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vqsubq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK:   [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqsub.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK:   ret <16 x i8> [[VQSUBQ_V_I]]
int8x16_t test_vqsubq_s8(int8x16_t a, int8x16_t b) {
  return vqsubq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vqsubq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> [[VQSUBQ_V_I]], <8 x i16> [[VQSUBQ_V1_I]]) #4
// CHECK:   [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP2]]
int16x8_t test_vqsubq_s16(int16x8_t a, int16x8_t b) {
  return vqsubq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vqsubq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQSUBQ_V_I]], <4 x i32> [[VQSUBQ_V1_I]]) #4
// CHECK:   [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP2]]
int32x4_t test_vqsubq_s32(int32x4_t a, int32x4_t b) {
  return vqsubq_s32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vqsubq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQSUBQ_V_I]], <2 x i64> [[VQSUBQ_V1_I]]) #4
// CHECK:   [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP2]]
int64x2_t test_vqsubq_s64(int64x2_t a, int64x2_t b) {
  return vqsubq_s64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vqsubq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK:   [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqsub.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK:   ret <16 x i8> [[VQSUBQ_V_I]]
uint8x16_t test_vqsubq_u8(uint8x16_t a, uint8x16_t b) {
  return vqsubq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vqsubq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqsub.v8i16(<8 x i16> [[VQSUBQ_V_I]], <8 x i16> [[VQSUBQ_V1_I]]) #4
// CHECK:   [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP2]]
uint16x8_t test_vqsubq_u16(uint16x8_t a, uint16x8_t b) {
  return vqsubq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vqsubq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqsub.v4i32(<4 x i32> [[VQSUBQ_V_I]], <4 x i32> [[VQSUBQ_V1_I]]) #4
// CHECK:   [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP2]]
uint32x4_t test_vqsubq_u32(uint32x4_t a, uint32x4_t b) {
  return vqsubq_u32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vqsubq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqsub.v2i64(<2 x i64> [[VQSUBQ_V_I]], <2 x i64> [[VQSUBQ_V1_I]]) #4
// CHECK:   [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP2]]
uint64x2_t test_vqsubq_u64(uint64x2_t a, uint64x2_t b) {
  return vqsubq_u64(a, b);
}
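
// The vshl/vshlq tests below cover the register-operand shift (sshl/ushl):
// each lane of a is shifted by the signed value in the corresponding lane of
// b, so negative shift amounts shift right. Note the unsigned variants still
// take a signed shift vector.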
// CHECK-LABEL: define <8 x i8> @test_vshl_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK:   [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sshl.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK:   ret <8 x i8> [[VSHL_V_I]]
int8x8_t test_vshl_s8(int8x8_t a, int8x8_t b) {
  return vshl_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vshl_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sshl.v4i16(<4 x i16> [[VSHL_V_I]], <4 x i16> [[VSHL_V1_I]]) #4
// CHECK:   [[VSHL_V3_I:%.*]] = bitcast <4 x i16> [[VSHL_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP2]]
int16x4_t test_vshl_s16(int16x4_t a, int16x4_t b) {
  return vshl_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vshl_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sshl.v2i32(<2 x i32> [[VSHL_V_I]], <2 x i32> [[VSHL_V1_I]]) #4
// CHECK:   [[VSHL_V3_I:%.*]] = bitcast <2 x i32> [[VSHL_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP2]]
int32x2_t test_vshl_s32(int32x2_t a, int32x2_t b) {
  return vshl_s32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vshl_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sshl.v1i64(<1 x i64> [[VSHL_V_I]], <1 x i64> [[VSHL_V1_I]]) #4
// CHECK:   [[VSHL_V3_I:%.*]] = bitcast <1 x i64> [[VSHL_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP2]]
int64x1_t test_vshl_s64(int64x1_t a, int64x1_t b) {
  return vshl_s64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vshl_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK:   [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.ushl.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK:   ret <8 x i8> [[VSHL_V_I]]
uint8x8_t test_vshl_u8(uint8x8_t a, int8x8_t b) {
  return vshl_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vshl_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.ushl.v4i16(<4 x i16> [[VSHL_V_I]], <4 x i16> [[VSHL_V1_I]]) #4
// CHECK:   [[VSHL_V3_I:%.*]] = bitcast <4 x i16> [[VSHL_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP2]]
uint16x4_t test_vshl_u16(uint16x4_t a, int16x4_t b) {
  return vshl_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vshl_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.ushl.v2i32(<2 x i32> [[VSHL_V_I]], <2 x i32> [[VSHL_V1_I]]) #4
// CHECK:   [[VSHL_V3_I:%.*]] = bitcast <2 x i32> [[VSHL_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP2]]
uint32x2_t test_vshl_u32(uint32x2_t a, int32x2_t b) {
  return vshl_u32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vshl_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.ushl.v1i64(<1 x i64> [[VSHL_V_I]], <1 x i64> [[VSHL_V1_I]]) #4
// CHECK:   [[VSHL_V3_I:%.*]] = bitcast <1 x i64> [[VSHL_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP2]]
uint64x1_t test_vshl_u64(uint64x1_t a, int64x1_t b) {
  return vshl_u64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vshlq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK:   [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK:   ret <16 x i8> [[VSHLQ_V_I]]
int8x16_t test_vshlq_s8(int8x16_t a, int8x16_t b) {
  return vshlq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vshlq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> [[VSHLQ_V_I]], <8 x i16> [[VSHLQ_V1_I]]) #4
// CHECK:   [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP2]]
int16x8_t test_vshlq_s16(int16x8_t a, int16x8_t b) {
  return vshlq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vshlq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[VSHLQ_V_I]], <4 x i32> [[VSHLQ_V1_I]]) #4
// CHECK:   [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP2]]
int32x4_t test_vshlq_s32(int32x4_t a, int32x4_t b) {
  return vshlq_s32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vshlq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> [[VSHLQ_V_I]], <2 x i64> [[VSHLQ_V1_I]]) #4
// CHECK:   [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP2]]
int64x2_t test_vshlq_s64(int64x2_t a, int64x2_t b) {
  return vshlq_s64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vshlq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK:   [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.ushl.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK:   ret <16 x i8> [[VSHLQ_V_I]]
uint8x16_t test_vshlq_u8(uint8x16_t a, int8x16_t b) {
  return vshlq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vshlq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> [[VSHLQ_V_I]], <8 x i16> [[VSHLQ_V1_I]]) #4
// CHECK:   [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP2]]
uint16x8_t test_vshlq_u16(uint16x8_t a, int16x8_t b) {
  return vshlq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vshlq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> [[VSHLQ_V_I]], <4 x i32> [[VSHLQ_V1_I]]) #4
// CHECK:   [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP2]]
uint32x4_t test_vshlq_u32(uint32x4_t a, int32x4_t b) {
  return vshlq_u32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vshlq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> [[VSHLQ_V_I]], <2 x i64> [[VSHLQ_V1_I]]) #4
// CHECK:   [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP2]]
uint64x2_t test_vshlq_u64(uint64x2_t a, int64x2_t b) {
  return vshlq_u64(a, b);
}
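
// The vqshl/vqshlq tests below cover the saturating variant (sqshl/uqshl)
// of the register-operand shift.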
// CHECK-LABEL: define <8 x i8> @test_vqshl_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK:   [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK:   ret <8 x i8> [[VQSHL_V_I]]
int8x8_t test_vqshl_s8(int8x8_t a, int8x8_t b) {
  return vqshl_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vqshl_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[VQSHL_V_I]], <4 x i16> [[VQSHL_V1_I]]) #4
// CHECK:   [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP2]]
int16x4_t test_vqshl_s16(int16x4_t a, int16x4_t b) {
  return vqshl_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vqshl_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> [[VQSHL_V_I]], <2 x i32> [[VQSHL_V1_I]]) #4
// CHECK:   [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP2]]
int32x2_t test_vqshl_s32(int32x2_t a, int32x2_t b) {
  return vqshl_s32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vqshl_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> [[VQSHL_V_I]], <1 x i64> [[VQSHL_V1_I]]) #4
// CHECK:   [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP2]]
int64x1_t test_vqshl_s64(int64x1_t a, int64x1_t b) {
  return vqshl_s64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vqshl_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK:   [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK:   ret <8 x i8> [[VQSHL_V_I]]
uint8x8_t test_vqshl_u8(uint8x8_t a, int8x8_t b) {
  return vqshl_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vqshl_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[VQSHL_V_I]], <4 x i16> [[VQSHL_V1_I]]) #4
// CHECK:   [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP2]]
uint16x4_t test_vqshl_u16(uint16x4_t a, int16x4_t b) {
  return vqshl_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vqshl_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> [[VQSHL_V_I]], <2 x i32> [[VQSHL_V1_I]]) #4
// CHECK:   [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP2]]
uint32x2_t test_vqshl_u32(uint32x2_t a, int32x2_t b) {
  return vqshl_u32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vqshl_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> [[VQSHL_V_I]], <1 x i64> [[VQSHL_V1_I]]) #4
// CHECK:   [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP2]]
uint64x1_t test_vqshl_u64(uint64x1_t a, int64x1_t b) {
  return vqshl_u64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vqshlq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK:   [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK:   ret <16 x i8> [[VQSHLQ_V_I]]
int8x16_t test_vqshlq_s8(int8x16_t a, int8x16_t b) {
  return vqshlq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vqshlq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> [[VQSHLQ_V_I]], <8 x i16> [[VQSHLQ_V1_I]]) #4
// CHECK:   [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP2]]
int16x8_t test_vqshlq_s16(int16x8_t a, int16x8_t b) {
  return vqshlq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vqshlq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> [[VQSHLQ_V_I]], <4 x i32> [[VQSHLQ_V1_I]]) #4
// CHECK:   [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP2]]
int32x4_t test_vqshlq_s32(int32x4_t a, int32x4_t b) {
  return vqshlq_s32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vqshlq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> [[VQSHLQ_V_I]], <2 x i64> [[VQSHLQ_V1_I]]) #4
// CHECK:   [[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP2]]
int64x2_t test_vqshlq_s64(int64x2_t a, int64x2_t b) {
  return vqshlq_s64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vqshlq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK:   [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK:   ret <16 x i8> [[VQSHLQ_V_I]]
uint8x16_t test_vqshlq_u8(uint8x16_t a, int8x16_t b) {
  return vqshlq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vqshlq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> [[VQSHLQ_V_I]], <8 x i16> [[VQSHLQ_V1_I]]) #4
// CHECK:   [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP2]]
uint16x8_t test_vqshlq_u16(uint16x8_t a, int16x8_t b) {
  return vqshlq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vqshlq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> [[VQSHLQ_V_I]], <4 x i32> [[VQSHLQ_V1_I]]) #4
// CHECK:   [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP2]]
uint32x4_t test_vqshlq_u32(uint32x4_t a, int32x4_t b) {
  return vqshlq_u32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vqshlq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> [[VQSHLQ_V_I]], <2 x i64> [[VQSHLQ_V1_I]]) #4
// CHECK:   [[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP2]]
uint64x2_t test_vqshlq_u64(uint64x2_t a, int64x2_t b) {
  return vqshlq_u64(a, b);
}
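
// The vrshl/vrshlq tests below cover the rounding variant (srshl/urshl),
// where right shifts round the result instead of truncating.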
// CHECK-LABEL: define <8 x i8> @test_vrshl_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK:   [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK:   ret <8 x i8> [[VRSHL_V_I]]
int8x8_t test_vrshl_s8(int8x8_t a, int8x8_t b) {
  return vrshl_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vrshl_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[VRSHL_V_I]], <4 x i16> [[VRSHL_V1_I]]) #4
// CHECK:   [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP2]]
int16x4_t test_vrshl_s16(int16x4_t a, int16x4_t b) {
  return vrshl_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vrshl_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[VRSHL_V_I]], <2 x i32> [[VRSHL_V1_I]]) #4
// CHECK:   [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP2]]
int32x2_t test_vrshl_s32(int32x2_t a, int32x2_t b) {
  return vrshl_s32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vrshl_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[VRSHL_V_I]], <1 x i64> [[VRSHL_V1_I]]) #4
// CHECK:   [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP2]]
int64x1_t test_vrshl_s64(int64x1_t a, int64x1_t b) {
  return vrshl_s64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vrshl_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK:   [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK:   ret <8 x i8> [[VRSHL_V_I]]
uint8x8_t test_vrshl_u8(uint8x8_t a, int8x8_t b) {
  return vrshl_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vrshl_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[VRSHL_V_I]], <4 x i16> [[VRSHL_V1_I]]) #4
// CHECK:   [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP2]]
uint16x4_t test_vrshl_u16(uint16x4_t a, int16x4_t b) {
  return vrshl_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vrshl_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[VRSHL_V_I]], <2 x i32> [[VRSHL_V1_I]]) #4
// CHECK:   [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP2]]
uint32x2_t test_vrshl_u32(uint32x2_t a, int32x2_t b) {
  return vrshl_u32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vrshl_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[VRSHL_V_I]], <1 x i64> [[VRSHL_V1_I]]) #4
// CHECK:   [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP2]]
uint64x1_t test_vrshl_u64(uint64x1_t a, int64x1_t b) {
  return vrshl_u64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vrshlq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK:   [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK:   ret <16 x i8> [[VRSHLQ_V_I]]
int8x16_t test_vrshlq_s8(int8x16_t a, int8x16_t b) {
  return vrshlq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vrshlq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[VRSHLQ_V_I]], <8 x i16> [[VRSHLQ_V1_I]]) #4
// CHECK:   [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP2]]
int16x8_t test_vrshlq_s16(int16x8_t a, int16x8_t b) {
  return vrshlq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vrshlq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[VRSHLQ_V_I]], <4 x i32> [[VRSHLQ_V1_I]]) #4
// CHECK:   [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP2]]
int32x4_t test_vrshlq_s32(int32x4_t a, int32x4_t b) {
  return vrshlq_s32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vrshlq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[VRSHLQ_V_I]], <2 x i64> [[VRSHLQ_V1_I]]) #4
// CHECK:   [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP2]]
int64x2_t test_vrshlq_s64(int64x2_t a, int64x2_t b) {
  return vrshlq_s64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vrshlq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK:   [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK:   ret <16 x i8> [[VRSHLQ_V_I]]
uint8x16_t test_vrshlq_u8(uint8x16_t a, int8x16_t b) {
  return vrshlq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vrshlq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[VRSHLQ_V_I]], <8 x i16> [[VRSHLQ_V1_I]]) #4
// CHECK:   [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP2]]
uint16x8_t test_vrshlq_u16(uint16x8_t a, int16x8_t b) {
  return vrshlq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vrshlq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[VRSHLQ_V_I]], <4 x i32> [[VRSHLQ_V1_I]]) #4
// CHECK:   [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP2]]
uint32x4_t test_vrshlq_u32(uint32x4_t a, int32x4_t b) {
  return vrshlq_u32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vrshlq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VRSHLQ_V_I]], <2 x i64> [[VRSHLQ_V1_I]]) #4
// CHECK:   [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP2]]
uint64x2_t test_vrshlq_u64(uint64x2_t a, int64x2_t b) {
  return vrshlq_u64(a, b);
}


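// vqrshl/vqrshlq: saturating rounding shift left, combining the saturation
// of vqshl with the rounding of vrshl; lowered to
// llvm.aarch64.neon.sqrshl/uqrshl.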
// CHECK-LABEL: define <8 x i8> @test_vqrshl_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK:   [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK:   ret <8 x i8> [[VQRSHL_V_I]]
int8x8_t test_vqrshl_s8(int8x8_t a, int8x8_t b) {
  return vqrshl_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vqrshl_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> [[VQRSHL_V_I]], <4 x i16> [[VQRSHL_V1_I]]) #4
// CHECK:   [[VQRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP2]]
int16x4_t test_vqrshl_s16(int16x4_t a, int16x4_t b) {
  return vqrshl_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vqrshl_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32> [[VQRSHL_V_I]], <2 x i32> [[VQRSHL_V1_I]]) #4
// CHECK:   [[VQRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP2]]
int32x2_t test_vqrshl_s32(int32x2_t a, int32x2_t b) {
  return vqrshl_s32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vqrshl_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> [[VQRSHL_V_I]], <1 x i64> [[VQRSHL_V1_I]]) #4
// CHECK:   [[VQRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP2]]
int64x1_t test_vqrshl_s64(int64x1_t a, int64x1_t b) {
  return vqrshl_s64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vqrshl_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK:   [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK:   ret <8 x i8> [[VQRSHL_V_I]]
uint8x8_t test_vqrshl_u8(uint8x8_t a, int8x8_t b) {
  return vqrshl_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vqrshl_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> [[VQRSHL_V_I]], <4 x i16> [[VQRSHL_V1_I]]) #4
// CHECK:   [[VQRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP2]]
uint16x4_t test_vqrshl_u16(uint16x4_t a, int16x4_t b) {
  return vqrshl_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vqrshl_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32> [[VQRSHL_V_I]], <2 x i32> [[VQRSHL_V1_I]]) #4
// CHECK:   [[VQRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP2]]
uint32x2_t test_vqrshl_u32(uint32x2_t a, int32x2_t b) {
  return vqrshl_u32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vqrshl_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> [[VQRSHL_V_I]], <1 x i64> [[VQRSHL_V1_I]]) #4
// CHECK:   [[VQRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP2]]
uint64x1_t test_vqrshl_u64(uint64x1_t a, int64x1_t b) {
  return vqrshl_u64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vqrshlq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK:   [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK:   ret <16 x i8> [[VQRSHLQ_V_I]]
int8x16_t test_vqrshlq_s8(int8x16_t a, int8x16_t b) {
  return vqrshlq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vqrshlq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16> [[VQRSHLQ_V_I]], <8 x i16> [[VQRSHLQ_V1_I]]) #4
// CHECK:   [[VQRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP2]]
int16x8_t test_vqrshlq_s16(int16x8_t a, int16x8_t b) {
  return vqrshlq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vqrshlq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32> [[VQRSHLQ_V_I]], <4 x i32> [[VQRSHLQ_V1_I]]) #4
// CHECK:   [[VQRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP2]]
int32x4_t test_vqrshlq_s32(int32x4_t a, int32x4_t b) {
  return vqrshlq_s32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vqrshlq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64> [[VQRSHLQ_V_I]], <2 x i64> [[VQRSHLQ_V1_I]]) #4
// CHECK:   [[VQRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP2]]
int64x2_t test_vqrshlq_s64(int64x2_t a, int64x2_t b) {
  return vqrshlq_s64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vqrshlq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK:   [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqrshl.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK:   ret <16 x i8> [[VQRSHLQ_V_I]]
uint8x16_t test_vqrshlq_u8(uint8x16_t a, int8x16_t b) {
  return vqrshlq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vqrshlq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16> [[VQRSHLQ_V_I]], <8 x i16> [[VQRSHLQ_V1_I]]) #4
// CHECK:   [[VQRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP2]]
uint16x8_t test_vqrshlq_u16(uint16x8_t a, int16x8_t b) {
  return vqrshlq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vqrshlq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32> [[VQRSHLQ_V_I]], <4 x i32> [[VQRSHLQ_V1_I]]) #4
// CHECK:   [[VQRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP2]]
uint32x4_t test_vqrshlq_u32(uint32x4_t a, int32x4_t b) {
  return vqrshlq_u32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vqrshlq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64> [[VQRSHLQ_V_I]], <2 x i64> [[VQRSHLQ_V1_I]]) #4
// CHECK:   [[VQRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP2]]
uint64x2_t test_vqrshlq_u64(uint64x2_t a, int64x2_t b) {
  return vqrshlq_u64(a, b);
}

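// vsli_n: shift left and insert. Lanes of the second operand are shifted
// left by the immediate and inserted into the first operand, whose low n
// bits per lane are preserved; the poly64 variants are exercised here with
// an immediate of 0.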
// CHECK-LABEL: define <1 x i64> @test_vsli_n_p64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VSLI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], i32 0)
// CHECK:   ret <1 x i64> [[VSLI_N2]]
poly64x1_t test_vsli_n_p64(poly64x1_t a, poly64x1_t b) {
  return vsli_n_p64(a, b, 0);
}

// CHECK-LABEL: define <2 x i64> @test_vsliq_n_p64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VSLI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], i32 0)
// CHECK:   ret <2 x i64> [[VSLI_N2]]
poly64x2_t test_vsliq_n_p64(poly64x2_t a, poly64x2_t b) {
  return vsliq_n_p64(a, b, 0);
}

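// vmax/vmaxq: element-wise maximum, lowered to llvm.aarch64.neon.{s,u}max
// for integer lanes and llvm.aarch64.neon.fmax for floating point.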
// CHECK-LABEL: define <8 x i8> @test_vmax_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK:   [[VMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smax.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK:   ret <8 x i8> [[VMAX_I]]
int8x8_t test_vmax_s8(int8x8_t a, int8x8_t b) {
  return vmax_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vmax_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smax.v4i16(<4 x i16> [[VMAX_I]], <4 x i16> [[VMAX1_I]]) #4
// CHECK:   ret <4 x i16> [[VMAX2_I]]
int16x4_t test_vmax_s16(int16x4_t a, int16x4_t b) {
  return vmax_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vmax_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smax.v2i32(<2 x i32> [[VMAX_I]], <2 x i32> [[VMAX1_I]]) #4
// CHECK:   ret <2 x i32> [[VMAX2_I]]
int32x2_t test_vmax_s32(int32x2_t a, int32x2_t b) {
  return vmax_s32(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vmax_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK:   [[VMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umax.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK:   ret <8 x i8> [[VMAX_I]]
uint8x8_t test_vmax_u8(uint8x8_t a, uint8x8_t b) {
  return vmax_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vmax_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umax.v4i16(<4 x i16> [[VMAX_I]], <4 x i16> [[VMAX1_I]]) #4
// CHECK:   ret <4 x i16> [[VMAX2_I]]
uint16x4_t test_vmax_u16(uint16x4_t a, uint16x4_t b) {
  return vmax_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vmax_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umax.v2i32(<2 x i32> [[VMAX_I]], <2 x i32> [[VMAX1_I]]) #4
// CHECK:   ret <2 x i32> [[VMAX2_I]]
uint32x2_t test_vmax_u32(uint32x2_t a, uint32x2_t b) {
  return vmax_u32(a, b);
}

// CHECK-LABEL: define <2 x float> @test_vmax_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[VMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[VMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK:   [[VMAX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmax.v2f32(<2 x float> [[VMAX_I]], <2 x float> [[VMAX1_I]]) #4
// CHECK:   ret <2 x float> [[VMAX2_I]]
float32x2_t test_vmax_f32(float32x2_t a, float32x2_t b) {
  return vmax_f32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vmaxq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK:   [[VMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smax.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK:   ret <16 x i8> [[VMAX_I]]
int8x16_t test_vmaxq_s8(int8x16_t a, int8x16_t b) {
  return vmaxq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vmaxq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smax.v8i16(<8 x i16> [[VMAX_I]], <8 x i16> [[VMAX1_I]]) #4
// CHECK:   ret <8 x i16> [[VMAX2_I]]
int16x8_t test_vmaxq_s16(int16x8_t a, int16x8_t b) {
  return vmaxq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vmaxq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32> [[VMAX_I]], <4 x i32> [[VMAX1_I]]) #4
// CHECK:   ret <4 x i32> [[VMAX2_I]]
int32x4_t test_vmaxq_s32(int32x4_t a, int32x4_t b) {
  return vmaxq_s32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vmaxq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK:   [[VMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umax.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK:   ret <16 x i8> [[VMAX_I]]
uint8x16_t test_vmaxq_u8(uint8x16_t a, uint8x16_t b) {
  return vmaxq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vmaxq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umax.v8i16(<8 x i16> [[VMAX_I]], <8 x i16> [[VMAX1_I]]) #4
// CHECK:   ret <8 x i16> [[VMAX2_I]]
uint16x8_t test_vmaxq_u16(uint16x8_t a, uint16x8_t b) {
  return vmaxq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vmaxq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umax.v4i32(<4 x i32> [[VMAX_I]], <4 x i32> [[VMAX1_I]]) #4
// CHECK:   ret <4 x i32> [[VMAX2_I]]
uint32x4_t test_vmaxq_u32(uint32x4_t a, uint32x4_t b) {
  return vmaxq_u32(a, b);
}

// CHECK-LABEL: define <4 x float> @test_vmaxq_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[VMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK:   [[VMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK:   [[VMAX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmax.v4f32(<4 x float> [[VMAX_I]], <4 x float> [[VMAX1_I]]) #4
// CHECK:   ret <4 x float> [[VMAX2_I]]
float32x4_t test_vmaxq_f32(float32x4_t a, float32x4_t b) {
  return vmaxq_f32(a, b);
}

// CHECK-LABEL: define <2 x double> @test_vmaxq_f64(<2 x double> %a, <2 x double> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK:   [[VMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[VMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CHECK:   [[VMAX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmax.v2f64(<2 x double> [[VMAX_I]], <2 x double> [[VMAX1_I]]) #4
// CHECK:   ret <2 x double> [[VMAX2_I]]
float64x2_t test_vmaxq_f64(float64x2_t a, float64x2_t b) {
  return vmaxq_f64(a, b);
}


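// vmin/vminq: element-wise minimum, the mirror of the vmax tests above,
// lowered to llvm.aarch64.neon.{s,u}min and llvm.aarch64.neon.fmin.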
// CHECK-LABEL: define <8 x i8> @test_vmin_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK:   [[VMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smin.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK:   ret <8 x i8> [[VMIN_I]]
int8x8_t test_vmin_s8(int8x8_t a, int8x8_t b) {
  return vmin_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vmin_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smin.v4i16(<4 x i16> [[VMIN_I]], <4 x i16> [[VMIN1_I]]) #4
// CHECK:   ret <4 x i16> [[VMIN2_I]]
int16x4_t test_vmin_s16(int16x4_t a, int16x4_t b) {
  return vmin_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vmin_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smin.v2i32(<2 x i32> [[VMIN_I]], <2 x i32> [[VMIN1_I]]) #4
// CHECK:   ret <2 x i32> [[VMIN2_I]]
int32x2_t test_vmin_s32(int32x2_t a, int32x2_t b) {
  return vmin_s32(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vmin_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK:   [[VMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umin.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK:   ret <8 x i8> [[VMIN_I]]
uint8x8_t test_vmin_u8(uint8x8_t a, uint8x8_t b) {
  return vmin_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vmin_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umin.v4i16(<4 x i16> [[VMIN_I]], <4 x i16> [[VMIN1_I]]) #4
// CHECK:   ret <4 x i16> [[VMIN2_I]]
uint16x4_t test_vmin_u16(uint16x4_t a, uint16x4_t b) {
  return vmin_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vmin_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umin.v2i32(<2 x i32> [[VMIN_I]], <2 x i32> [[VMIN1_I]]) #4
// CHECK:   ret <2 x i32> [[VMIN2_I]]
uint32x2_t test_vmin_u32(uint32x2_t a, uint32x2_t b) {
  return vmin_u32(a, b);
}

// CHECK-LABEL: define <2 x float> @test_vmin_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[VMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[VMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK:   [[VMIN2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmin.v2f32(<2 x float> [[VMIN_I]], <2 x float> [[VMIN1_I]]) #4
// CHECK:   ret <2 x float> [[VMIN2_I]]
float32x2_t test_vmin_f32(float32x2_t a, float32x2_t b) {
  return vmin_f32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vminq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK:   [[VMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smin.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK:   ret <16 x i8> [[VMIN_I]]
int8x16_t test_vminq_s8(int8x16_t a, int8x16_t b) {
  return vminq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vminq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smin.v8i16(<8 x i16> [[VMIN_I]], <8 x i16> [[VMIN1_I]]) #4
// CHECK:   ret <8 x i16> [[VMIN2_I]]
int16x8_t test_vminq_s16(int16x8_t a, int16x8_t b) {
  return vminq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vminq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smin.v4i32(<4 x i32> [[VMIN_I]], <4 x i32> [[VMIN1_I]]) #4
// CHECK:   ret <4 x i32> [[VMIN2_I]]
int32x4_t test_vminq_s32(int32x4_t a, int32x4_t b) {
  return vminq_s32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vminq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK:   [[VMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umin.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK:   ret <16 x i8> [[VMIN_I]]
uint8x16_t test_vminq_u8(uint8x16_t a, uint8x16_t b) {
  return vminq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vminq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umin.v8i16(<8 x i16> [[VMIN_I]], <8 x i16> [[VMIN1_I]]) #4
// CHECK:   ret <8 x i16> [[VMIN2_I]]
uint16x8_t test_vminq_u16(uint16x8_t a, uint16x8_t b) {
  return vminq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vminq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umin.v4i32(<4 x i32> [[VMIN_I]], <4 x i32> [[VMIN1_I]]) #4
// CHECK:   ret <4 x i32> [[VMIN2_I]]
uint32x4_t test_vminq_u32(uint32x4_t a, uint32x4_t b) {
  return vminq_u32(a, b);
}

// CHECK-LABEL: define <4 x float> @test_vminq_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[VMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK:   [[VMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK:   [[VMIN2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmin.v4f32(<4 x float> [[VMIN_I]], <4 x float> [[VMIN1_I]]) #4
// CHECK:   ret <4 x float> [[VMIN2_I]]
float32x4_t test_vminq_f32(float32x4_t a, float32x4_t b) {
  return vminq_f32(a, b);
}

// CHECK-LABEL: define <2 x double> @test_vminq_f64(<2 x double> %a, <2 x double> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK:   [[VMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[VMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CHECK:   [[VMIN2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmin.v2f64(<2 x double> [[VMIN_I]], <2 x double> [[VMIN1_I]]) #4
// CHECK:   ret <2 x double> [[VMIN2_I]]
float64x2_t test_vminq_f64(float64x2_t a, float64x2_t b) {
  return vminq_f64(a, b);
}

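// vmaxnm/vminnm: floating-point maximum/minimum with IEEE 754-2008
// maxNum/minNum semantics, so a single quiet NaN operand loses to the
// numeric operand instead of propagating as it does for vmax/vmin.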
// CHECK-LABEL: define <2 x float> @test_vmaxnm_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[VMAXNM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[VMAXNM1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK:   [[VMAXNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxnm.v2f32(<2 x float> [[VMAXNM_I]], <2 x float> [[VMAXNM1_I]]) #4
// CHECK:   ret <2 x float> [[VMAXNM2_I]]
float32x2_t test_vmaxnm_f32(float32x2_t a, float32x2_t b) {
  return vmaxnm_f32(a, b);
}

// CHECK-LABEL: define <4 x float> @test_vmaxnmq_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[VMAXNM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK:   [[VMAXNM1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK:   [[VMAXNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxnm.v4f32(<4 x float> [[VMAXNM_I]], <4 x float> [[VMAXNM1_I]]) #4
// CHECK:   ret <4 x float> [[VMAXNM2_I]]
float32x4_t test_vmaxnmq_f32(float32x4_t a, float32x4_t b) {
  return vmaxnmq_f32(a, b);
}

// CHECK-LABEL: define <2 x double> @test_vmaxnmq_f64(<2 x double> %a, <2 x double> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK:   [[VMAXNM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[VMAXNM1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CHECK:   [[VMAXNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxnm.v2f64(<2 x double> [[VMAXNM_I]], <2 x double> [[VMAXNM1_I]]) #4
// CHECK:   ret <2 x double> [[VMAXNM2_I]]
float64x2_t test_vmaxnmq_f64(float64x2_t a, float64x2_t b) {
  return vmaxnmq_f64(a, b);
}

// CHECK-LABEL: define <2 x float> @test_vminnm_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[VMINNM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[VMINNM1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK:   [[VMINNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminnm.v2f32(<2 x float> [[VMINNM_I]], <2 x float> [[VMINNM1_I]]) #4
// CHECK:   ret <2 x float> [[VMINNM2_I]]
float32x2_t test_vminnm_f32(float32x2_t a, float32x2_t b) {
  return vminnm_f32(a, b);
}

// CHECK-LABEL: define <4 x float> @test_vminnmq_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[VMINNM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK:   [[VMINNM1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK:   [[VMINNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminnm.v4f32(<4 x float> [[VMINNM_I]], <4 x float> [[VMINNM1_I]]) #4
// CHECK:   ret <4 x float> [[VMINNM2_I]]
float32x4_t test_vminnmq_f32(float32x4_t a, float32x4_t b) {
  return vminnmq_f32(a, b);
}

// CHECK-LABEL: define <2 x double> @test_vminnmq_f64(<2 x double> %a, <2 x double> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK:   [[VMINNM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[VMINNM1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CHECK:   [[VMINNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminnm.v2f64(<2 x double> [[VMINNM_I]], <2 x double> [[VMINNM1_I]]) #4
// CHECK:   ret <2 x double> [[VMINNM2_I]]
float64x2_t test_vminnmq_f64(float64x2_t a, float64x2_t b) {
  return vminnmq_f64(a, b);
}

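// vpmax: pairwise maximum. Adjacent lane pairs drawn from the concatenation
// of the two operands are reduced, each result lane being the larger
// element of its pair.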
// CHECK-LABEL: define <8 x i8> @test_vpmax_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK:   [[VPMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smaxp.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK:   ret <8 x i8> [[VPMAX_I]]
int8x8_t test_vpmax_s8(int8x8_t a, int8x8_t b) {
  return vpmax_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vpmax_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VPMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VPMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VPMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smaxp.v4i16(<4 x i16> [[VPMAX_I]], <4 x i16> [[VPMAX1_I]]) #4
// CHECK:   ret <4 x i16> [[VPMAX2_I]]
int16x4_t test_vpmax_s16(int16x4_t a, int16x4_t b) {
  return vpmax_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vpmax_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VPMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VPMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VPMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smaxp.v2i32(<2 x i32> [[VPMAX_I]], <2 x i32> [[VPMAX1_I]]) #4
// CHECK:   ret <2 x i32> [[VPMAX2_I]]
int32x2_t test_vpmax_s32(int32x2_t a, int32x2_t b) {
  return vpmax_s32(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vpmax_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK:   [[VPMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umaxp.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK:   ret <8 x i8> [[VPMAX_I]]
uint8x8_t test_vpmax_u8(uint8x8_t a, uint8x8_t b) {
  return vpmax_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vpmax_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VPMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VPMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VPMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umaxp.v4i16(<4 x i16> [[VPMAX_I]], <4 x i16> [[VPMAX1_I]]) #4
// CHECK:   ret <4 x i16> [[VPMAX2_I]]
uint16x4_t test_vpmax_u16(uint16x4_t a, uint16x4_t b) {
  return vpmax_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vpmax_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VPMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VPMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VPMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umaxp.v2i32(<2 x i32> [[VPMAX_I]], <2 x i32> [[VPMAX1_I]]) #4
// CHECK:   ret <2 x i32> [[VPMAX2_I]]
uint32x2_t test_vpmax_u32(uint32x2_t a, uint32x2_t b) {
  return vpmax_u32(a, b);
}

// CHECK-LABEL: define <2 x float> @test_vpmax_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[VPMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[VPMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK:   [[VPMAX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxp.v2f32(<2 x float> [[VPMAX_I]], <2 x float> [[VPMAX1_I]]) #4
// CHECK:   ret <2 x float> [[VPMAX2_I]]
float32x2_t test_vpmax_f32(float32x2_t a, float32x2_t b) {
  return vpmax_f32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vpmaxq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK:   [[VPMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smaxp.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK:   ret <16 x i8> [[VPMAX_I]]
int8x16_t test_vpmaxq_s8(int8x16_t a, int8x16_t b) {
  return vpmaxq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vpmaxq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VPMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smaxp.v8i16(<8 x i16> [[VPMAX_I]], <8 x i16> [[VPMAX1_I]]) #4
// CHECK:   ret <8 x i16> [[VPMAX2_I]]
int16x8_t test_vpmaxq_s16(int16x8_t a, int16x8_t b) {
  return vpmaxq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vpmaxq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VPMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smaxp.v4i32(<4 x i32> [[VPMAX_I]], <4 x i32> [[VPMAX1_I]]) #4
// CHECK:   ret <4 x i32> [[VPMAX2_I]]
int32x4_t test_vpmaxq_s32(int32x4_t a, int32x4_t b) {
  return vpmaxq_s32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vpmaxq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK:   [[VPMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umaxp.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK:   ret <16 x i8> [[VPMAX_I]]
uint8x16_t test_vpmaxq_u8(uint8x16_t a, uint8x16_t b) {
  return vpmaxq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vpmaxq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VPMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umaxp.v8i16(<8 x i16> [[VPMAX_I]], <8 x i16> [[VPMAX1_I]]) #4
// CHECK:   ret <8 x i16> [[VPMAX2_I]]
uint16x8_t test_vpmaxq_u16(uint16x8_t a, uint16x8_t b) {
  return vpmaxq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vpmaxq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VPMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umaxp.v4i32(<4 x i32> [[VPMAX_I]], <4 x i32> [[VPMAX1_I]]) #4
// CHECK:   ret <4 x i32> [[VPMAX2_I]]
uint32x4_t test_vpmaxq_u32(uint32x4_t a, uint32x4_t b) {
  return vpmaxq_u32(a, b);
}

// CHECK-LABEL: define <4 x float> @test_vpmaxq_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK:   [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK:   [[VPMAX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxp.v4f32(<4 x float> [[VPMAX_I]], <4 x float> [[VPMAX1_I]]) #4
// CHECK:   ret <4 x float> [[VPMAX2_I]]
float32x4_t test_vpmaxq_f32(float32x4_t a, float32x4_t b) {
  return vpmaxq_f32(a, b);
}

// CHECK-LABEL: define <2 x double> @test_vpmaxq_f64(<2 x double> %a, <2 x double> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK:   [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CHECK:   [[VPMAX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxp.v2f64(<2 x double> [[VPMAX_I]], <2 x double> [[VPMAX1_I]]) #4
// CHECK:   ret <2 x double> [[VPMAX2_I]]
float64x2_t test_vpmaxq_f64(float64x2_t a, float64x2_t b) {
  return vpmaxq_f64(a, b);
}

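// vpmin: pairwise minimum, structured exactly like the vpmax tests above
// but lowered to llvm.aarch64.neon.{s,u}minp and llvm.aarch64.neon.fminp.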
// CHECK-LABEL: define <8 x i8> @test_vpmin_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK:   [[VPMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sminp.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK:   ret <8 x i8> [[VPMIN_I]]
int8x8_t test_vpmin_s8(int8x8_t a, int8x8_t b) {
  return vpmin_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vpmin_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VPMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VPMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VPMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sminp.v4i16(<4 x i16> [[VPMIN_I]], <4 x i16> [[VPMIN1_I]]) #4
// CHECK:   ret <4 x i16> [[VPMIN2_I]]
int16x4_t test_vpmin_s16(int16x4_t a, int16x4_t b) {
  return vpmin_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vpmin_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VPMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VPMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VPMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sminp.v2i32(<2 x i32> [[VPMIN_I]], <2 x i32> [[VPMIN1_I]]) #4
// CHECK:   ret <2 x i32> [[VPMIN2_I]]
int32x2_t test_vpmin_s32(int32x2_t a, int32x2_t b) {
  return vpmin_s32(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vpmin_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK:   [[VPMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uminp.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK:   ret <8 x i8> [[VPMIN_I]]
uint8x8_t test_vpmin_u8(uint8x8_t a, uint8x8_t b) {
  return vpmin_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vpmin_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VPMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VPMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VPMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uminp.v4i16(<4 x i16> [[VPMIN_I]], <4 x i16> [[VPMIN1_I]]) #4
// CHECK:   ret <4 x i16> [[VPMIN2_I]]
uint16x4_t test_vpmin_u16(uint16x4_t a, uint16x4_t b) {
  return vpmin_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vpmin_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VPMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VPMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VPMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uminp.v2i32(<2 x i32> [[VPMIN_I]], <2 x i32> [[VPMIN1_I]]) #4
// CHECK:   ret <2 x i32> [[VPMIN2_I]]
uint32x2_t test_vpmin_u32(uint32x2_t a, uint32x2_t b) {
  return vpmin_u32(a, b);
}

// CHECK-LABEL: define <2 x float> @test_vpmin_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[VPMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[VPMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK:   [[VPMIN2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminp.v2f32(<2 x float> [[VPMIN_I]], <2 x float> [[VPMIN1_I]]) #4
// CHECK:   ret <2 x float> [[VPMIN2_I]]
float32x2_t test_vpmin_f32(float32x2_t a, float32x2_t b) {
  return vpmin_f32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vpminq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK:   [[VPMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sminp.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK:   ret <16 x i8> [[VPMIN_I]]
int8x16_t test_vpminq_s8(int8x16_t a, int8x16_t b) {
  return vpminq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vpminq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VPMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VPMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VPMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sminp.v8i16(<8 x i16> [[VPMIN_I]], <8 x i16> [[VPMIN1_I]]) #4
// CHECK:   ret <8 x i16> [[VPMIN2_I]]
int16x8_t test_vpminq_s16(int16x8_t a, int16x8_t b) {
  return vpminq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vpminq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VPMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VPMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VPMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sminp.v4i32(<4 x i32> [[VPMIN_I]], <4 x i32> [[VPMIN1_I]]) #4
// CHECK:   ret <4 x i32> [[VPMIN2_I]]
int32x4_t test_vpminq_s32(int32x4_t a, int32x4_t b) {
  return vpminq_s32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vpminq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK:   [[VPMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uminp.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK:   ret <16 x i8> [[VPMIN_I]]
uint8x16_t test_vpminq_u8(uint8x16_t a, uint8x16_t b) {
  return vpminq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vpminq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VPMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VPMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VPMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uminp.v8i16(<8 x i16> [[VPMIN_I]], <8 x i16> [[VPMIN1_I]]) #4
// CHECK:   ret <8 x i16> [[VPMIN2_I]]
uint16x8_t test_vpminq_u16(uint16x8_t a, uint16x8_t b) {
  return vpminq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vpminq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VPMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VPMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VPMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uminp.v4i32(<4 x i32> [[VPMIN_I]], <4 x i32> [[VPMIN1_I]]) #4
// CHECK:   ret <4 x i32> [[VPMIN2_I]]
uint32x4_t test_vpminq_u32(uint32x4_t a, uint32x4_t b) {
  return vpminq_u32(a, b);
}

// CHECK-LABEL: define <4 x float> @test_vpminq_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[VPMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK:   [[VPMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK:   [[VPMIN2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminp.v4f32(<4 x float> [[VPMIN_I]], <4 x float> [[VPMIN1_I]]) #4
// CHECK:   ret <4 x float> [[VPMIN2_I]]
float32x4_t test_vpminq_f32(float32x4_t a, float32x4_t b) {
  return vpminq_f32(a, b);
}

// CHECK-LABEL: define <2 x double> @test_vpminq_f64(<2 x double> %a, <2 x double> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK:   [[VPMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[VPMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CHECK:   [[VPMIN2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminp.v2f64(<2 x double> [[VPMIN_I]], <2 x double> [[VPMIN1_I]]) #4
// CHECK:   ret <2 x double> [[VPMIN2_I]]
float64x2_t test_vpminq_f64(float64x2_t a, float64x2_t b) {
  return vpminq_f64(a, b);
}

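// Note (editorial, not matched by FileCheck): the *nm pairwise variants lower
// to fmaxnmp/fminnmp, which follow the IEEE 754-2008 maxNum/minNum rules: if
// one element of a pair is a quiet NaN and the other is a number, the number
// is returned, whereas the plain fmaxp/fminp forms above would return NaN.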
// CHECK-LABEL: define <2 x float> @test_vpmaxnm_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[VPMAXNM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[VPMAXNM1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK:   [[VPMAXNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxnmp.v2f32(<2 x float> [[VPMAXNM_I]], <2 x float> [[VPMAXNM1_I]]) #4
// CHECK:   ret <2 x float> [[VPMAXNM2_I]]
float32x2_t test_vpmaxnm_f32(float32x2_t a, float32x2_t b) {
  return vpmaxnm_f32(a, b);
}

// CHECK-LABEL: define <4 x float> @test_vpmaxnmq_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[VPMAXNM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK:   [[VPMAXNM1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK:   [[VPMAXNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxnmp.v4f32(<4 x float> [[VPMAXNM_I]], <4 x float> [[VPMAXNM1_I]]) #4
// CHECK:   ret <4 x float> [[VPMAXNM2_I]]
float32x4_t test_vpmaxnmq_f32(float32x4_t a, float32x4_t b) {
  return vpmaxnmq_f32(a, b);
}

// CHECK-LABEL: define <2 x double> @test_vpmaxnmq_f64(<2 x double> %a, <2 x double> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK:   [[VPMAXNM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[VPMAXNM1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CHECK:   [[VPMAXNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxnmp.v2f64(<2 x double> [[VPMAXNM_I]], <2 x double> [[VPMAXNM1_I]]) #4
// CHECK:   ret <2 x double> [[VPMAXNM2_I]]
float64x2_t test_vpmaxnmq_f64(float64x2_t a, float64x2_t b) {
  return vpmaxnmq_f64(a, b);
}

// CHECK-LABEL: define <2 x float> @test_vpminnm_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[VPMINNM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[VPMINNM1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK:   [[VPMINNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminnmp.v2f32(<2 x float> [[VPMINNM_I]], <2 x float> [[VPMINNM1_I]]) #4
// CHECK:   ret <2 x float> [[VPMINNM2_I]]
float32x2_t test_vpminnm_f32(float32x2_t a, float32x2_t b) {
  return vpminnm_f32(a, b);
}

// CHECK-LABEL: define <4 x float> @test_vpminnmq_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[VPMINNM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK:   [[VPMINNM1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK:   [[VPMINNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminnmp.v4f32(<4 x float> [[VPMINNM_I]], <4 x float> [[VPMINNM1_I]]) #4
// CHECK:   ret <4 x float> [[VPMINNM2_I]]
float32x4_t test_vpminnmq_f32(float32x4_t a, float32x4_t b) {
  return vpminnmq_f32(a, b);
}

// CHECK-LABEL: define <2 x double> @test_vpminnmq_f64(<2 x double> %a, <2 x double> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK:   [[VPMINNM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[VPMINNM1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CHECK:   [[VPMINNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminnmp.v2f64(<2 x double> [[VPMINNM_I]], <2 x double> [[VPMINNM1_I]]) #4
// CHECK:   ret <2 x double> [[VPMINNM2_I]]
float64x2_t test_vpminnmq_f64(float64x2_t a, float64x2_t b) {
  return vpminnmq_f64(a, b);
}

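// Note (editorial, not matched by FileCheck): pairwise addition is
// sign-agnostic on two's-complement lanes, so the signed and unsigned vpadd
// variants below all lower to the same addp intrinsic. For example, with
// a = {1, 2, 3, 4} and b = {10, 20, 30, 40},
// vpadd_s16(a, b) == {1+2, 3+4, 10+20, 30+40} == {3, 7, 30, 70}.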
// CHECK-LABEL: define <8 x i8> @test_vpadd_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK:   [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK:   ret <8 x i8> [[VPADD_V_I]]
int8x8_t test_vpadd_s8(int8x8_t a, int8x8_t b) {
  return vpadd_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vpadd_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> [[VPADD_V_I]], <4 x i16> [[VPADD_V1_I]]) #4
// CHECK:   [[VPADD_V3_I:%.*]] = bitcast <4 x i16> [[VPADD_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP2]]
int16x4_t test_vpadd_s16(int16x4_t a, int16x4_t b) {
  return vpadd_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vpadd_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32> [[VPADD_V_I]], <2 x i32> [[VPADD_V1_I]]) #4
// CHECK:   [[VPADD_V3_I:%.*]] = bitcast <2 x i32> [[VPADD_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP2]]
int32x2_t test_vpadd_s32(int32x2_t a, int32x2_t b) {
  return vpadd_s32(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vpadd_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK:   [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK:   ret <8 x i8> [[VPADD_V_I]]
uint8x8_t test_vpadd_u8(uint8x8_t a, uint8x8_t b) {
  return vpadd_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vpadd_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> [[VPADD_V_I]], <4 x i16> [[VPADD_V1_I]]) #4
// CHECK:   [[VPADD_V3_I:%.*]] = bitcast <4 x i16> [[VPADD_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP2]]
uint16x4_t test_vpadd_u16(uint16x4_t a, uint16x4_t b) {
  return vpadd_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vpadd_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32> [[VPADD_V_I]], <2 x i32> [[VPADD_V1_I]]) #4
// CHECK:   [[VPADD_V3_I:%.*]] = bitcast <2 x i32> [[VPADD_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP2]]
uint32x2_t test_vpadd_u32(uint32x2_t a, uint32x2_t b) {
  return vpadd_u32(a, b);
}

// CHECK-LABEL: define <2 x float> @test_vpadd_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK:   [[VPADD_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.addp.v2f32(<2 x float> [[VPADD_V_I]], <2 x float> [[VPADD_V1_I]]) #4
// CHECK:   [[VPADD_V3_I:%.*]] = bitcast <2 x float> [[VPADD_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <2 x float>
// CHECK:   ret <2 x float> [[TMP2]]
float32x2_t test_vpadd_f32(float32x2_t a, float32x2_t b) {
  return vpadd_f32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vpaddq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK:   [[VPADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK:   ret <16 x i8> [[VPADDQ_V_I]]
int8x16_t test_vpaddq_s8(int8x16_t a, int8x16_t b) {
  return vpaddq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vpaddq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VPADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VPADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VPADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16> [[VPADDQ_V_I]], <8 x i16> [[VPADDQ_V1_I]]) #4
// CHECK:   [[VPADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VPADDQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VPADDQ_V3_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP2]]
int16x8_t test_vpaddq_s16(int16x8_t a, int16x8_t b) {
  return vpaddq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vpaddq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VPADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VPADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VPADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32> [[VPADDQ_V_I]], <4 x i32> [[VPADDQ_V1_I]]) #4
// CHECK:   [[VPADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VPADDQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VPADDQ_V3_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP2]]
int32x4_t test_vpaddq_s32(int32x4_t a, int32x4_t b) {
  return vpaddq_s32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vpaddq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK:   [[VPADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK:   ret <16 x i8> [[VPADDQ_V_I]]
uint8x16_t test_vpaddq_u8(uint8x16_t a, uint8x16_t b) {
  return vpaddq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vpaddq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VPADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VPADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VPADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16> [[VPADDQ_V_I]], <8 x i16> [[VPADDQ_V1_I]]) #4
// CHECK:   [[VPADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VPADDQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VPADDQ_V3_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP2]]
uint16x8_t test_vpaddq_u16(uint16x8_t a, uint16x8_t b) {
  return vpaddq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vpaddq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VPADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VPADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VPADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32> [[VPADDQ_V_I]], <4 x i32> [[VPADDQ_V1_I]]) #4
// CHECK:   [[VPADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VPADDQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VPADDQ_V3_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP2]]
uint32x4_t test_vpaddq_u32(uint32x4_t a, uint32x4_t b) {
  return vpaddq_u32(a, b);
}

// CHECK-LABEL: define <4 x float> @test_vpaddq_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[VPADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK:   [[VPADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK:   [[VPADDQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.addp.v4f32(<4 x float> [[VPADDQ_V_I]], <4 x float> [[VPADDQ_V1_I]]) #4
// CHECK:   [[VPADDQ_V3_I:%.*]] = bitcast <4 x float> [[VPADDQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VPADDQ_V3_I]] to <4 x float>
// CHECK:   ret <4 x float> [[TMP2]]
float32x4_t test_vpaddq_f32(float32x4_t a, float32x4_t b) {
  return vpaddq_f32(a, b);
}

// CHECK-LABEL: define <2 x double> @test_vpaddq_f64(<2 x double> %a, <2 x double> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK:   [[VPADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[VPADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CHECK:   [[VPADDQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.addp.v2f64(<2 x double> [[VPADDQ_V_I]], <2 x double> [[VPADDQ_V1_I]]) #4
// CHECK:   [[VPADDQ_V3_I:%.*]] = bitcast <2 x double> [[VPADDQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VPADDQ_V3_I]] to <2 x double>
// CHECK:   ret <2 x double> [[TMP2]]
float64x2_t test_vpaddq_f64(float64x2_t a, float64x2_t b) {
  return vpaddq_f64(a, b);
}

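// Note (editorial, not matched by FileCheck): sqdmulh computes
// sat((2 * a[i] * b[i]) >> 16) per 16-bit lane (>> 32 for 32-bit lanes).
// E.g. 16384 * 16384 doubled is 2^29, and 2^29 >> 16 == 8192; the one
// overflowing case, (-32768) * (-32768), would produce +32768 and therefore
// saturates to 32767.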
// CHECK-LABEL: define <4 x i16> @test_vqdmulh_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VQDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[VQDMULH_V_I]], <4 x i16> [[VQDMULH_V1_I]]) #4
// CHECK:   [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP2]]
int16x4_t test_vqdmulh_s16(int16x4_t a, int16x4_t b) {
  return vqdmulh_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vqdmulh_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VQDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> [[VQDMULH_V_I]], <2 x i32> [[VQDMULH_V1_I]]) #4
// CHECK:   [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP2]]
int32x2_t test_vqdmulh_s32(int32x2_t a, int32x2_t b) {
  return vqdmulh_s32(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vqdmulhq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> [[VQDMULHQ_V_I]], <8 x i16> [[VQDMULHQ_V1_I]]) #4
// CHECK:   [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP2]]
int16x8_t test_vqdmulhq_s16(int16x8_t a, int16x8_t b) {
  return vqdmulhq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vqdmulhq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> [[VQDMULHQ_V_I]], <4 x i32> [[VQDMULHQ_V1_I]]) #4
// CHECK:   [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP2]]
int32x4_t test_vqdmulhq_s32(int32x4_t a, int32x4_t b) {
  return vqdmulhq_s32(a, b);
}

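// Note (editorial, not matched by FileCheck): sqrdmulh is the rounding form
// of sqdmulh: a constant 1 << 15 (1 << 31 for 32-bit lanes) is added before
// the final shift, i.e. sat((2 * a[i] * b[i] + (1 << 15)) >> 16), rounding
// the result to nearest instead of truncating.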
// CHECK-LABEL: define <4 x i16> @test_vqrdmulh_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VQRDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[VQRDMULH_V_I]], <4 x i16> [[VQRDMULH_V1_I]]) #4
// CHECK:   [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP2]]
int16x4_t test_vqrdmulh_s16(int16x4_t a, int16x4_t b) {
  return vqrdmulh_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vqrdmulh_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VQRDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> [[VQRDMULH_V_I]], <2 x i32> [[VQRDMULH_V1_I]]) #4
// CHECK:   [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP2]]
int32x2_t test_vqrdmulh_s32(int32x2_t a, int32x2_t b) {
  return vqrdmulh_s32(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vqrdmulhq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQRDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> [[VQRDMULHQ_V_I]], <8 x i16> [[VQRDMULHQ_V1_I]]) #4
// CHECK:   [[VQRDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP2]]
int16x8_t test_vqrdmulhq_s16(int16x8_t a, int16x8_t b) {
  return vqrdmulhq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vqrdmulhq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQRDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> [[VQRDMULHQ_V_I]], <4 x i32> [[VQRDMULHQ_V1_I]]) #4
// CHECK:   [[VQRDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP2]]
int32x4_t test_vqrdmulhq_s32(int32x4_t a, int32x4_t b) {
  return vqrdmulhq_s32(a, b);
}

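// Note (editorial, not matched by FileCheck): fmulx behaves like an ordinary
// IEEE multiply except for the special case (+/-0.0) * (+/-Inf), which
// returns +/-2.0 instead of NaN, a property used by the reciprocal and
// reciprocal-square-root estimate sequences.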
// CHECK-LABEL: define <2 x float> @test_vmulx_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[VMULX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[VMULX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK:   [[VMULX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> [[VMULX_I]], <2 x float> [[VMULX1_I]]) #4
// CHECK:   ret <2 x float> [[VMULX2_I]]
float32x2_t test_vmulx_f32(float32x2_t a, float32x2_t b) {
  return vmulx_f32(a, b);
}

// CHECK-LABEL: define <4 x float> @test_vmulxq_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[VMULX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK:   [[VMULX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK:   [[VMULX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> [[VMULX_I]], <4 x float> [[VMULX1_I]]) #4
// CHECK:   ret <4 x float> [[VMULX2_I]]
float32x4_t test_vmulxq_f32(float32x4_t a, float32x4_t b) {
  return vmulxq_f32(a, b);
}

// CHECK-LABEL: define <2 x double> @test_vmulxq_f64(<2 x double> %a, <2 x double> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK:   [[VMULX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[VMULX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CHECK:   [[VMULX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> [[VMULX_I]], <2 x double> [[VMULX1_I]]) #4
// CHECK:   ret <2 x double> [[VMULX2_I]]
float64x2_t test_vmulxq_f64(float64x2_t a, float64x2_t b) {
  return vmulxq_f64(a, b);
}

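// Note (editorial, not matched by FileCheck): the _n shift intrinsics take a
// compile-time immediate, so the IR is a plain vector shl by a splatted
// constant; with the count 3 used throughout these tests, a lane value of 1
// becomes 1 << 3 == 8.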
// CHECK-LABEL: define <8 x i8> @test_vshl_n_s8(<8 x i8> %a) #0 {
// CHECK:   [[VSHL_N:%.*]] = shl <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
// CHECK:   ret <8 x i8> [[VSHL_N]]
int8x8_t test_vshl_n_s8(int8x8_t a) {
  return vshl_n_s8(a, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vshl_n_s16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3>
// CHECK:   ret <4 x i16> [[VSHL_N]]
int16x4_t test_vshl_n_s16(int16x4_t a) {
  return vshl_n_s16(a, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vshl_n_s32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], <i32 3, i32 3>
// CHECK:   ret <2 x i32> [[VSHL_N]]
int32x2_t test_vshl_n_s32(int32x2_t a) {
  return vshl_n_s32(a, 3);
}

// CHECK-LABEL: define <16 x i8> @test_vshlq_n_s8(<16 x i8> %a) #0 {
// CHECK:   [[VSHL_N:%.*]] = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
// CHECK:   ret <16 x i8> [[VSHL_N]]
int8x16_t test_vshlq_n_s8(int8x16_t a) {
  return vshlq_n_s8(a, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vshlq_n_s16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK:   ret <8 x i16> [[VSHL_N]]
int16x8_t test_vshlq_n_s16(int16x8_t a) {
  return vshlq_n_s16(a, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vshlq_n_s32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], <i32 3, i32 3, i32 3, i32 3>
// CHECK:   ret <4 x i32> [[VSHL_N]]
int32x4_t test_vshlq_n_s32(int32x4_t a) {
  return vshlq_n_s32(a, 3);
}

// CHECK-LABEL: define <2 x i64> @test_vshlq_n_s64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], <i64 3, i64 3>
// CHECK:   ret <2 x i64> [[VSHL_N]]
int64x2_t test_vshlq_n_s64(int64x2_t a) {
  return vshlq_n_s64(a, 3);
}

// CHECK-LABEL: define <8 x i8> @test_vshl_n_u8(<8 x i8> %a) #0 {
// CHECK:   [[VSHL_N:%.*]] = shl <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
// CHECK:   ret <8 x i8> [[VSHL_N]]
int8x8_t test_vshl_n_u8(int8x8_t a) {
  return vshl_n_u8(a, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vshl_n_u16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3>
// CHECK:   ret <4 x i16> [[VSHL_N]]
int16x4_t test_vshl_n_u16(int16x4_t a) {
  return vshl_n_u16(a, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vshl_n_u32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], <i32 3, i32 3>
// CHECK:   ret <2 x i32> [[VSHL_N]]
int32x2_t test_vshl_n_u32(int32x2_t a) {
  return vshl_n_u32(a, 3);
}

// CHECK-LABEL: define <16 x i8> @test_vshlq_n_u8(<16 x i8> %a) #0 {
// CHECK:   [[VSHL_N:%.*]] = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
// CHECK:   ret <16 x i8> [[VSHL_N]]
int8x16_t test_vshlq_n_u8(int8x16_t a) {
  return vshlq_n_u8(a, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vshlq_n_u16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK:   ret <8 x i16> [[VSHL_N]]
int16x8_t test_vshlq_n_u16(int16x8_t a) {
  return vshlq_n_u16(a, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vshlq_n_u32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], <i32 3, i32 3, i32 3, i32 3>
// CHECK:   ret <4 x i32> [[VSHL_N]]
int32x4_t test_vshlq_n_u32(int32x4_t a) {
  return vshlq_n_u32(a, 3);
}

// CHECK-LABEL: define <2 x i64> @test_vshlq_n_u64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], <i64 3, i64 3>
// CHECK:   ret <2 x i64> [[VSHL_N]]
int64x2_t test_vshlq_n_u64(int64x2_t a) {
  return vshlq_n_u64(a, 3);
}

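// Note (editorial, not matched by FileCheck): signed vshr_n_* lowers to an
// arithmetic ashr (sign bits shift in) and unsigned vshr_n_* to a logical
// lshr (zeros shift in). With n == 3, an i8 lane holding -16 yields -2
// arithmetically, while the same bit pattern 0xF0 yields 0x1E == 30
// logically.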
// CHECK-LABEL: define <8 x i8> @test_vshr_n_s8(<8 x i8> %a) #0 {
// CHECK:   [[VSHR_N:%.*]] = ashr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
// CHECK:   ret <8 x i8> [[VSHR_N]]
int8x8_t test_vshr_n_s8(int8x8_t a) {
  return vshr_n_s8(a, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vshr_n_s16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VSHR_N:%.*]] = ashr <4 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3>
// CHECK:   ret <4 x i16> [[VSHR_N]]
int16x4_t test_vshr_n_s16(int16x4_t a) {
  return vshr_n_s16(a, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vshr_n_s32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VSHR_N:%.*]] = ashr <2 x i32> [[TMP1]], <i32 3, i32 3>
// CHECK:   ret <2 x i32> [[VSHR_N]]
int32x2_t test_vshr_n_s32(int32x2_t a) {
  return vshr_n_s32(a, 3);
}

// CHECK-LABEL: define <16 x i8> @test_vshrq_n_s8(<16 x i8> %a) #0 {
// CHECK:   [[VSHR_N:%.*]] = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
// CHECK:   ret <16 x i8> [[VSHR_N]]
int8x16_t test_vshrq_n_s8(int8x16_t a) {
  return vshrq_n_s8(a, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vshrq_n_s16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VSHR_N:%.*]] = ashr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK:   ret <8 x i16> [[VSHR_N]]
int16x8_t test_vshrq_n_s16(int16x8_t a) {
  return vshrq_n_s16(a, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vshrq_n_s32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VSHR_N:%.*]] = ashr <4 x i32> [[TMP1]], <i32 3, i32 3, i32 3, i32 3>
// CHECK:   ret <4 x i32> [[VSHR_N]]
int32x4_t test_vshrq_n_s32(int32x4_t a) {
  return vshrq_n_s32(a, 3);
}

// CHECK-LABEL: define <2 x i64> @test_vshrq_n_s64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VSHR_N:%.*]] = ashr <2 x i64> [[TMP1]], <i64 3, i64 3>
// CHECK:   ret <2 x i64> [[VSHR_N]]
int64x2_t test_vshrq_n_s64(int64x2_t a) {
  return vshrq_n_s64(a, 3);
}

// CHECK-LABEL: define <8 x i8> @test_vshr_n_u8(<8 x i8> %a) #0 {
// CHECK:   [[VSHR_N:%.*]] = lshr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
// CHECK:   ret <8 x i8> [[VSHR_N]]
int8x8_t test_vshr_n_u8(int8x8_t a) {
  return vshr_n_u8(a, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vshr_n_u16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VSHR_N:%.*]] = lshr <4 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3>
// CHECK:   ret <4 x i16> [[VSHR_N]]
int16x4_t test_vshr_n_u16(int16x4_t a) {
  return vshr_n_u16(a, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vshr_n_u32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VSHR_N:%.*]] = lshr <2 x i32> [[TMP1]], <i32 3, i32 3>
// CHECK:   ret <2 x i32> [[VSHR_N]]
int32x2_t test_vshr_n_u32(int32x2_t a) {
  return vshr_n_u32(a, 3);
}

// CHECK-LABEL: define <16 x i8> @test_vshrq_n_u8(<16 x i8> %a) #0 {
// CHECK:   [[VSHR_N:%.*]] = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
// CHECK:   ret <16 x i8> [[VSHR_N]]
int8x16_t test_vshrq_n_u8(int8x16_t a) {
  return vshrq_n_u8(a, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vshrq_n_u16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VSHR_N:%.*]] = lshr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK:   ret <8 x i16> [[VSHR_N]]
int16x8_t test_vshrq_n_u16(int16x8_t a) {
  return vshrq_n_u16(a, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vshrq_n_u32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VSHR_N:%.*]] = lshr <4 x i32> [[TMP1]], <i32 3, i32 3, i32 3, i32 3>
// CHECK:   ret <4 x i32> [[VSHR_N]]
int32x4_t test_vshrq_n_u32(int32x4_t a) {
  return vshrq_n_u32(a, 3);
}

// CHECK-LABEL: define <2 x i64> @test_vshrq_n_u64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VSHR_N:%.*]] = lshr <2 x i64> [[TMP1]], <i64 3, i64 3>
// CHECK:   ret <2 x i64> [[VSHR_N]]
int64x2_t test_vshrq_n_u64(int64x2_t a) {
  return vshrq_n_u64(a, 3);
}

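// Note (editorial, not matched by FileCheck): vsra_n_* is
// shift-right-accumulate, a[i] + (b[i] >> n) per lane, so the IR is simply
// the vshr_n pattern followed by an add; e.g. a lane with a == 100, b == 24
// and n == 3 gives 100 + (24 >> 3) == 103.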
// CHECK-LABEL: define <8 x i8> @test_vsra_n_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK:   [[VSRA_N:%.*]] = ashr <8 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
// CHECK:   [[TMP0:%.*]] = add <8 x i8> %a, [[VSRA_N]]
// CHECK:   ret <8 x i8> [[TMP0]]
int8x8_t test_vsra_n_s8(int8x8_t a, int8x8_t b) {
  return vsra_n_s8(a, b, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vsra_n_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VSRA_N:%.*]] = ashr <4 x i16> [[TMP3]], <i16 3, i16 3, i16 3, i16 3>
// CHECK:   [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[VSRA_N]]
// CHECK:   ret <4 x i16> [[TMP4]]
int16x4_t test_vsra_n_s16(int16x4_t a, int16x4_t b) {
  return vsra_n_s16(a, b, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vsra_n_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VSRA_N:%.*]] = ashr <2 x i32> [[TMP3]], <i32 3, i32 3>
// CHECK:   [[TMP4:%.*]] = add <2 x i32> [[TMP2]], [[VSRA_N]]
// CHECK:   ret <2 x i32> [[TMP4]]
int32x2_t test_vsra_n_s32(int32x2_t a, int32x2_t b) {
  return vsra_n_s32(a, b, 3);
}

// CHECK-LABEL: define <16 x i8> @test_vsraq_n_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK:   [[VSRA_N:%.*]] = ashr <16 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
// CHECK:   [[TMP0:%.*]] = add <16 x i8> %a, [[VSRA_N]]
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vsraq_n_s8(int8x16_t a, int8x16_t b) {
  return vsraq_n_s8(a, b, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vsraq_n_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VSRA_N:%.*]] = ashr <8 x i16> [[TMP3]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK:   [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[VSRA_N]]
// CHECK:   ret <8 x i16> [[TMP4]]
int16x8_t test_vsraq_n_s16(int16x8_t a, int16x8_t b) {
  return vsraq_n_s16(a, b, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vsraq_n_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VSRA_N:%.*]] = ashr <4 x i32> [[TMP3]], <i32 3, i32 3, i32 3, i32 3>
// CHECK:   [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[VSRA_N]]
// CHECK:   ret <4 x i32> [[TMP4]]
int32x4_t test_vsraq_n_s32(int32x4_t a, int32x4_t b) {
  return vsraq_n_s32(a, b, 3);
}

// CHECK-LABEL: define <2 x i64> @test_vsraq_n_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VSRA_N:%.*]] = ashr <2 x i64> [[TMP3]], <i64 3, i64 3>
// CHECK:   [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[VSRA_N]]
// CHECK:   ret <2 x i64> [[TMP4]]
int64x2_t test_vsraq_n_s64(int64x2_t a, int64x2_t b) {
  return vsraq_n_s64(a, b, 3);
}

// CHECK-LABEL: define <8 x i8> @test_vsra_n_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK:   [[VSRA_N:%.*]] = lshr <8 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
// CHECK:   [[TMP0:%.*]] = add <8 x i8> %a, [[VSRA_N]]
// CHECK:   ret <8 x i8> [[TMP0]]
int8x8_t test_vsra_n_u8(int8x8_t a, int8x8_t b) {
  return vsra_n_u8(a, b, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vsra_n_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VSRA_N:%.*]] = lshr <4 x i16> [[TMP3]], <i16 3, i16 3, i16 3, i16 3>
// CHECK:   [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[VSRA_N]]
// CHECK:   ret <4 x i16> [[TMP4]]
int16x4_t test_vsra_n_u16(int16x4_t a, int16x4_t b) {
  return vsra_n_u16(a, b, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vsra_n_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VSRA_N:%.*]] = lshr <2 x i32> [[TMP3]], <i32 3, i32 3>
// CHECK:   [[TMP4:%.*]] = add <2 x i32> [[TMP2]], [[VSRA_N]]
// CHECK:   ret <2 x i32> [[TMP4]]
int32x2_t test_vsra_n_u32(int32x2_t a, int32x2_t b) {
  return vsra_n_u32(a, b, 3);
}

// CHECK-LABEL: define <16 x i8> @test_vsraq_n_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK:   [[VSRA_N:%.*]] = lshr <16 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
// CHECK:   [[TMP0:%.*]] = add <16 x i8> %a, [[VSRA_N]]
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vsraq_n_u8(int8x16_t a, int8x16_t b) {
  return vsraq_n_u8(a, b, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vsraq_n_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VSRA_N:%.*]] = lshr <8 x i16> [[TMP3]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK:   [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[VSRA_N]]
// CHECK:   ret <8 x i16> [[TMP4]]
int16x8_t test_vsraq_n_u16(int16x8_t a, int16x8_t b) {
  return vsraq_n_u16(a, b, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vsraq_n_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VSRA_N:%.*]] = lshr <4 x i32> [[TMP3]], <i32 3, i32 3, i32 3, i32 3>
// CHECK:   [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[VSRA_N]]
// CHECK:   ret <4 x i32> [[TMP4]]
int32x4_t test_vsraq_n_u32(int32x4_t a, int32x4_t b) {
  return vsraq_n_u32(a, b, 3);
}

// CHECK-LABEL: define <2 x i64> @test_vsraq_n_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VSRA_N:%.*]] = lshr <2 x i64> [[TMP3]], <i64 3, i64 3>
// CHECK:   [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[VSRA_N]]
// CHECK:   ret <2 x i64> [[TMP4]]
int64x2_t test_vsraq_n_u64(int64x2_t a, int64x2_t b) {
  return vsraq_n_u64(a, b, 3);
}

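// Note (editorial, not matched by FileCheck): the rounding shifts have no
// direct IR equivalent, so vrshr_n_* lowers to srshl/urshl with a *negative*
// splatted shift amount. Rounding adds 1 << (n - 1) before the shift; with
// n == 3, a lane of 5 gives (5 + 4) >> 3 == 1 where the truncating vshr_n
// would give 0.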
// CHECK-LABEL: define <8 x i8> @test_vrshr_n_s8(<8 x i8> %a) #0 {
// CHECK:   [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %a, <8 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
// CHECK:   ret <8 x i8> [[VRSHR_N]]
int8x8_t test_vrshr_n_s8(int8x8_t a) {
  return vrshr_n_s8(a, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vrshr_n_s16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>)
// CHECK:   ret <4 x i16> [[VRSHR_N1]]
int16x4_t test_vrshr_n_s16(int16x4_t a) {
  return vrshr_n_s16(a, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vrshr_n_s32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -3, i32 -3>)
// CHECK:   ret <2 x i32> [[VRSHR_N1]]
int32x2_t test_vrshr_n_s32(int32x2_t a) {
  return vrshr_n_s32(a, 3);
}

// CHECK-LABEL: define <16 x i8> @test_vrshrq_n_s8(<16 x i8> %a) #0 {
// CHECK:   [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %a, <16 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
// CHECK:   ret <16 x i8> [[VRSHR_N]]
int8x16_t test_vrshrq_n_s8(int8x16_t a) {
  return vrshrq_n_s8(a, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vrshrq_n_s16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3>)
// CHECK:   ret <8 x i16> [[VRSHR_N1]]
int16x8_t test_vrshrq_n_s16(int16x8_t a) {
  return vrshrq_n_s16(a, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vrshrq_n_s32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -3, i32 -3, i32 -3, i32 -3>)
// CHECK:   ret <4 x i32> [[VRSHR_N1]]
int32x4_t test_vrshrq_n_s32(int32x4_t a) {
  return vrshrq_n_s32(a, 3);
}

// CHECK-LABEL: define <2 x i64> @test_vrshrq_n_s64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -3, i64 -3>)
// CHECK:   ret <2 x i64> [[VRSHR_N1]]
int64x2_t test_vrshrq_n_s64(int64x2_t a) {
  return vrshrq_n_s64(a, 3);
}

// CHECK-LABEL: define <8 x i8> @test_vrshr_n_u8(<8 x i8> %a) #0 {
// CHECK:   [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %a, <8 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
// CHECK:   ret <8 x i8> [[VRSHR_N]]
uint8x8_t test_vrshr_n_u8(uint8x8_t a) {
  return vrshr_n_u8(a, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vrshr_n_u16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>)
// CHECK:   ret <4 x i16> [[VRSHR_N1]]
uint16x4_t test_vrshr_n_u16(uint16x4_t a) {
  return vrshr_n_u16(a, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vrshr_n_u32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -3, i32 -3>)
// CHECK:   ret <2 x i32> [[VRSHR_N1]]
uint32x2_t test_vrshr_n_u32(uint32x2_t a) {
  return vrshr_n_u32(a, 3);
}

// CHECK-LABEL: define <16 x i8> @test_vrshrq_n_u8(<16 x i8> %a) #0 {
// CHECK:   [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %a, <16 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
// CHECK:   ret <16 x i8> [[VRSHR_N]]
uint8x16_t test_vrshrq_n_u8(uint8x16_t a) {
  return vrshrq_n_u8(a, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vrshrq_n_u16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3>)
// CHECK:   ret <8 x i16> [[VRSHR_N1]]
uint16x8_t test_vrshrq_n_u16(uint16x8_t a) {
  return vrshrq_n_u16(a, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vrshrq_n_u32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -3, i32 -3, i32 -3, i32 -3>)
// CHECK:   ret <4 x i32> [[VRSHR_N1]]
uint32x4_t test_vrshrq_n_u32(uint32x4_t a) {
  return vrshrq_n_u32(a, 3);
}

// CHECK-LABEL: define <2 x i64> @test_vrshrq_n_u64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -3, i64 -3>)
// CHECK:   ret <2 x i64> [[VRSHR_N1]]
uint64x2_t test_vrshrq_n_u64(uint64x2_t a) {
  return vrshrq_n_u64(a, 3);
}

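// vrsra_n/vrsraq_n: rounding shift right by an immediate, accumulated into
// the first operand. As the checks below show, clang reuses the srshl/urshl
// lowering from the vrshr_n tests above (a splat of the negated shift amount)
// and then emits a plain vector add; roughly, per lane:
//   a + ((b + (1 << (n - 1))) >> n).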
// CHECK-LABEL: define <8 x i8> @test_vrsra_n_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK:   [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %b, <8 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
// CHECK:   [[TMP0:%.*]] = add <8 x i8> %a, [[VRSHR_N]]
// CHECK:   ret <8 x i8> [[TMP0]]
int8x8_t test_vrsra_n_s8(int8x8_t a, int8x8_t b) {
  return vrsra_n_s8(a, b, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vrsra_n_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>)
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[TMP3:%.*]] = add <4 x i16> [[TMP2]], [[VRSHR_N1]]
// CHECK:   ret <4 x i16> [[TMP3]]
int16x4_t test_vrsra_n_s16(int16x4_t a, int16x4_t b) {
  return vrsra_n_s16(a, b, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vrsra_n_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -3, i32 -3>)
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[TMP3:%.*]] = add <2 x i32> [[TMP2]], [[VRSHR_N1]]
// CHECK:   ret <2 x i32> [[TMP3]]
int32x2_t test_vrsra_n_s32(int32x2_t a, int32x2_t b) {
  return vrsra_n_s32(a, b, 3);
}

// CHECK-LABEL: define <16 x i8> @test_vrsraq_n_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK:   [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %b, <16 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
// CHECK:   [[TMP0:%.*]] = add <16 x i8> %a, [[VRSHR_N]]
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vrsraq_n_s8(int8x16_t a, int8x16_t b) {
  return vrsraq_n_s8(a, b, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vrsraq_n_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3>)
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[TMP3:%.*]] = add <8 x i16> [[TMP2]], [[VRSHR_N1]]
// CHECK:   ret <8 x i16> [[TMP3]]
int16x8_t test_vrsraq_n_s16(int16x8_t a, int16x8_t b) {
  return vrsraq_n_s16(a, b, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vrsraq_n_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -3, i32 -3, i32 -3, i32 -3>)
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[VRSHR_N1]]
// CHECK:   ret <4 x i32> [[TMP3]]
int32x4_t test_vrsraq_n_s32(int32x4_t a, int32x4_t b) {
  return vrsraq_n_s32(a, b, 3);
}

// CHECK-LABEL: define <2 x i64> @test_vrsraq_n_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -3, i64 -3>)
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[TMP3:%.*]] = add <2 x i64> [[TMP2]], [[VRSHR_N1]]
// CHECK:   ret <2 x i64> [[TMP3]]
int64x2_t test_vrsraq_n_s64(int64x2_t a, int64x2_t b) {
  return vrsraq_n_s64(a, b, 3);
}

// CHECK-LABEL: define <8 x i8> @test_vrsra_n_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK:   [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %b, <8 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
// CHECK:   [[TMP0:%.*]] = add <8 x i8> %a, [[VRSHR_N]]
// CHECK:   ret <8 x i8> [[TMP0]]
uint8x8_t test_vrsra_n_u8(uint8x8_t a, uint8x8_t b) {
  return vrsra_n_u8(a, b, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vrsra_n_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>)
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[TMP3:%.*]] = add <4 x i16> [[TMP2]], [[VRSHR_N1]]
// CHECK:   ret <4 x i16> [[TMP3]]
uint16x4_t test_vrsra_n_u16(uint16x4_t a, uint16x4_t b) {
  return vrsra_n_u16(a, b, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vrsra_n_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -3, i32 -3>)
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[TMP3:%.*]] = add <2 x i32> [[TMP2]], [[VRSHR_N1]]
// CHECK:   ret <2 x i32> [[TMP3]]
uint32x2_t test_vrsra_n_u32(uint32x2_t a, uint32x2_t b) {
  return vrsra_n_u32(a, b, 3);
}

// CHECK-LABEL: define <16 x i8> @test_vrsraq_n_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK:   [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %b, <16 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
// CHECK:   [[TMP0:%.*]] = add <16 x i8> %a, [[VRSHR_N]]
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vrsraq_n_u8(uint8x16_t a, uint8x16_t b) {
  return vrsraq_n_u8(a, b, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vrsraq_n_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3>)
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[TMP3:%.*]] = add <8 x i16> [[TMP2]], [[VRSHR_N1]]
// CHECK:   ret <8 x i16> [[TMP3]]
uint16x8_t test_vrsraq_n_u16(uint16x8_t a, uint16x8_t b) {
  return vrsraq_n_u16(a, b, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vrsraq_n_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -3, i32 -3, i32 -3, i32 -3>)
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[VRSHR_N1]]
// CHECK:   ret <4 x i32> [[TMP3]]
uint32x4_t test_vrsraq_n_u32(uint32x4_t a, uint32x4_t b) {
  return vrsraq_n_u32(a, b, 3);
}

// CHECK-LABEL: define <2 x i64> @test_vrsraq_n_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -3, i64 -3>)
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[TMP3:%.*]] = add <2 x i64> [[TMP2]], [[VRSHR_N1]]
// CHECK:   ret <2 x i64> [[TMP3]]
uint64x2_t test_vrsraq_n_u64(uint64x2_t a, uint64x2_t b) {
  return vrsraq_n_u64(a, b, 3);
}

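// vsri_n/vsriq_n: shift right and insert. Unlike the rounding shifts above,
// these lower to a dedicated intrinsic (llvm.aarch64.neon.vsri.*) that takes
// the shift amount as a scalar i32 immediate rather than a splat vector,
// since SRI leaves the top n bits of the destination operand unchanged.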
// CHECK-LABEL: define <8 x i8> @test_vsri_n_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK:   [[VSRI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
// CHECK:   ret <8 x i8> [[VSRI_N]]
int8x8_t test_vsri_n_s8(int8x8_t a, int8x8_t b) {
  return vsri_n_s8(a, b, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vsri_n_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VSRI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> [[VSRI_N]], <4 x i16> [[VSRI_N1]], i32 3)
// CHECK:   ret <4 x i16> [[VSRI_N2]]
int16x4_t test_vsri_n_s16(int16x4_t a, int16x4_t b) {
  return vsri_n_s16(a, b, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vsri_n_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VSRI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsri.v2i32(<2 x i32> [[VSRI_N]], <2 x i32> [[VSRI_N1]], i32 3)
// CHECK:   ret <2 x i32> [[VSRI_N2]]
int32x2_t test_vsri_n_s32(int32x2_t a, int32x2_t b) {
  return vsri_n_s32(a, b, 3);
}

// CHECK-LABEL: define <16 x i8> @test_vsriq_n_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK:   [[VSRI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
// CHECK:   ret <16 x i8> [[VSRI_N]]
int8x16_t test_vsriq_n_s8(int8x16_t a, int8x16_t b) {
  return vsriq_n_s8(a, b, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vsriq_n_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VSRI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> [[VSRI_N]], <8 x i16> [[VSRI_N1]], i32 3)
// CHECK:   ret <8 x i16> [[VSRI_N2]]
int16x8_t test_vsriq_n_s16(int16x8_t a, int16x8_t b) {
  return vsriq_n_s16(a, b, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vsriq_n_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VSRI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32> [[VSRI_N]], <4 x i32> [[VSRI_N1]], i32 3)
// CHECK:   ret <4 x i32> [[VSRI_N2]]
int32x4_t test_vsriq_n_s32(int32x4_t a, int32x4_t b) {
  return vsriq_n_s32(a, b, 3);
}

// CHECK-LABEL: define <2 x i64> @test_vsriq_n_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VSRI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64> [[VSRI_N]], <2 x i64> [[VSRI_N1]], i32 3)
// CHECK:   ret <2 x i64> [[VSRI_N2]]
int64x2_t test_vsriq_n_s64(int64x2_t a, int64x2_t b) {
  return vsriq_n_s64(a, b, 3);
}

// CHECK-LABEL: define <8 x i8> @test_vsri_n_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK:   [[VSRI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
// CHECK:   ret <8 x i8> [[VSRI_N]]
uint8x8_t test_vsri_n_u8(uint8x8_t a, uint8x8_t b) {
  return vsri_n_u8(a, b, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vsri_n_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VSRI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> [[VSRI_N]], <4 x i16> [[VSRI_N1]], i32 3)
// CHECK:   ret <4 x i16> [[VSRI_N2]]
uint16x4_t test_vsri_n_u16(uint16x4_t a, uint16x4_t b) {
  return vsri_n_u16(a, b, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vsri_n_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VSRI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsri.v2i32(<2 x i32> [[VSRI_N]], <2 x i32> [[VSRI_N1]], i32 3)
// CHECK:   ret <2 x i32> [[VSRI_N2]]
uint32x2_t test_vsri_n_u32(uint32x2_t a, uint32x2_t b) {
  return vsri_n_u32(a, b, 3);
}

// CHECK-LABEL: define <16 x i8> @test_vsriq_n_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK:   [[VSRI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
// CHECK:   ret <16 x i8> [[VSRI_N]]
uint8x16_t test_vsriq_n_u8(uint8x16_t a, uint8x16_t b) {
  return vsriq_n_u8(a, b, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vsriq_n_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VSRI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> [[VSRI_N]], <8 x i16> [[VSRI_N1]], i32 3)
// CHECK:   ret <8 x i16> [[VSRI_N2]]
uint16x8_t test_vsriq_n_u16(uint16x8_t a, uint16x8_t b) {
  return vsriq_n_u16(a, b, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vsriq_n_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VSRI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32> [[VSRI_N]], <4 x i32> [[VSRI_N1]], i32 3)
// CHECK:   ret <4 x i32> [[VSRI_N2]]
uint32x4_t test_vsriq_n_u32(uint32x4_t a, uint32x4_t b) {
  return vsriq_n_u32(a, b, 3);
}

// CHECK-LABEL: define <2 x i64> @test_vsriq_n_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VSRI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64> [[VSRI_N]], <2 x i64> [[VSRI_N1]], i32 3)
// CHECK:   ret <2 x i64> [[VSRI_N2]]
uint64x2_t test_vsriq_n_u64(uint64x2_t a, uint64x2_t b) {
  return vsriq_n_u64(a, b, 3);
}

// CHECK-LABEL: define <8 x i8> @test_vsri_n_p8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK:   [[VSRI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
// CHECK:   ret <8 x i8> [[VSRI_N]]
poly8x8_t test_vsri_n_p8(poly8x8_t a, poly8x8_t b) {
  return vsri_n_p8(a, b, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vsri_n_p16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VSRI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> [[VSRI_N]], <4 x i16> [[VSRI_N1]], i32 15)
// CHECK:   ret <4 x i16> [[VSRI_N2]]
poly16x4_t test_vsri_n_p16(poly16x4_t a, poly16x4_t b) {
  return vsri_n_p16(a, b, 15);
}

// CHECK-LABEL: define <16 x i8> @test_vsriq_n_p8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK:   [[VSRI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
// CHECK:   ret <16 x i8> [[VSRI_N]]
poly8x16_t test_vsriq_n_p8(poly8x16_t a, poly8x16_t b) {
  return vsriq_n_p8(a, b, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vsriq_n_p16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VSRI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> [[VSRI_N]], <8 x i16> [[VSRI_N1]], i32 15)
// CHECK:   ret <8 x i16> [[VSRI_N2]]
poly16x8_t test_vsriq_n_p16(poly16x8_t a, poly16x8_t b) {
  return vsriq_n_p16(a, b, 15);
}

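// vsli_n/vsliq_n: shift left and insert, the mirror image of vsri_n above,
// lowered to the analogous llvm.aarch64.neon.vsli.* intrinsic (SLI preserves
// the low n bits of the destination instead). The p16 tests again use the
// maximum immediate (15) to exercise the top of the shift range.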
// CHECK-LABEL: define <8 x i8> @test_vsli_n_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK:   [[VSLI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
// CHECK:   ret <8 x i8> [[VSLI_N]]
int8x8_t test_vsli_n_s8(int8x8_t a, int8x8_t b) {
  return vsli_n_s8(a, b, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vsli_n_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VSLI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], i32 3)
// CHECK:   ret <4 x i16> [[VSLI_N2]]
int16x4_t test_vsli_n_s16(int16x4_t a, int16x4_t b) {
  return vsli_n_s16(a, b, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vsli_n_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VSLI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], i32 3)
// CHECK:   ret <2 x i32> [[VSLI_N2]]
int32x2_t test_vsli_n_s32(int32x2_t a, int32x2_t b) {
  return vsli_n_s32(a, b, 3);
}

// CHECK-LABEL: define <16 x i8> @test_vsliq_n_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK:   [[VSLI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
// CHECK:   ret <16 x i8> [[VSLI_N]]
int8x16_t test_vsliq_n_s8(int8x16_t a, int8x16_t b) {
  return vsliq_n_s8(a, b, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vsliq_n_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VSLI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], i32 3)
// CHECK:   ret <8 x i16> [[VSLI_N2]]
int16x8_t test_vsliq_n_s16(int16x8_t a, int16x8_t b) {
  return vsliq_n_s16(a, b, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vsliq_n_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VSLI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], i32 3)
// CHECK:   ret <4 x i32> [[VSLI_N2]]
int32x4_t test_vsliq_n_s32(int32x4_t a, int32x4_t b) {
  return vsliq_n_s32(a, b, 3);
}

// CHECK-LABEL: define <2 x i64> @test_vsliq_n_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VSLI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], i32 3)
// CHECK:   ret <2 x i64> [[VSLI_N2]]
int64x2_t test_vsliq_n_s64(int64x2_t a, int64x2_t b) {
  return vsliq_n_s64(a, b, 3);
}

// CHECK-LABEL: define <8 x i8> @test_vsli_n_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK:   [[VSLI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
// CHECK:   ret <8 x i8> [[VSLI_N]]
uint8x8_t test_vsli_n_u8(uint8x8_t a, uint8x8_t b) {
  return vsli_n_u8(a, b, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vsli_n_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VSLI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], i32 3)
// CHECK:   ret <4 x i16> [[VSLI_N2]]
uint16x4_t test_vsli_n_u16(uint16x4_t a, uint16x4_t b) {
  return vsli_n_u16(a, b, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vsli_n_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VSLI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], i32 3)
// CHECK:   ret <2 x i32> [[VSLI_N2]]
uint32x2_t test_vsli_n_u32(uint32x2_t a, uint32x2_t b) {
  return vsli_n_u32(a, b, 3);
}

// CHECK-LABEL: define <16 x i8> @test_vsliq_n_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK:   [[VSLI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
// CHECK:   ret <16 x i8> [[VSLI_N]]
uint8x16_t test_vsliq_n_u8(uint8x16_t a, uint8x16_t b) {
  return vsliq_n_u8(a, b, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vsliq_n_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VSLI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], i32 3)
// CHECK:   ret <8 x i16> [[VSLI_N2]]
uint16x8_t test_vsliq_n_u16(uint16x8_t a, uint16x8_t b) {
  return vsliq_n_u16(a, b, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vsliq_n_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VSLI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], i32 3)
// CHECK:   ret <4 x i32> [[VSLI_N2]]
uint32x4_t test_vsliq_n_u32(uint32x4_t a, uint32x4_t b) {
  return vsliq_n_u32(a, b, 3);
}

// CHECK-LABEL: define <2 x i64> @test_vsliq_n_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VSLI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], i32 3)
// CHECK:   ret <2 x i64> [[VSLI_N2]]
uint64x2_t test_vsliq_n_u64(uint64x2_t a, uint64x2_t b) {
  return vsliq_n_u64(a, b, 3);
}

// CHECK-LABEL: define <8 x i8> @test_vsli_n_p8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK:   [[VSLI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
// CHECK:   ret <8 x i8> [[VSLI_N]]
poly8x8_t test_vsli_n_p8(poly8x8_t a, poly8x8_t b) {
  return vsli_n_p8(a, b, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vsli_n_p16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VSLI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], i32 15)
// CHECK:   ret <4 x i16> [[VSLI_N2]]
poly16x4_t test_vsli_n_p16(poly16x4_t a, poly16x4_t b) {
  return vsli_n_p16(a, b, 15);
}

// CHECK-LABEL: define <16 x i8> @test_vsliq_n_p8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK:   [[VSLI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
// CHECK:   ret <16 x i8> [[VSLI_N]]
poly8x16_t test_vsliq_n_p8(poly8x16_t a, poly8x16_t b) {
  return vsliq_n_p8(a, b, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vsliq_n_p16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VSLI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], i32 15)
// CHECK:   ret <8 x i16> [[VSLI_N2]]
poly16x8_t test_vsliq_n_p16(poly16x8_t a, poly16x8_t b) {
  return vsliq_n_p16(a, b, 15);
}

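// vqshlu_n/vqshluq_n: signed saturating shift left with an unsigned result,
// via llvm.aarch64.neon.sqshlu.*. Here the splatted operand is the positive
// shift amount itself (no negation, since this is a left shift), and lanes
// that would overflow saturate to the unsigned maximum of the element type.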
// CHECK-LABEL: define <8 x i8> @test_vqshlu_n_s8(<8 x i8> %a) #0 {
// CHECK:   [[VQSHLU_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> %a, <8 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
// CHECK:   ret <8 x i8> [[VQSHLU_N]]
uint8x8_t test_vqshlu_n_s8(int8x8_t a) {
  return vqshlu_n_s8(a, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vqshlu_n_s16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VQSHLU_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[VQSHLU_N]], <4 x i16> <i16 3, i16 3, i16 3, i16 3>)
// CHECK:   ret <4 x i16> [[VQSHLU_N1]]
uint16x4_t test_vqshlu_n_s16(int16x4_t a) {
  return vqshlu_n_s16(a, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vqshlu_n_s32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VQSHLU_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> [[VQSHLU_N]], <2 x i32> <i32 3, i32 3>)
// CHECK:   ret <2 x i32> [[VQSHLU_N1]]
uint32x2_t test_vqshlu_n_s32(int32x2_t a) {
  return vqshlu_n_s32(a, 3);
}

// CHECK-LABEL: define <16 x i8> @test_vqshluq_n_s8(<16 x i8> %a) #0 {
// CHECK:   [[VQSHLU_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> %a, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
// CHECK:   ret <16 x i8> [[VQSHLU_N]]
uint8x16_t test_vqshluq_n_s8(int8x16_t a) {
  return vqshluq_n_s8(a, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vqshluq_n_s16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQSHLU_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> [[VQSHLU_N]], <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
// CHECK:   ret <8 x i16> [[VQSHLU_N1]]
uint16x8_t test_vqshluq_n_s16(int16x8_t a) {
  return vqshluq_n_s16(a, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vqshluq_n_s32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQSHLU_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> [[VQSHLU_N]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
// CHECK:   ret <4 x i32> [[VQSHLU_N1]]
uint32x4_t test_vqshluq_n_s32(int32x4_t a) {
  return vqshluq_n_s32(a, 3);
}

// CHECK-LABEL: define <2 x i64> @test_vqshluq_n_s64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQSHLU_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> [[VQSHLU_N]], <2 x i64> <i64 3, i64 3>)
// CHECK:   ret <2 x i64> [[VQSHLU_N1]]
uint64x2_t test_vqshluq_n_s64(int64x2_t a) {
  return vqshluq_n_s64(a, 3);
}

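// vshrn_n/vshrn_high_n: narrowing shift right. No target intrinsic is needed:
// the IR is an ashr (signed) or lshr (unsigned) by a splat followed by a trunc
// to half-width lanes. The _high variants then use a shufflevector to append
// the narrowed result to the low half passed in %a. The immediates 3, 9 and 19
// probe each of the 16->8, 32->16 and 64->32 narrowings.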
// CHECK-LABEL: define <8 x i8> @test_vshrn_n_s16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[TMP2:%.*]] = ashr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK:   [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
// CHECK:   ret <8 x i8> [[VSHRN_N]]
int8x8_t test_vshrn_n_s16(int16x8_t a) {
  return vshrn_n_s16(a, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vshrn_n_s32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], <i32 9, i32 9, i32 9, i32 9>
// CHECK:   [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
// CHECK:   ret <4 x i16> [[VSHRN_N]]
int16x4_t test_vshrn_n_s32(int32x4_t a) {
  return vshrn_n_s32(a, 9);
}

// CHECK-LABEL: define <2 x i32> @test_vshrn_n_s64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[TMP2:%.*]] = ashr <2 x i64> [[TMP1]], <i64 19, i64 19>
// CHECK:   [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
// CHECK:   ret <2 x i32> [[VSHRN_N]]
int32x2_t test_vshrn_n_s64(int64x2_t a) {
  return vshrn_n_s64(a, 19);
}

// CHECK-LABEL: define <8 x i8> @test_vshrn_n_u16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK:   [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
// CHECK:   ret <8 x i8> [[VSHRN_N]]
uint8x8_t test_vshrn_n_u16(uint16x8_t a) {
  return vshrn_n_u16(a, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vshrn_n_u32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], <i32 9, i32 9, i32 9, i32 9>
// CHECK:   [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
// CHECK:   ret <4 x i16> [[VSHRN_N]]
uint16x4_t test_vshrn_n_u32(uint32x4_t a) {
  return vshrn_n_u32(a, 9);
}

// CHECK-LABEL: define <2 x i32> @test_vshrn_n_u64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], <i64 19, i64 19>
// CHECK:   [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
// CHECK:   ret <2 x i32> [[VSHRN_N]]
uint32x2_t test_vshrn_n_u64(uint64x2_t a) {
  return vshrn_n_u64(a, 19);
}

// CHECK-LABEL: define <16 x i8> @test_vshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[TMP2:%.*]] = ashr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK:   [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VSHRN_N]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vshrn_high_n_s16(int8x8_t a, int16x8_t b) {
  return vshrn_high_n_s16(a, b, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], <i32 9, i32 9, i32 9, i32 9>
// CHECK:   [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VSHRN_N]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vshrn_high_n_s32(int16x4_t a, int32x4_t b) {
  return vshrn_high_n_s32(a, b, 9);
}

// CHECK-LABEL: define <4 x i32> @test_vshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[TMP2:%.*]] = ashr <2 x i64> [[TMP1]], <i64 19, i64 19>
// CHECK:   [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VSHRN_N]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vshrn_high_n_s64(int32x2_t a, int64x2_t b) {
  return vshrn_high_n_s64(a, b, 19);
}

// CHECK-LABEL: define <16 x i8> @test_vshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK:   [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VSHRN_N]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vshrn_high_n_u16(uint8x8_t a, uint16x8_t b) {
  return vshrn_high_n_u16(a, b, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], <i32 9, i32 9, i32 9, i32 9>
// CHECK:   [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VSHRN_N]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vshrn_high_n_u32(uint16x4_t a, uint32x4_t b) {
  return vshrn_high_n_u32(a, b, 9);
}

// CHECK-LABEL: define <4 x i32> @test_vshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], <i64 19, i64 19>
// CHECK:   [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VSHRN_N]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I]]
uint32x4_t test_vshrn_high_n_u64(uint32x2_t a, uint64x2_t b) {
  return vshrn_high_n_u64(a, b, 19);
}

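// vqshrun_n/vqshrun_high_n: signed saturating shift right with unsigned
// narrow. These return to a target intrinsic (llvm.aarch64.neon.sqshrun.*),
// since the saturating clamp has no compact plain-IR equivalent; the shift
// amount is passed as a scalar i32.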
// CHECK-LABEL: define <8 x i8> @test_vqshrun_n_s16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[VQSHRUN_N]], i32 3)
// CHECK:   ret <8 x i8> [[VQSHRUN_N1]]
uint8x8_t test_vqshrun_n_s16(int16x8_t a) {
  return vqshrun_n_s16(a, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vqshrun_n_s32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[VQSHRUN_N]], i32 9)
// CHECK:   ret <4 x i16> [[VQSHRUN_N1]]
uint16x4_t test_vqshrun_n_s32(int32x4_t a) {
  return vqshrun_n_s32(a, 9);
}

// CHECK-LABEL: define <2 x i32> @test_vqshrun_n_s64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> [[VQSHRUN_N]], i32 19)
// CHECK:   ret <2 x i32> [[VQSHRUN_N1]]
uint32x2_t test_vqshrun_n_s64(int64x2_t a) {
  return vqshrun_n_s64(a, 19);
}

// CHECK-LABEL: define <16 x i8> @test_vqshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[VQSHRUN_N]], i32 3)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQSHRUN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vqshrun_high_n_s16(uint8x8_t a, int16x8_t b) {
  return vqshrun_high_n_s16(a, b, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vqshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[VQSHRUN_N]], i32 9)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQSHRUN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vqshrun_high_n_s32(uint16x4_t a, int32x4_t b) {
  return vqshrun_high_n_s32(a, b, 9);
}

// CHECK-LABEL: define <4 x i32> @test_vqshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> [[VQSHRUN_N]], i32 19)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQSHRUN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I]]
uint32x4_t test_vqshrun_high_n_s64(uint32x2_t a, int64x2_t b) {
  return vqshrun_high_n_s64(a, b, 19);
}

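// vrshrn_n/vrshrn_high_n: rounding shift right and narrow, lowered to
// llvm.aarch64.neon.rshrn.*. Note that the signed and unsigned C variants map
// to the same intrinsic: only the low half of each rounded lane survives the
// truncation, and those bits do not depend on whether the shift was ashr- or
// lshr-style.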
6505 // CHECK-LABEL: define <8 x i8> @test_vrshrn_n_s16(<8 x i16> %a) #0 {
6506 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
6507 // CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
6508 // CHECK:   [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3)
6509 // CHECK:   ret <8 x i8> [[VRSHRN_N1]]
test_vrshrn_n_s16(int16x8_t a)6510 int8x8_t test_vrshrn_n_s16(int16x8_t a) {
6511   return vrshrn_n_s16(a, 3);
6512 }
6513 
6514 // CHECK-LABEL: define <4 x i16> @test_vrshrn_n_s32(<4 x i32> %a) #0 {
6515 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
6516 // CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
6517 // CHECK:   [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9)
6518 // CHECK:   ret <4 x i16> [[VRSHRN_N1]]
test_vrshrn_n_s32(int32x4_t a)6519 int16x4_t test_vrshrn_n_s32(int32x4_t a) {
6520   return vrshrn_n_s32(a, 9);
6521 }
6522 
6523 // CHECK-LABEL: define <2 x i32> @test_vrshrn_n_s64(<2 x i64> %a) #0 {
6524 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
6525 // CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
6526 // CHECK:   [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19)
6527 // CHECK:   ret <2 x i32> [[VRSHRN_N1]]
test_vrshrn_n_s64(int64x2_t a)6528 int32x2_t test_vrshrn_n_s64(int64x2_t a) {
6529   return vrshrn_n_s64(a, 19);
6530 }
6531 
6532 // CHECK-LABEL: define <8 x i8> @test_vrshrn_n_u16(<8 x i16> %a) #0 {
6533 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
6534 // CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
6535 // CHECK:   [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3)
6536 // CHECK:   ret <8 x i8> [[VRSHRN_N1]]
test_vrshrn_n_u16(uint16x8_t a)6537 uint8x8_t test_vrshrn_n_u16(uint16x8_t a) {
6538   return vrshrn_n_u16(a, 3);
6539 }
6540 
6541 // CHECK-LABEL: define <4 x i16> @test_vrshrn_n_u32(<4 x i32> %a) #0 {
6542 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
6543 // CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
6544 // CHECK:   [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9)
6545 // CHECK:   ret <4 x i16> [[VRSHRN_N1]]
test_vrshrn_n_u32(uint32x4_t a)6546 uint16x4_t test_vrshrn_n_u32(uint32x4_t a) {
6547   return vrshrn_n_u32(a, 9);
6548 }
6549 
6550 // CHECK-LABEL: define <2 x i32> @test_vrshrn_n_u64(<2 x i64> %a) #0 {
6551 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
6552 // CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
6553 // CHECK:   [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19)
6554 // CHECK:   ret <2 x i32> [[VRSHRN_N1]]
test_vrshrn_n_u64(uint64x2_t a)6555 uint32x2_t test_vrshrn_n_u64(uint64x2_t a) {
6556   return vrshrn_n_u64(a, 19);
6557 }
6558 
6559 // CHECK-LABEL: define <16 x i8> @test_vrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) #0 {
6560 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
6561 // CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
6562 // CHECK:   [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3)
6563 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VRSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6564 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vrshrn_high_n_s16(int8x8_t a, int16x8_t b) {
  return vrshrn_high_n_s16(a, b, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VRSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vrshrn_high_n_s32(int16x4_t a, int32x4_t b) {
  return vrshrn_high_n_s32(a, b, 9);
}

// CHECK-LABEL: define <4 x i32> @test_vrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VRSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vrshrn_high_n_s64(int32x2_t a, int64x2_t b) {
  return vrshrn_high_n_s64(a, b, 19);
}

// CHECK-LABEL: define <16 x i8> @test_vrshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VRSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vrshrn_high_n_u16(uint8x8_t a, uint16x8_t b) {
  return vrshrn_high_n_u16(a, b, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vrshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VRSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vrshrn_high_n_u32(uint16x4_t a, uint32x4_t b) {
  return vrshrn_high_n_u32(a, b, 9);
}

// CHECK-LABEL: define <4 x i32> @test_vrshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VRSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I]]
uint32x4_t test_vrshrn_high_n_u64(uint32x2_t a, uint64x2_t b) {
  return vrshrn_high_n_u64(a, b, 19);
}

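// The following vqrshrun_n tests cover the signed saturating rounded shift
// right unsigned narrow intrinsics. As a rough per-lane sketch (assuming the
// usual ACLE semantics; the CHECK lines only verify the lowering, not the
// arithmetic): r[i] = saturate_to_unsigned_narrow((a[i] + (1 << (n - 1))) >> n).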
// CHECK-LABEL: define <8 x i8> @test_vqrshrun_n_s16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQRSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[VQRSHRUN_N]], i32 3)
// CHECK:   ret <8 x i8> [[VQRSHRUN_N1]]
int8x8_t test_vqrshrun_n_s16(int16x8_t a) {
  return vqrshrun_n_s16(a, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vqrshrun_n_s32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQRSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[VQRSHRUN_N]], i32 9)
// CHECK:   ret <4 x i16> [[VQRSHRUN_N1]]
int16x4_t test_vqrshrun_n_s32(int32x4_t a) {
  return vqrshrun_n_s32(a, 9);
}

// CHECK-LABEL: define <2 x i32> @test_vqrshrun_n_s64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQRSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> [[VQRSHRUN_N]], i32 19)
// CHECK:   ret <2 x i32> [[VQRSHRUN_N1]]
int32x2_t test_vqrshrun_n_s64(int64x2_t a) {
  return vqrshrun_n_s64(a, 19);
}

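// Every *_high_n narrowing test that follows has the same shape: the wide
// operand b is narrowed by the shift intrinsic, and the shufflevector in the
// CHECK block concatenates the existing low half a with the narrowed result
// into a single 128-bit vector.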
// CHECK-LABEL: define <16 x i8> @test_vqrshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQRSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[VQRSHRUN_N]], i32 3)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQRSHRUN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vqrshrun_high_n_s16(int8x8_t a, int16x8_t b) {
  return vqrshrun_high_n_s16(a, b, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vqrshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQRSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[VQRSHRUN_N]], i32 9)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQRSHRUN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vqrshrun_high_n_s32(int16x4_t a, int32x4_t b) {
  return vqrshrun_high_n_s32(a, b, 9);
}

// CHECK-LABEL: define <4 x i32> @test_vqrshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQRSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> [[VQRSHRUN_N]], i32 19)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQRSHRUN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vqrshrun_high_n_s64(int32x2_t a, int64x2_t b) {
  return vqrshrun_high_n_s64(a, b, 19);
}

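// vqshrn_n is the saturating (non-rounding) shift right narrow: the shifted
// value truncates and then saturates to the signed (sqshrn) or unsigned
// (uqshrn) narrow range, as the intrinsic names in the CHECK lines show. A
// per-lane sketch, assuming the standard ACLE definition:
//   r[i] = saturate_to_narrow(a[i] >> n)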
// CHECK-LABEL: define <8 x i8> @test_vqshrn_n_s16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3)
// CHECK:   ret <8 x i8> [[VQSHRN_N1]]
int8x8_t test_vqshrn_n_s16(int16x8_t a) {
  return vqshrn_n_s16(a, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vqshrn_n_s32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9)
// CHECK:   ret <4 x i16> [[VQSHRN_N1]]
int16x4_t test_vqshrn_n_s32(int32x4_t a) {
  return vqshrn_n_s32(a, 9);
}

// CHECK-LABEL: define <2 x i32> @test_vqshrn_n_s64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19)
// CHECK:   ret <2 x i32> [[VQSHRN_N1]]
int32x2_t test_vqshrn_n_s64(int64x2_t a) {
  return vqshrn_n_s64(a, 19);
}

// CHECK-LABEL: define <8 x i8> @test_vqshrn_n_u16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3)
// CHECK:   ret <8 x i8> [[VQSHRN_N1]]
uint8x8_t test_vqshrn_n_u16(uint16x8_t a) {
  return vqshrn_n_u16(a, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vqshrn_n_u32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9)
// CHECK:   ret <4 x i16> [[VQSHRN_N1]]
uint16x4_t test_vqshrn_n_u32(uint32x4_t a) {
  return vqshrn_n_u32(a, 9);
}

// CHECK-LABEL: define <2 x i32> @test_vqshrn_n_u64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19)
// CHECK:   ret <2 x i32> [[VQSHRN_N1]]
uint32x2_t test_vqshrn_n_u64(uint64x2_t a) {
  return vqshrn_n_u64(a, 19);
}

// CHECK-LABEL: define <16 x i8> @test_vqshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vqshrn_high_n_s16(int8x8_t a, int16x8_t b) {
  return vqshrn_high_n_s16(a, b, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vqshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vqshrn_high_n_s32(int16x4_t a, int32x4_t b) {
  return vqshrn_high_n_s32(a, b, 9);
}

// CHECK-LABEL: define <4 x i32> @test_vqshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vqshrn_high_n_s64(int32x2_t a, int64x2_t b) {
  return vqshrn_high_n_s64(a, b, 19);
}

// CHECK-LABEL: define <16 x i8> @test_vqshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vqshrn_high_n_u16(uint8x8_t a, uint16x8_t b) {
  return vqshrn_high_n_u16(a, b, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vqshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vqshrn_high_n_u32(uint16x4_t a, uint32x4_t b) {
  return vqshrn_high_n_u32(a, b, 9);
}

// CHECK-LABEL: define <4 x i32> @test_vqshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I]]
uint32x4_t test_vqshrn_high_n_u64(uint32x2_t a, uint64x2_t b) {
  return vqshrn_high_n_u64(a, b, 19);
}

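// vqrshrn_n combines rounding and saturation: a rounding constant of
// 1 << (n - 1) is added before the shift and the result saturates to the
// narrow range, roughly r[i] = saturate_to_narrow((a[i] + (1 << (n - 1))) >> n)
// under the usual ACLE semantics.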
// CHECK-LABEL: define <8 x i8> @test_vqrshrn_n_s16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
// CHECK:   ret <8 x i8> [[VQRSHRN_N1]]
int8x8_t test_vqrshrn_n_s16(int16x8_t a) {
  return vqrshrn_n_s16(a, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vqrshrn_n_s32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
// CHECK:   ret <4 x i16> [[VQRSHRN_N1]]
int16x4_t test_vqrshrn_n_s32(int32x4_t a) {
  return vqrshrn_n_s32(a, 9);
}

// CHECK-LABEL: define <2 x i32> @test_vqrshrn_n_s64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
// CHECK:   ret <2 x i32> [[VQRSHRN_N1]]
int32x2_t test_vqrshrn_n_s64(int64x2_t a) {
  return vqrshrn_n_s64(a, 19);
}

// CHECK-LABEL: define <8 x i8> @test_vqrshrn_n_u16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
// CHECK:   ret <8 x i8> [[VQRSHRN_N1]]
uint8x8_t test_vqrshrn_n_u16(uint16x8_t a) {
  return vqrshrn_n_u16(a, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vqrshrn_n_u32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
// CHECK:   ret <4 x i16> [[VQRSHRN_N1]]
uint16x4_t test_vqrshrn_n_u32(uint32x4_t a) {
  return vqrshrn_n_u32(a, 9);
}

// CHECK-LABEL: define <2 x i32> @test_vqrshrn_n_u64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
// CHECK:   ret <2 x i32> [[VQRSHRN_N1]]
uint32x2_t test_vqrshrn_n_u64(uint64x2_t a) {
  return vqrshrn_n_u64(a, 19);
}

// CHECK-LABEL: define <16 x i8> @test_vqrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQRSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vqrshrn_high_n_s16(int8x8_t a, int16x8_t b) {
  return vqrshrn_high_n_s16(a, b, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vqrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQRSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vqrshrn_high_n_s32(int16x4_t a, int32x4_t b) {
  return vqrshrn_high_n_s32(a, b, 9);
}

// CHECK-LABEL: define <4 x i32> @test_vqrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQRSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vqrshrn_high_n_s64(int32x2_t a, int64x2_t b) {
  return vqrshrn_high_n_s64(a, b, 19);
}

// CHECK-LABEL: define <16 x i8> @test_vqrshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQRSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vqrshrn_high_n_u16(uint8x8_t a, uint16x8_t b) {
  return vqrshrn_high_n_u16(a, b, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vqrshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQRSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vqrshrn_high_n_u32(uint16x4_t a, uint32x4_t b) {
  return vqrshrn_high_n_u32(a, b, 9);
}

// CHECK-LABEL: define <4 x i32> @test_vqrshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQRSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I]]
uint32x4_t test_vqrshrn_high_n_u64(uint32x2_t a, uint64x2_t b) {
  return vqrshrn_high_n_u64(a, b, 19);
}

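// vshll_n (shift left long) widens each lane and then shifts it left. No
// target intrinsic is involved here: the CHECK lines show a plain sext/zext
// to the double-width element type followed by a vector shl by the splatted
// immediate.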
// CHECK-LABEL: define <8 x i16> @test_vshll_n_s8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = sext <8 x i8> %a to <8 x i16>
// CHECK:   [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK:   ret <8 x i16> [[VSHLL_N]]
int16x8_t test_vshll_n_s8(int8x8_t a) {
  return vshll_n_s8(a, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vshll_n_s16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK:   [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 9, i32 9, i32 9, i32 9>
// CHECK:   ret <4 x i32> [[VSHLL_N]]
int32x4_t test_vshll_n_s16(int16x4_t a) {
  return vshll_n_s16(a, 9);
}

// CHECK-LABEL: define <2 x i64> @test_vshll_n_s32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK:   [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 19, i64 19>
// CHECK:   ret <2 x i64> [[VSHLL_N]]
int64x2_t test_vshll_n_s32(int32x2_t a) {
  return vshll_n_s32(a, 19);
}

// CHECK-LABEL: define <8 x i16> @test_vshll_n_u8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = zext <8 x i8> %a to <8 x i16>
// CHECK:   [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK:   ret <8 x i16> [[VSHLL_N]]
uint16x8_t test_vshll_n_u8(uint8x8_t a) {
  return vshll_n_u8(a, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vshll_n_u16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK:   [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 9, i32 9, i32 9, i32 9>
// CHECK:   ret <4 x i32> [[VSHLL_N]]
uint32x4_t test_vshll_n_u16(uint16x4_t a) {
  return vshll_n_u16(a, 9);
}

// CHECK-LABEL: define <2 x i64> @test_vshll_n_u32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK:   [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 19, i64 19>
// CHECK:   ret <2 x i64> [[VSHLL_N]]
uint64x2_t test_vshll_n_u32(uint32x2_t a) {
  return vshll_n_u32(a, 19);
}

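// The vshll_high_n variants first extract the upper half of the 128-bit input
// with a shufflevector, then apply the same extend-and-shift sequence.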
// CHECK-LABEL: define <8 x i16> @test_vshll_high_n_s8(<16 x i8> %a) #0 {
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I]] to <8 x i16>
// CHECK:   [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK:   ret <8 x i16> [[VSHLL_N]]
int16x8_t test_vshll_high_n_s8(int8x16_t a) {
  return vshll_high_n_s8(a, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vshll_high_n_s16(<8 x i16> %a) #0 {
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK:   [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 9, i32 9, i32 9, i32 9>
// CHECK:   ret <4 x i32> [[VSHLL_N]]
int32x4_t test_vshll_high_n_s16(int16x8_t a) {
  return vshll_high_n_s16(a, 9);
}

// CHECK-LABEL: define <2 x i64> @test_vshll_high_n_s32(<4 x i32> %a) #0 {
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK:   [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 19, i64 19>
// CHECK:   ret <2 x i64> [[VSHLL_N]]
int64x2_t test_vshll_high_n_s32(int32x4_t a) {
  return vshll_high_n_s32(a, 19);
}

// CHECK-LABEL: define <8 x i16> @test_vshll_high_n_u8(<16 x i8> %a) #0 {
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I]] to <8 x i16>
// CHECK:   [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK:   ret <8 x i16> [[VSHLL_N]]
uint16x8_t test_vshll_high_n_u8(uint8x16_t a) {
  return vshll_high_n_u8(a, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vshll_high_n_u16(<8 x i16> %a) #0 {
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK:   [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 9, i32 9, i32 9, i32 9>
// CHECK:   ret <4 x i32> [[VSHLL_N]]
uint32x4_t test_vshll_high_n_u16(uint16x8_t a) {
  return vshll_high_n_u16(a, 9);
}

// CHECK-LABEL: define <2 x i64> @test_vshll_high_n_u32(<4 x i32> %a) #0 {
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK:   [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 19, i64 19>
// CHECK:   ret <2 x i64> [[VSHLL_N]]
uint64x2_t test_vshll_high_n_u32(uint32x4_t a) {
  return vshll_high_n_u32(a, 19);
}

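// vmovl (lengthening move) is a pure widening and lowers to nothing more than
// a sext (signed) or zext (unsigned) to the double-width vector type.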
// CHECK-LABEL: define <8 x i16> @test_vmovl_s8(<8 x i8> %a) #0 {
// CHECK:   [[VMOVL_I:%.*]] = sext <8 x i8> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[VMOVL_I]]
int16x8_t test_vmovl_s8(int8x8_t a) {
  return vmovl_s8(a);
}

// CHECK-LABEL: define <4 x i32> @test_vmovl_s16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VMOVL_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK:   ret <4 x i32> [[VMOVL_I]]
int32x4_t test_vmovl_s16(int16x4_t a) {
  return vmovl_s16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vmovl_s32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VMOVL_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK:   ret <2 x i64> [[VMOVL_I]]
int64x2_t test_vmovl_s32(int32x2_t a) {
  return vmovl_s32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vmovl_u8(<8 x i8> %a) #0 {
// CHECK:   [[VMOVL_I:%.*]] = zext <8 x i8> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[VMOVL_I]]
uint16x8_t test_vmovl_u8(uint8x8_t a) {
  return vmovl_u8(a);
}

// CHECK-LABEL: define <4 x i32> @test_vmovl_u16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VMOVL_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK:   ret <4 x i32> [[VMOVL_I]]
uint32x4_t test_vmovl_u16(uint16x4_t a) {
  return vmovl_u16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vmovl_u32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VMOVL_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK:   ret <2 x i64> [[VMOVL_I]]
uint64x2_t test_vmovl_u32(uint32x2_t a) {
  return vmovl_u32(a);
}

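// vmovl_high widens only the upper half: a shufflevector selects the high
// lanes of the 128-bit input before the sext/zext seen in the vmovl tests.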
// CHECK-LABEL: define <8 x i16> @test_vmovl_high_s8(<16 x i8> %a) #0 {
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
int16x8_t test_vmovl_high_s8(int8x16_t a) {
  return vmovl_high_s8(a);
}

// CHECK-LABEL: define <4 x i32> @test_vmovl_high_s16(<8 x i16> %a) #0 {
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP2]]
int32x4_t test_vmovl_high_s16(int16x8_t a) {
  return vmovl_high_s16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vmovl_high_s32(<4 x i32> %a) #0 {
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP2]]
int64x2_t test_vmovl_high_s32(int32x4_t a) {
  return vmovl_high_s32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vmovl_high_u8(<16 x i8> %a) #0 {
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
uint16x8_t test_vmovl_high_u8(uint8x16_t a) {
  return vmovl_high_u8(a);
}

// CHECK-LABEL: define <4 x i32> @test_vmovl_high_u16(<8 x i16> %a) #0 {
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP2]]
uint32x4_t test_vmovl_high_u16(uint16x8_t a) {
  return vmovl_high_u16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vmovl_high_u32(<4 x i32> %a) #0 {
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP2]]
uint64x2_t test_vmovl_high_u32(uint32x4_t a) {
  return vmovl_high_u32(a);
}

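// The vcvt_n tests below convert fixed-point integers with n fractional bits
// to floating point through the vcvtfxs2fp/vcvtfxu2fp intrinsics; as a
// semantics sketch (assuming the ACLE definition), r[i] = x[i] * 2^-n.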
// CHECK-LABEL: define <2 x float> @test_vcvt_n_f32_s32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VCVT_N1:%.*]] = call <2 x float> @llvm.aarch64.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> [[VCVT_N]], i32 31)
// CHECK:   ret <2 x float> [[VCVT_N1]]
float32x2_t test_vcvt_n_f32_s32(int32x2_t a) {
  return vcvt_n_f32_s32(a, 31);
}

// CHECK-LABEL: define <4 x float> @test_vcvtq_n_f32_s32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VCVT_N1:%.*]] = call <4 x float> @llvm.aarch64.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> [[VCVT_N]], i32 31)
// CHECK:   ret <4 x float> [[VCVT_N1]]
float32x4_t test_vcvtq_n_f32_s32(int32x4_t a) {
  return vcvtq_n_f32_s32(a, 31);
}

// CHECK-LABEL: define <2 x double> @test_vcvtq_n_f64_s64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VCVT_N1:%.*]] = call <2 x double> @llvm.aarch64.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64> [[VCVT_N]], i32 50)
// CHECK:   ret <2 x double> [[VCVT_N1]]
float64x2_t test_vcvtq_n_f64_s64(int64x2_t a) {
  return vcvtq_n_f64_s64(a, 50);
}

// CHECK-LABEL: define <2 x float> @test_vcvt_n_f32_u32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VCVT_N1:%.*]] = call <2 x float> @llvm.aarch64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> [[VCVT_N]], i32 31)
// CHECK:   ret <2 x float> [[VCVT_N1]]
float32x2_t test_vcvt_n_f32_u32(uint32x2_t a) {
  return vcvt_n_f32_u32(a, 31);
}

// CHECK-LABEL: define <4 x float> @test_vcvtq_n_f32_u32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VCVT_N1:%.*]] = call <4 x float> @llvm.aarch64.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> [[VCVT_N]], i32 31)
// CHECK:   ret <4 x float> [[VCVT_N1]]
float32x4_t test_vcvtq_n_f32_u32(uint32x4_t a) {
  return vcvtq_n_f32_u32(a, 31);
}

// CHECK-LABEL: define <2 x double> @test_vcvtq_n_f64_u64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VCVT_N1:%.*]] = call <2 x double> @llvm.aarch64.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64> [[VCVT_N]], i32 50)
// CHECK:   ret <2 x double> [[VCVT_N1]]
float64x2_t test_vcvtq_n_f64_u64(uint64x2_t a) {
  return vcvtq_n_f64_u64(a, 50);
}

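// The reverse direction uses vcvtfp2fxs/vcvtfp2fxu: the input is scaled by
// 2^n and converted to a fixed-point integer, roughly r[i] = x[i] * 2^n with
// round-toward-zero and saturation (again a sketch of the ACLE semantics, not
// something these CHECK lines pin down).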
// CHECK-LABEL: define <2 x i32> @test_vcvt_n_s32_f32(<2 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[VCVT_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> [[VCVT_N]], i32 31)
// CHECK:   ret <2 x i32> [[VCVT_N1]]
int32x2_t test_vcvt_n_s32_f32(float32x2_t a) {
  return vcvt_n_s32_f32(a, 31);
}

// CHECK-LABEL: define <4 x i32> @test_vcvtq_n_s32_f32(<4 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK:   [[VCVT_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> [[VCVT_N]], i32 31)
// CHECK:   ret <4 x i32> [[VCVT_N1]]
int32x4_t test_vcvtq_n_s32_f32(float32x4_t a) {
  return vcvtq_n_s32_f32(a, 31);
}

// CHECK-LABEL: define <2 x i64> @test_vcvtq_n_s64_f64(<2 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[VCVT_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double> [[VCVT_N]], i32 50)
// CHECK:   ret <2 x i64> [[VCVT_N1]]
int64x2_t test_vcvtq_n_s64_f64(float64x2_t a) {
  return vcvtq_n_s64_f64(a, 50);
}

// CHECK-LABEL: define <2 x i32> @test_vcvt_n_u32_f32(<2 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[VCVT_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> [[VCVT_N]], i32 31)
// CHECK:   ret <2 x i32> [[VCVT_N1]]
uint32x2_t test_vcvt_n_u32_f32(float32x2_t a) {
  return vcvt_n_u32_f32(a, 31);
}

// CHECK-LABEL: define <4 x i32> @test_vcvtq_n_u32_f32(<4 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK:   [[VCVT_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> [[VCVT_N]], i32 31)
// CHECK:   ret <4 x i32> [[VCVT_N1]]
uint32x4_t test_vcvtq_n_u32_f32(float32x4_t a) {
  return vcvtq_n_u32_f32(a, 31);
}

// CHECK-LABEL: define <2 x i64> @test_vcvtq_n_u64_f64(<2 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[VCVT_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double> [[VCVT_N]], i32 50)
// CHECK:   ret <2 x i64> [[VCVT_N1]]
uint64x2_t test_vcvtq_n_u64_f64(float64x2_t a) {
  return vcvtq_n_u64_f64(a, 50);
}

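// vaddl (add long) widens both narrow operands before adding, so the sum is
// exact in the double-width type; the IR is just two sext/zext operations
// feeding an ordinary vector add.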
// CHECK-LABEL: define <8 x i16> @test_vaddl_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK:   [[VMOVL_I_I:%.*]] = sext <8 x i8> %a to <8 x i16>
// CHECK:   [[VMOVL_I4_I:%.*]] = sext <8 x i8> %b to <8 x i16>
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
int16x8_t test_vaddl_s8(int8x8_t a, int8x8_t b) {
  return vaddl_s8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vaddl_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VMOVL_I_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK:   [[VMOVL_I4_I:%.*]] = sext <4 x i16> [[TMP3]] to <4 x i32>
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
int32x4_t test_vaddl_s16(int16x4_t a, int16x4_t b) {
  return vaddl_s16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vaddl_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VMOVL_I_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK:   [[VMOVL_I4_I:%.*]] = sext <2 x i32> [[TMP3]] to <2 x i64>
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK:   ret <2 x i64> [[ADD_I]]
int64x2_t test_vaddl_s32(int32x2_t a, int32x2_t b) {
  return vaddl_s32(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vaddl_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK:   [[VMOVL_I_I:%.*]] = zext <8 x i8> %a to <8 x i16>
// CHECK:   [[VMOVL_I4_I:%.*]] = zext <8 x i8> %b to <8 x i16>
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
uint16x8_t test_vaddl_u8(uint8x8_t a, uint8x8_t b) {
  return vaddl_u8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vaddl_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK:   [[VMOVL_I4_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
uint32x4_t test_vaddl_u16(uint16x4_t a, uint16x4_t b) {
  return vaddl_u16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vaddl_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK:   [[VMOVL_I4_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK:   ret <2 x i64> [[ADD_I]]
uint64x2_t test_vaddl_u32(uint32x2_t a, uint32x2_t b) {
  return vaddl_u32(a, b);
}

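// The widening *_high variants (vaddl_high here, vaddw_high and vsubl_high
// below) differ only in taking the upper half of each 128-bit input, via the
// shufflevector that opens each CHECK block, before extending.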
// CHECK-LABEL: define <8 x i16> @test_vaddl_high_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
// CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[TMP1:%.*]] = sext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16>
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> [[TMP0]], [[TMP1]]
// CHECK:   ret <8 x i16> [[ADD_I]]
int16x8_t test_vaddl_high_s8(int8x16_t a, int8x16_t b) {
  return vaddl_high_s8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vaddl_high_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP3:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8>
// CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
// CHECK:   [[TMP5:%.*]] = sext <4 x i16> [[TMP4]] to <4 x i32>
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> [[TMP2]], [[TMP5]]
// CHECK:   ret <4 x i32> [[ADD_I]]
int32x4_t test_vaddl_high_s16(int16x8_t a, int16x8_t b) {
  return vaddl_high_s16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vaddl_high_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP3:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8>
// CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
// CHECK:   [[TMP5:%.*]] = sext <2 x i32> [[TMP4]] to <2 x i64>
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> [[TMP2]], [[TMP5]]
// CHECK:   ret <2 x i64> [[ADD_I]]
int64x2_t test_vaddl_high_s32(int32x4_t a, int32x4_t b) {
  return vaddl_high_s32(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vaddl_high_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
// CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[TMP1:%.*]] = zext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16>
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> [[TMP0]], [[TMP1]]
// CHECK:   ret <8 x i16> [[ADD_I]]
uint16x8_t test_vaddl_high_u8(uint8x16_t a, uint8x16_t b) {
  return vaddl_high_u8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vaddl_high_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP3:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8>
// CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
// CHECK:   [[TMP5:%.*]] = zext <4 x i16> [[TMP4]] to <4 x i32>
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> [[TMP2]], [[TMP5]]
// CHECK:   ret <4 x i32> [[ADD_I]]
uint32x4_t test_vaddl_high_u16(uint16x8_t a, uint16x8_t b) {
  return vaddl_high_u16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vaddl_high_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP3:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8>
// CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
// CHECK:   [[TMP5:%.*]] = zext <2 x i32> [[TMP4]] to <2 x i64>
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> [[TMP2]], [[TMP5]]
// CHECK:   ret <2 x i64> [[ADD_I]]
uint64x2_t test_vaddl_high_u32(uint32x4_t a, uint32x4_t b) {
  return vaddl_high_u32(a, b);
}

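// vaddw (add wide) widens only the second, narrow operand and adds it to the
// already-wide first operand.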
// CHECK-LABEL: define <8 x i16> @test_vaddw_s8(<8 x i16> %a, <8 x i8> %b) #0 {
// CHECK:   [[VMOVL_I_I:%.*]] = sext <8 x i8> %b to <8 x i16>
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
int16x8_t test_vaddw_s8(int16x8_t a, int8x8_t b) {
  return vaddw_s8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vaddw_s16(<4 x i32> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VMOVL_I_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
int32x4_t test_vaddw_s16(int32x4_t a, int16x4_t b) {
  return vaddw_s16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vaddw_s32(<2 x i64> %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VMOVL_I_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I]]
// CHECK:   ret <2 x i64> [[ADD_I]]
int64x2_t test_vaddw_s32(int64x2_t a, int32x2_t b) {
  return vaddw_s32(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vaddw_u8(<8 x i16> %a, <8 x i8> %b) #0 {
// CHECK:   [[VMOVL_I_I:%.*]] = zext <8 x i8> %b to <8 x i16>
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
uint16x8_t test_vaddw_u8(uint16x8_t a, uint8x8_t b) {
  return vaddw_u8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vaddw_u16(<4 x i32> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
uint32x4_t test_vaddw_u16(uint32x4_t a, uint16x4_t b) {
  return vaddw_u16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vaddw_u32(<2 x i64> %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I]]
// CHECK:   ret <2 x i64> [[ADD_I]]
uint64x2_t test_vaddw_u32(uint64x2_t a, uint32x2_t b) {
  return vaddw_u32(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vaddw_high_s8(<8 x i16> %a, <16 x i8> %b) #0 {
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[TMP0]]
// CHECK:   ret <8 x i16> [[ADD_I]]
int16x8_t test_vaddw_high_s8(int16x8_t a, int8x16_t b) {
  return vaddw_high_s8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vaddw_high_s16(<4 x i32> %a, <8 x i16> %b) #0 {
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[TMP2]]
// CHECK:   ret <4 x i32> [[ADD_I]]
int32x4_t test_vaddw_high_s16(int32x4_t a, int16x8_t b) {
  return vaddw_high_s16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vaddw_high_s32(<2 x i64> %a, <4 x i32> %b) #0 {
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, [[TMP2]]
// CHECK:   ret <2 x i64> [[ADD_I]]
int64x2_t test_vaddw_high_s32(int64x2_t a, int32x4_t b) {
  return vaddw_high_s32(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vaddw_high_u8(<8 x i16> %a, <16 x i8> %b) #0 {
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[TMP0]]
// CHECK:   ret <8 x i16> [[ADD_I]]
uint16x8_t test_vaddw_high_u8(uint16x8_t a, uint8x16_t b) {
  return vaddw_high_u8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vaddw_high_u16(<4 x i32> %a, <8 x i16> %b) #0 {
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[TMP2]]
// CHECK:   ret <4 x i32> [[ADD_I]]
uint32x4_t test_vaddw_high_u16(uint32x4_t a, uint16x8_t b) {
  return vaddw_high_u16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vaddw_high_u32(<2 x i64> %a, <4 x i32> %b) #0 {
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, [[TMP2]]
// CHECK:   ret <2 x i64> [[ADD_I]]
uint64x2_t test_vaddw_high_u32(uint64x2_t a, uint32x4_t b) {
  return vaddw_high_u32(a, b);
}

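// vsubl mirrors vaddl with a subtraction: both operands are extended to the
// double-width type, so the difference is computed exactly.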
// CHECK-LABEL: define <8 x i16> @test_vsubl_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK:   [[VMOVL_I_I:%.*]] = sext <8 x i8> %a to <8 x i16>
// CHECK:   [[VMOVL_I4_I:%.*]] = sext <8 x i8> %b to <8 x i16>
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK:   ret <8 x i16> [[SUB_I]]
int16x8_t test_vsubl_s8(int8x8_t a, int8x8_t b) {
  return vsubl_s8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vsubl_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VMOVL_I_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK:   [[VMOVL_I4_I:%.*]] = sext <4 x i16> [[TMP3]] to <4 x i32>
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK:   ret <4 x i32> [[SUB_I]]
int32x4_t test_vsubl_s16(int16x4_t a, int16x4_t b) {
  return vsubl_s16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vsubl_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VMOVL_I_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK:   [[VMOVL_I4_I:%.*]] = sext <2 x i32> [[TMP3]] to <2 x i64>
// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK:   ret <2 x i64> [[SUB_I]]
int64x2_t test_vsubl_s32(int32x2_t a, int32x2_t b) {
  return vsubl_s32(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vsubl_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK:   [[VMOVL_I_I:%.*]] = zext <8 x i8> %a to <8 x i16>
// CHECK:   [[VMOVL_I4_I:%.*]] = zext <8 x i8> %b to <8 x i16>
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK:   ret <8 x i16> [[SUB_I]]
uint16x8_t test_vsubl_u8(uint8x8_t a, uint8x8_t b) {
  return vsubl_u8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vsubl_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK:   [[VMOVL_I4_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK:   ret <4 x i32> [[SUB_I]]
uint32x4_t test_vsubl_u16(uint16x4_t a, uint16x4_t b) {
  return vsubl_u16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vsubl_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK:   [[VMOVL_I4_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK:   ret <2 x i64> [[SUB_I]]
uint64x2_t test_vsubl_u32(uint32x2_t a, uint32x2_t b) {
  return vsubl_u32(a, b);
}

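// vsubl_high_*: widening subtract of the high halves of the 128-bit operands.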
// CHECK-LABEL: define <8 x i16> @test_vsubl_high_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
// CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[TMP1:%.*]] = sext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16>
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> [[TMP0]], [[TMP1]]
// CHECK:   ret <8 x i16> [[SUB_I]]
int16x8_t test_vsubl_high_s8(int8x16_t a, int8x16_t b) {
  return vsubl_high_s8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vsubl_high_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP3:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8>
// CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
// CHECK:   [[TMP5:%.*]] = sext <4 x i16> [[TMP4]] to <4 x i32>
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> [[TMP2]], [[TMP5]]
// CHECK:   ret <4 x i32> [[SUB_I]]
int32x4_t test_vsubl_high_s16(int16x8_t a, int16x8_t b) {
  return vsubl_high_s16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vsubl_high_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP3:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8>
// CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
// CHECK:   [[TMP5:%.*]] = sext <2 x i32> [[TMP4]] to <2 x i64>
// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> [[TMP2]], [[TMP5]]
// CHECK:   ret <2 x i64> [[SUB_I]]
int64x2_t test_vsubl_high_s32(int32x4_t a, int32x4_t b) {
  return vsubl_high_s32(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vsubl_high_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
// CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[TMP1:%.*]] = zext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16>
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> [[TMP0]], [[TMP1]]
// CHECK:   ret <8 x i16> [[SUB_I]]
uint16x8_t test_vsubl_high_u8(uint8x16_t a, uint8x16_t b) {
  return vsubl_high_u8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vsubl_high_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP3:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8>
// CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
// CHECK:   [[TMP5:%.*]] = zext <4 x i16> [[TMP4]] to <4 x i32>
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> [[TMP2]], [[TMP5]]
// CHECK:   ret <4 x i32> [[SUB_I]]
uint32x4_t test_vsubl_high_u16(uint16x8_t a, uint16x8_t b) {
  return vsubl_high_u16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vsubl_high_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP3:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8>
// CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
// CHECK:   [[TMP5:%.*]] = zext <2 x i32> [[TMP4]] to <2 x i64>
// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> [[TMP2]], [[TMP5]]
// CHECK:   ret <2 x i64> [[SUB_I]]
uint64x2_t test_vsubl_high_u32(uint32x4_t a, uint32x4_t b) {
  return vsubl_high_u32(a, b);
}

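// vsubw_*: wide subtract; only the narrow operand is extended before the sub.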
// CHECK-LABEL: define <8 x i16> @test_vsubw_s8(<8 x i16> %a, <8 x i8> %b) #0 {
// CHECK:   [[VMOVL_I_I:%.*]] = sext <8 x i8> %b to <8 x i16>
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMOVL_I_I]]
// CHECK:   ret <8 x i16> [[SUB_I]]
int16x8_t test_vsubw_s8(int16x8_t a, int8x8_t b) {
  return vsubw_s8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vsubw_s16(<4 x i32> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VMOVL_I_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMOVL_I_I]]
// CHECK:   ret <4 x i32> [[SUB_I]]
int32x4_t test_vsubw_s16(int32x4_t a, int16x4_t b) {
  return vsubw_s16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vsubw_s32(<2 x i64> %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VMOVL_I_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMOVL_I_I]]
// CHECK:   ret <2 x i64> [[SUB_I]]
int64x2_t test_vsubw_s32(int64x2_t a, int32x2_t b) {
  return vsubw_s32(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vsubw_u8(<8 x i16> %a, <8 x i8> %b) #0 {
// CHECK:   [[VMOVL_I_I:%.*]] = zext <8 x i8> %b to <8 x i16>
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMOVL_I_I]]
// CHECK:   ret <8 x i16> [[SUB_I]]
uint16x8_t test_vsubw_u8(uint16x8_t a, uint8x8_t b) {
  return vsubw_u8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vsubw_u16(<4 x i32> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMOVL_I_I]]
// CHECK:   ret <4 x i32> [[SUB_I]]
uint32x4_t test_vsubw_u16(uint32x4_t a, uint16x4_t b) {
  return vsubw_u16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vsubw_u32(<2 x i64> %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMOVL_I_I]]
// CHECK:   ret <2 x i64> [[SUB_I]]
uint64x2_t test_vsubw_u32(uint64x2_t a, uint32x2_t b) {
  return vsubw_u32(a, b);
}

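// vsubw_high_*: wide subtract using the high half of the narrow 128-bit operand.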
// CHECK-LABEL: define <8 x i16> @test_vsubw_high_s8(<8 x i16> %a, <16 x i8> %b) #0 {
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %a, [[TMP0]]
// CHECK:   ret <8 x i16> [[SUB_I]]
int16x8_t test_vsubw_high_s8(int16x8_t a, int8x16_t b) {
  return vsubw_high_s8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vsubw_high_s16(<4 x i32> %a, <8 x i16> %b) #0 {
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %a, [[TMP2]]
// CHECK:   ret <4 x i32> [[SUB_I]]
int32x4_t test_vsubw_high_s16(int32x4_t a, int16x8_t b) {
  return vsubw_high_s16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vsubw_high_s32(<2 x i64> %a, <4 x i32> %b) #0 {
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> %a, [[TMP2]]
// CHECK:   ret <2 x i64> [[SUB_I]]
int64x2_t test_vsubw_high_s32(int64x2_t a, int32x4_t b) {
  return vsubw_high_s32(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vsubw_high_u8(<8 x i16> %a, <16 x i8> %b) #0 {
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %a, [[TMP0]]
// CHECK:   ret <8 x i16> [[SUB_I]]
uint16x8_t test_vsubw_high_u8(uint16x8_t a, uint8x16_t b) {
  return vsubw_high_u8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vsubw_high_u16(<4 x i32> %a, <8 x i16> %b) #0 {
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %a, [[TMP2]]
// CHECK:   ret <4 x i32> [[SUB_I]]
uint32x4_t test_vsubw_high_u16(uint32x4_t a, uint16x8_t b) {
  return vsubw_high_u16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vsubw_high_u32(<2 x i64> %a, <4 x i32> %b) #0 {
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> %a, [[TMP2]]
// CHECK:   ret <2 x i64> [[SUB_I]]
uint64x2_t test_vsubw_high_u32(uint64x2_t a, uint32x4_t b) {
  return vsubw_high_u32(a, b);
}

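// vaddhn_*: add, then narrow to the high half of each double-width lane
// (lshr by half the lane width, then trunc).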
// CHECK-LABEL: define <8 x i8> @test_vaddhn_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VADDHN_I:%.*]] = add <8 x i16> [[TMP2]], [[TMP3]]
// CHECK:   [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK:   [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[VADDHN2_I]]
int8x8_t test_vaddhn_s16(int16x8_t a, int16x8_t b) {
  return vaddhn_s16(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vaddhn_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VADDHN_I:%.*]] = add <4 x i32> [[TMP2]], [[TMP3]]
// CHECK:   [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK:   [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[VADDHN2_I]]
int16x4_t test_vaddhn_s32(int32x4_t a, int32x4_t b) {
  return vaddhn_s32(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vaddhn_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VADDHN_I:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
// CHECK:   [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], <i64 32, i64 32>
// CHECK:   [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[VADDHN2_I]]
int32x2_t test_vaddhn_s64(int64x2_t a, int64x2_t b) {
  return vaddhn_s64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vaddhn_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VADDHN_I:%.*]] = add <8 x i16> [[TMP2]], [[TMP3]]
// CHECK:   [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK:   [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[VADDHN2_I]]
uint8x8_t test_vaddhn_u16(uint16x8_t a, uint16x8_t b) {
  return vaddhn_u16(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vaddhn_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VADDHN_I:%.*]] = add <4 x i32> [[TMP2]], [[TMP3]]
// CHECK:   [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK:   [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[VADDHN2_I]]
uint16x4_t test_vaddhn_u32(uint32x4_t a, uint32x4_t b) {
  return vaddhn_u32(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vaddhn_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VADDHN_I:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
// CHECK:   [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], <i64 32, i64 32>
// CHECK:   [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[VADDHN2_I]]
uint32x2_t test_vaddhn_u64(uint64x2_t a, uint64x2_t b) {
  return vaddhn_u64(a, b);
}

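// vaddhn_high_*: as vaddhn, with the narrowed result concatenated onto %r to
// form a 128-bit vector.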
// CHECK-LABEL: define <16 x i8> @test_vaddhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VADDHN_I_I:%.*]] = add <8 x i16> [[TMP2]], [[TMP3]]
// CHECK:   [[VADDHN1_I_I:%.*]] = lshr <8 x i16> [[VADDHN_I_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK:   [[VADDHN2_I_I:%.*]] = trunc <8 x i16> [[VADDHN1_I_I]] to <8 x i8>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VADDHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I_I]]
int8x16_t test_vaddhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
  return vaddhn_high_s16(r, a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vaddhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VADDHN_I_I:%.*]] = add <4 x i32> [[TMP2]], [[TMP3]]
// CHECK:   [[VADDHN1_I_I:%.*]] = lshr <4 x i32> [[VADDHN_I_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK:   [[VADDHN2_I_I:%.*]] = trunc <4 x i32> [[VADDHN1_I_I]] to <4 x i16>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VADDHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I_I]]
int16x8_t test_vaddhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
  return vaddhn_high_s32(r, a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vaddhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VADDHN_I_I:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
// CHECK:   [[VADDHN1_I_I:%.*]] = lshr <2 x i64> [[VADDHN_I_I]], <i64 32, i64 32>
// CHECK:   [[VADDHN2_I_I:%.*]] = trunc <2 x i64> [[VADDHN1_I_I]] to <2 x i32>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VADDHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I_I]]
int32x4_t test_vaddhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
  return vaddhn_high_s64(r, a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vaddhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VADDHN_I_I:%.*]] = add <8 x i16> [[TMP2]], [[TMP3]]
// CHECK:   [[VADDHN1_I_I:%.*]] = lshr <8 x i16> [[VADDHN_I_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK:   [[VADDHN2_I_I:%.*]] = trunc <8 x i16> [[VADDHN1_I_I]] to <8 x i8>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VADDHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I_I]]
uint8x16_t test_vaddhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
  return vaddhn_high_u16(r, a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vaddhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VADDHN_I_I:%.*]] = add <4 x i32> [[TMP2]], [[TMP3]]
// CHECK:   [[VADDHN1_I_I:%.*]] = lshr <4 x i32> [[VADDHN_I_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK:   [[VADDHN2_I_I:%.*]] = trunc <4 x i32> [[VADDHN1_I_I]] to <4 x i16>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VADDHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I_I]]
uint16x8_t test_vaddhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
  return vaddhn_high_u32(r, a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vaddhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VADDHN_I_I:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
// CHECK:   [[VADDHN1_I_I:%.*]] = lshr <2 x i64> [[VADDHN_I_I]], <i64 32, i64 32>
// CHECK:   [[VADDHN2_I_I:%.*]] = trunc <2 x i64> [[VADDHN1_I_I]] to <2 x i32>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VADDHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I_I]]
uint32x4_t test_vaddhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
  return vaddhn_high_u64(r, a, b);
}

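// vraddhn_*: rounding add-and-narrow-high; lowered to the
// llvm.aarch64.neon.raddhn intrinsic rather than plain IR.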
// CHECK-LABEL: define <8 x i8> @test_vraddhn_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> [[VRADDHN_V_I]], <8 x i16> [[VRADDHN_V1_I]]) #4
// CHECK:   ret <8 x i8> [[VRADDHN_V2_I]]
int8x8_t test_vraddhn_s16(int16x8_t a, int16x8_t b) {
  return vraddhn_s16(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vraddhn_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> [[VRADDHN_V_I]], <4 x i32> [[VRADDHN_V1_I]]) #4
// CHECK:   [[VRADDHN_V3_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP2]]
int16x4_t test_vraddhn_s32(int32x4_t a, int32x4_t b) {
  return vraddhn_s32(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vraddhn_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> [[VRADDHN_V_I]], <2 x i64> [[VRADDHN_V1_I]]) #4
// CHECK:   [[VRADDHN_V3_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP2]]
int32x2_t test_vraddhn_s64(int64x2_t a, int64x2_t b) {
  return vraddhn_s64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vraddhn_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> [[VRADDHN_V_I]], <8 x i16> [[VRADDHN_V1_I]]) #4
// CHECK:   ret <8 x i8> [[VRADDHN_V2_I]]
uint8x8_t test_vraddhn_u16(uint16x8_t a, uint16x8_t b) {
  return vraddhn_u16(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vraddhn_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> [[VRADDHN_V_I]], <4 x i32> [[VRADDHN_V1_I]]) #4
// CHECK:   [[VRADDHN_V3_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP2]]
uint16x4_t test_vraddhn_u32(uint32x4_t a, uint32x4_t b) {
  return vraddhn_u32(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vraddhn_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> [[VRADDHN_V_I]], <2 x i64> [[VRADDHN_V1_I]]) #4
// CHECK:   [[VRADDHN_V3_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP2]]
uint32x2_t test_vraddhn_u64(uint64x2_t a, uint64x2_t b) {
  return vraddhn_u64(a, b);
}

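// vraddhn_high_*: rounding variant of vaddhn_high.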
// CHECK-LABEL: define <16 x i8> @test_vraddhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRADDHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VRADDHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VRADDHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> [[VRADDHN_V_I_I]], <8 x i16> [[VRADDHN_V1_I_I]]) #4
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRADDHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I_I]]
int8x16_t test_vraddhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
  return vraddhn_high_s16(r, a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vraddhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRADDHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VRADDHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VRADDHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> [[VRADDHN_V_I_I]], <4 x i32> [[VRADDHN_V1_I_I]]) #4
// CHECK:   [[VRADDHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I_I]] to <4 x i16>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I_I]]
int16x8_t test_vraddhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
  return vraddhn_high_s32(r, a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vraddhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRADDHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VRADDHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VRADDHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> [[VRADDHN_V_I_I]], <2 x i64> [[VRADDHN_V1_I_I]]) #4
// CHECK:   [[VRADDHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I_I]] to <2 x i32>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I_I]]
int32x4_t test_vraddhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
  return vraddhn_high_s64(r, a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vraddhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRADDHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VRADDHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VRADDHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> [[VRADDHN_V_I_I]], <8 x i16> [[VRADDHN_V1_I_I]]) #4
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRADDHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I_I]]
uint8x16_t test_vraddhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
  return vraddhn_high_u16(r, a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vraddhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRADDHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VRADDHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VRADDHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> [[VRADDHN_V_I_I]], <4 x i32> [[VRADDHN_V1_I_I]]) #4
// CHECK:   [[VRADDHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I_I]] to <4 x i16>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I_I]]
uint16x8_t test_vraddhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
  return vraddhn_high_u32(r, a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vraddhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRADDHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VRADDHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VRADDHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> [[VRADDHN_V_I_I]], <2 x i64> [[VRADDHN_V1_I_I]]) #4
// CHECK:   [[VRADDHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I_I]] to <2 x i32>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I_I]]
uint32x4_t test_vraddhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
  return vraddhn_high_u64(r, a, b);
}

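// vsubhn_*: subtract, then narrow to the high half of each double-width lane.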
// CHECK-LABEL: define <8 x i8> @test_vsubhn_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VSUBHN_I:%.*]] = sub <8 x i16> [[TMP2]], [[TMP3]]
// CHECK:   [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK:   [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[VSUBHN2_I]]
int8x8_t test_vsubhn_s16(int16x8_t a, int16x8_t b) {
  return vsubhn_s16(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vsubhn_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VSUBHN_I:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]]
// CHECK:   [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK:   [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[VSUBHN2_I]]
int16x4_t test_vsubhn_s32(int32x4_t a, int32x4_t b) {
  return vsubhn_s32(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vsubhn_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VSUBHN_I:%.*]] = sub <2 x i64> [[TMP2]], [[TMP3]]
// CHECK:   [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], <i64 32, i64 32>
// CHECK:   [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[VSUBHN2_I]]
int32x2_t test_vsubhn_s64(int64x2_t a, int64x2_t b) {
  return vsubhn_s64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vsubhn_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VSUBHN_I:%.*]] = sub <8 x i16> [[TMP2]], [[TMP3]]
// CHECK:   [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK:   [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[VSUBHN2_I]]
uint8x8_t test_vsubhn_u16(uint16x8_t a, uint16x8_t b) {
  return vsubhn_u16(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vsubhn_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VSUBHN_I:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]]
// CHECK:   [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK:   [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[VSUBHN2_I]]
uint16x4_t test_vsubhn_u32(uint32x4_t a, uint32x4_t b) {
  return vsubhn_u32(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vsubhn_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VSUBHN_I:%.*]] = sub <2 x i64> [[TMP2]], [[TMP3]]
// CHECK:   [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], <i64 32, i64 32>
// CHECK:   [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[VSUBHN2_I]]
uint32x2_t test_vsubhn_u64(uint64x2_t a, uint64x2_t b) {
  return vsubhn_u64(a, b);
}

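// vsubhn_high_*: as vsubhn, with the narrowed result concatenated onto %r.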
// CHECK-LABEL: define <16 x i8> @test_vsubhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VSUBHN_I_I:%.*]] = sub <8 x i16> [[TMP2]], [[TMP3]]
// CHECK:   [[VSUBHN1_I_I:%.*]] = lshr <8 x i16> [[VSUBHN_I_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK:   [[VSUBHN2_I_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I_I]] to <8 x i8>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VSUBHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I_I]]
int8x16_t test_vsubhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
  return vsubhn_high_s16(r, a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vsubhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VSUBHN_I_I:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]]
// CHECK:   [[VSUBHN1_I_I:%.*]] = lshr <4 x i32> [[VSUBHN_I_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK:   [[VSUBHN2_I_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I_I]] to <4 x i16>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VSUBHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I_I]]
int16x8_t test_vsubhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
  return vsubhn_high_s32(r, a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vsubhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VSUBHN_I_I:%.*]] = sub <2 x i64> [[TMP2]], [[TMP3]]
// CHECK:   [[VSUBHN1_I_I:%.*]] = lshr <2 x i64> [[VSUBHN_I_I]], <i64 32, i64 32>
// CHECK:   [[VSUBHN2_I_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I_I]] to <2 x i32>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VSUBHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I_I]]
int32x4_t test_vsubhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
  return vsubhn_high_s64(r, a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vsubhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VSUBHN_I_I:%.*]] = sub <8 x i16> [[TMP2]], [[TMP3]]
// CHECK:   [[VSUBHN1_I_I:%.*]] = lshr <8 x i16> [[VSUBHN_I_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK:   [[VSUBHN2_I_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I_I]] to <8 x i8>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VSUBHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I_I]]
uint8x16_t test_vsubhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
  return vsubhn_high_u16(r, a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vsubhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VSUBHN_I_I:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]]
// CHECK:   [[VSUBHN1_I_I:%.*]] = lshr <4 x i32> [[VSUBHN_I_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK:   [[VSUBHN2_I_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I_I]] to <4 x i16>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VSUBHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I_I]]
uint16x8_t test_vsubhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
  return vsubhn_high_u32(r, a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vsubhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VSUBHN_I_I:%.*]] = sub <2 x i64> [[TMP2]], [[TMP3]]
// CHECK:   [[VSUBHN1_I_I:%.*]] = lshr <2 x i64> [[VSUBHN_I_I]], <i64 32, i64 32>
// CHECK:   [[VSUBHN2_I_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I_I]] to <2 x i32>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VSUBHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I_I]]
uint32x4_t test_vsubhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
  return vsubhn_high_u64(r, a, b);
}

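// vrsubhn_*: rounding subtract-and-narrow-high; lowered to the
// llvm.aarch64.neon.rsubhn intrinsic.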
// CHECK-LABEL: define <8 x i8> @test_vrsubhn_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> [[VRSUBHN_V_I]], <8 x i16> [[VRSUBHN_V1_I]]) #4
// CHECK:   ret <8 x i8> [[VRSUBHN_V2_I]]
int8x8_t test_vrsubhn_s16(int16x8_t a, int16x8_t b) {
  return vrsubhn_s16(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vrsubhn_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> [[VRSUBHN_V_I]], <4 x i32> [[VRSUBHN_V1_I]]) #4
// CHECK:   [[VRSUBHN_V3_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP2]]
int16x4_t test_vrsubhn_s32(int32x4_t a, int32x4_t b) {
  return vrsubhn_s32(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vrsubhn_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> [[VRSUBHN_V_I]], <2 x i64> [[VRSUBHN_V1_I]]) #4
// CHECK:   [[VRSUBHN_V3_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP2]]
int32x2_t test_vrsubhn_s64(int64x2_t a, int64x2_t b) {
  return vrsubhn_s64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vrsubhn_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> [[VRSUBHN_V_I]], <8 x i16> [[VRSUBHN_V1_I]]) #4
// CHECK:   ret <8 x i8> [[VRSUBHN_V2_I]]
uint8x8_t test_vrsubhn_u16(uint16x8_t a, uint16x8_t b) {
  return vrsubhn_u16(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vrsubhn_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> [[VRSUBHN_V_I]], <4 x i32> [[VRSUBHN_V1_I]]) #4
// CHECK:   [[VRSUBHN_V3_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP2]]
uint16x4_t test_vrsubhn_u32(uint32x4_t a, uint32x4_t b) {
  return vrsubhn_u32(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vrsubhn_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> [[VRSUBHN_V_I]], <2 x i64> [[VRSUBHN_V1_I]]) #4
// CHECK:   [[VRSUBHN_V3_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP2]]
uint32x2_t test_vrsubhn_u64(uint64x2_t a, uint64x2_t b) {
  return vrsubhn_u64(a, b);
}

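// vrsubhn_high_*: rounding variant of vsubhn_high.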
8328 // CHECK-LABEL: define <16 x i8> @test_vrsubhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) #0 {
8329 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
8330 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
8331 // CHECK:   [[VRSUBHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
8332 // CHECK:   [[VRSUBHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VRSUBHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> [[VRSUBHN_V_I_I]], <8 x i16> [[VRSUBHN_V1_I_I]]) #4
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRSUBHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I_I]]
int8x16_t test_vrsubhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
  return vrsubhn_high_s16(r, a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vrsubhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VRSUBHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VRSUBHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> [[VRSUBHN_V_I_I]], <4 x i32> [[VRSUBHN_V1_I_I]]) #4
// CHECK:   [[VRSUBHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I_I]] to <4 x i16>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I_I]]
int16x8_t test_vrsubhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
  return vrsubhn_high_s32(r, a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vrsubhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VRSUBHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VRSUBHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> [[VRSUBHN_V_I_I]], <2 x i64> [[VRSUBHN_V1_I_I]]) #4
// CHECK:   [[VRSUBHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I_I]] to <2 x i32>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I_I]]
int32x4_t test_vrsubhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
  return vrsubhn_high_s64(r, a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vrsubhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VRSUBHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VRSUBHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> [[VRSUBHN_V_I_I]], <8 x i16> [[VRSUBHN_V1_I_I]]) #4
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRSUBHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I_I]]
uint8x16_t test_vrsubhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
  return vrsubhn_high_u16(r, a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vrsubhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VRSUBHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VRSUBHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> [[VRSUBHN_V_I_I]], <4 x i32> [[VRSUBHN_V1_I_I]]) #4
// CHECK:   [[VRSUBHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I_I]] to <4 x i16>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I_I]]
uint16x8_t test_vrsubhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
  return vrsubhn_high_u32(r, a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vrsubhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VRSUBHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VRSUBHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> [[VRSUBHN_V_I_I]], <2 x i64> [[VRSUBHN_V1_I_I]]) #4
// CHECK:   [[VRSUBHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I_I]] to <2 x i32>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I_I]]
uint32x4_t test_vrsubhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
  return vrsubhn_high_u64(r, a, b);
}

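// vabdl_*: widening absolute difference. The same-width sabd/uabd result is
// zero-extended to the double-width type; zext is correct even for the signed
// variants because |a - b| always fits unsigned in the unextended width.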
// CHECK-LABEL: define <8 x i16> @test_vabdl_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK:   [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK:   [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_I_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[VMOVL_I_I]]
int16x8_t test_vabdl_s8(int8x8_t a, int8x8_t b) {
  return vabdl_s8(a, b);
}
// CHECK-LABEL: define <4 x i32> @test_vabdl_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I_I]], <4 x i16> [[VABD1_I_I]]) #4
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
// CHECK:   ret <4 x i32> [[VMOVL_I_I]]
int32x4_t test_vabdl_s16(int16x4_t a, int16x4_t b) {
  return vabdl_s16(a, b);
}
// CHECK-LABEL: define <2 x i64> @test_vabdl_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I_I]], <2 x i32> [[VABD1_I_I]]) #4
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
// CHECK:   ret <2 x i64> [[VMOVL_I_I]]
int64x2_t test_vabdl_s32(int32x2_t a, int32x2_t b) {
  return vabdl_s32(a, b);
}
// CHECK-LABEL: define <8 x i16> @test_vabdl_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK:   [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK:   [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_I_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[VMOVL_I_I]]
uint16x8_t test_vabdl_u8(uint8x8_t a, uint8x8_t b) {
  return vabdl_u8(a, b);
}
// CHECK-LABEL: define <4 x i32> @test_vabdl_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I_I]], <4 x i16> [[VABD1_I_I]]) #4
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
// CHECK:   ret <4 x i32> [[VMOVL_I_I]]
uint32x4_t test_vabdl_u16(uint16x4_t a, uint16x4_t b) {
  return vabdl_u16(a, b);
}
// CHECK-LABEL: define <2 x i64> @test_vabdl_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[VABD_I_I]], <2 x i32> [[VABD1_I_I]]) #4
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
// CHECK:   ret <2 x i64> [[VMOVL_I_I]]
uint64x2_t test_vabdl_u32(uint32x2_t a, uint32x2_t b) {
  return vabdl_u32(a, b);
}

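// vabal_*: absolute-difference-and-accumulate. Same sabd/uabd + zext sequence
// as vabdl, followed by an ordinary vector add into the accumulator %a.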
// CHECK-LABEL: define <8 x i16> @test_vabal_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 {
// CHECK:   [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %b, <8 x i8> %c) #4
// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
int16x8_t test_vabal_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
  return vabal_s8(a, b, c);
}
// CHECK-LABEL: define <4 x i32> @test_vabal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK:   [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I_I_I]], <4 x i16> [[VABD1_I_I_I]]) #4
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
int32x4_t test_vabal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vabal_s16(a, b, c);
}
// CHECK-LABEL: define <2 x i64> @test_vabal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK:   [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I_I_I]], <2 x i32> [[VABD1_I_I_I]]) #4
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I]]
// CHECK:   ret <2 x i64> [[ADD_I]]
int64x2_t test_vabal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vabal_s32(a, b, c);
}
// CHECK-LABEL: define <8 x i16> @test_vabal_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 {
// CHECK:   [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %b, <8 x i8> %c) #4
// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
uint16x8_t test_vabal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
  return vabal_u8(a, b, c);
}
// CHECK-LABEL: define <4 x i32> @test_vabal_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK:   [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I_I_I]], <4 x i16> [[VABD1_I_I_I]]) #4
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
uint32x4_t test_vabal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  return vabal_u16(a, b, c);
}
// CHECK-LABEL: define <2 x i64> @test_vabal_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK:   [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[VABD_I_I_I]], <2 x i32> [[VABD1_I_I_I]]) #4
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I]]
// CHECK:   ret <2 x i64> [[ADD_I]]
uint64x2_t test_vabal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  return vabal_u32(a, b, c);
}

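// vabdl_high_*: as vabdl, but operating on the high halves of the 128-bit
// inputs, extracted with a shufflevector before the widening.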
// CHECK-LABEL: define <8 x i16> @test_vabdl_high_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4
// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[VMOVL_I_I_I]]
int16x8_t test_vabdl_high_s8(int8x16_t a, int8x16_t b) {
  return vabdl_high_s8(a, b);
}
// CHECK-LABEL: define <4 x i32> @test_vabdl_high_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I_I_I]], <4 x i16> [[VABD1_I_I_I]]) #4
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
// CHECK:   ret <4 x i32> [[VMOVL_I_I_I]]
int32x4_t test_vabdl_high_s16(int16x8_t a, int16x8_t b) {
  return vabdl_high_s16(a, b);
}
// CHECK-LABEL: define <2 x i64> @test_vabdl_high_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I_I_I]], <2 x i32> [[VABD1_I_I_I]]) #4
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
// CHECK:   ret <2 x i64> [[VMOVL_I_I_I]]
int64x2_t test_vabdl_high_s32(int32x4_t a, int32x4_t b) {
  return vabdl_high_s32(a, b);
}
// CHECK-LABEL: define <8 x i16> @test_vabdl_high_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4
// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[VMOVL_I_I_I]]
uint16x8_t test_vabdl_high_u8(uint8x16_t a, uint8x16_t b) {
  return vabdl_high_u8(a, b);
}
// CHECK-LABEL: define <4 x i32> @test_vabdl_high_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I_I_I]], <4 x i16> [[VABD1_I_I_I]]) #4
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
// CHECK:   ret <4 x i32> [[VMOVL_I_I_I]]
uint32x4_t test_vabdl_high_u16(uint16x8_t a, uint16x8_t b) {
  return vabdl_high_u16(a, b);
}
// CHECK-LABEL: define <2 x i64> @test_vabdl_high_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[VABD_I_I_I]], <2 x i32> [[VABD1_I_I_I]]) #4
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
// CHECK:   ret <2 x i64> [[VMOVL_I_I_I]]
uint64x2_t test_vabdl_high_u32(uint32x4_t a, uint32x4_t b) {
  return vabdl_high_u32(a, b);
}

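// vabal_high_*: high-half extraction followed by the vabal pattern.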
// CHECK-LABEL: define <8 x i16> @test_vabal_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) #0 {
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[VABD_I_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4
// CHECK:   [[VMOVL_I_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I_I]] to <8 x i16>
// CHECK:   [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I_I]]
// CHECK:   ret <8 x i16> [[ADD_I_I]]
int16x8_t test_vabal_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) {
  return vabal_high_s8(a, b, c);
}
// CHECK-LABEL: define <4 x i32> @test_vabal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) #0 {
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VABD_I_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VABD1_I_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VABD2_I_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I_I_I_I]], <4 x i16> [[VABD1_I_I_I_I]]) #4
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I_I]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK:   [[VMOVL_I_I_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
// CHECK:   [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I_I]]
// CHECK:   ret <4 x i32> [[ADD_I_I]]
int32x4_t test_vabal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
  return vabal_high_s16(a, b, c);
}
// CHECK-LABEL: define <2 x i64> @test_vabal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) #0 {
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VABD_I_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VABD1_I_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VABD2_I_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I_I_I_I]], <2 x i32> [[VABD1_I_I_I_I]]) #4
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I_I]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK:   [[VMOVL_I_I_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
// CHECK:   [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I_I]]
// CHECK:   ret <2 x i64> [[ADD_I_I]]
int64x2_t test_vabal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
  return vabal_high_s32(a, b, c);
}
// CHECK-LABEL: define <8 x i16> @test_vabal_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) #0 {
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[VABD_I_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4
// CHECK:   [[VMOVL_I_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I_I]] to <8 x i16>
// CHECK:   [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I_I]]
// CHECK:   ret <8 x i16> [[ADD_I_I]]
uint16x8_t test_vabal_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) {
  return vabal_high_u8(a, b, c);
}
// CHECK-LABEL: define <4 x i32> @test_vabal_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) #0 {
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VABD_I_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VABD1_I_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VABD2_I_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I_I_I_I]], <4 x i16> [[VABD1_I_I_I_I]]) #4
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I_I]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK:   [[VMOVL_I_I_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
// CHECK:   [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I_I]]
// CHECK:   ret <4 x i32> [[ADD_I_I]]
uint32x4_t test_vabal_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) {
  return vabal_high_u16(a, b, c);
}
// CHECK-LABEL: define <2 x i64> @test_vabal_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) #0 {
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VABD_I_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VABD1_I_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VABD2_I_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[VABD_I_I_I_I]], <2 x i32> [[VABD1_I_I_I_I]]) #4
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I_I]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK:   [[VMOVL_I_I_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
// CHECK:   [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I_I]]
// CHECK:   ret <2 x i64> [[ADD_I_I]]
uint64x2_t test_vabal_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) {
  return vabal_high_u32(a, b, c);
}

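// vmull_*: widening multiply, lowered directly to the smull/umull intrinsics.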
// CHECK-LABEL: define <8 x i16> @test_vmull_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK:   [[VMULL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %a, <8 x i8> %b) #4
// CHECK:   ret <8 x i16> [[VMULL_I]]
int16x8_t test_vmull_s8(int8x8_t a, int8x8_t b) {
  return vmull_s8(a, b);
}
// CHECK-LABEL: define <4 x i32> @test_vmull_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #4
// CHECK:   ret <4 x i32> [[VMULL2_I]]
int32x4_t test_vmull_s16(int16x4_t a, int16x4_t b) {
  return vmull_s16(a, b);
}
// CHECK-LABEL: define <2 x i64> @test_vmull_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #4
// CHECK:   ret <2 x i64> [[VMULL2_I]]
int64x2_t test_vmull_s32(int32x2_t a, int32x2_t b) {
  return vmull_s32(a, b);
}
// CHECK-LABEL: define <8 x i16> @test_vmull_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK:   [[VMULL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %a, <8 x i8> %b) #4
// CHECK:   ret <8 x i16> [[VMULL_I]]
uint16x8_t test_vmull_u8(uint8x8_t a, uint8x8_t b) {
  return vmull_u8(a, b);
}
// CHECK-LABEL: define <4 x i32> @test_vmull_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #4
// CHECK:   ret <4 x i32> [[VMULL2_I]]
uint32x4_t test_vmull_u16(uint16x4_t a, uint16x4_t b) {
  return vmull_u16(a, b);
}
// CHECK-LABEL: define <2 x i64> @test_vmull_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #4
// CHECK:   ret <2 x i64> [[VMULL2_I]]
uint64x2_t test_vmull_u32(uint32x2_t a, uint32x2_t b) {
  return vmull_u32(a, b);
}

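// vmull_high_*: widening multiply on the high halves of the 128-bit inputs.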
// CHECK-LABEL: define <8 x i16> @test_vmull_high_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4
// CHECK:   ret <8 x i16> [[VMULL_I_I]]
int16x8_t test_vmull_high_s8(int8x16_t a, int8x16_t b) {
  return vmull_high_s8(a, b);
}
// CHECK-LABEL: define <4 x i32> @test_vmull_high_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4
// CHECK:   ret <4 x i32> [[VMULL2_I_I]]
int32x4_t test_vmull_high_s16(int16x8_t a, int16x8_t b) {
  return vmull_high_s16(a, b);
}
// CHECK-LABEL: define <2 x i64> @test_vmull_high_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4
// CHECK:   ret <2 x i64> [[VMULL2_I_I]]
int64x2_t test_vmull_high_s32(int32x4_t a, int32x4_t b) {
  return vmull_high_s32(a, b);
}
// CHECK-LABEL: define <8 x i16> @test_vmull_high_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4
// CHECK:   ret <8 x i16> [[VMULL_I_I]]
uint16x8_t test_vmull_high_u8(uint8x16_t a, uint8x16_t b) {
  return vmull_high_u8(a, b);
}
// CHECK-LABEL: define <4 x i32> @test_vmull_high_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4
// CHECK:   ret <4 x i32> [[VMULL2_I_I]]
uint32x4_t test_vmull_high_u16(uint16x8_t a, uint16x8_t b) {
  return vmull_high_u16(a, b);
}
// CHECK-LABEL: define <2 x i64> @test_vmull_high_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4
// CHECK:   ret <2 x i64> [[VMULL2_I_I]]
uint64x2_t test_vmull_high_u32(uint32x4_t a, uint32x4_t b) {
  return vmull_high_u32(a, b);
}

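// vmlal_*: widening multiply-accumulate; smull/umull followed by a vector add.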
// CHECK-LABEL: define <8 x i16> @test_vmlal_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 {
// CHECK:   [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %b, <8 x i8> %c) #4
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
int16x8_t test_vmlal_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
  return vmlal_s8(a, b, c);
}
// CHECK-LABEL: define <4 x i32> @test_vmlal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK:   [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
int32x4_t test_vmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vmlal_s16(a, b, c);
}
// CHECK-LABEL: define <2 x i64> @test_vmlal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK:   [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
// CHECK:   ret <2 x i64> [[ADD_I]]
int64x2_t test_vmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vmlal_s32(a, b, c);
}
// CHECK-LABEL: define <8 x i16> @test_vmlal_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 {
// CHECK:   [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %b, <8 x i8> %c) #4
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
uint16x8_t test_vmlal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
  return vmlal_u8(a, b, c);
}
// CHECK-LABEL: define <4 x i32> @test_vmlal_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK:   [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
uint32x4_t test_vmlal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmlal_u16(a, b, c);
}
// CHECK-LABEL: define <2 x i64> @test_vmlal_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK:   [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
// CHECK:   ret <2 x i64> [[ADD_I]]
uint64x2_t test_vmlal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmlal_u32(a, b, c);
}

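// vmlal_high_*: vmlal on the high halves of the 128-bit inputs.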
// CHECK-LABEL: define <8 x i16> @test_vmlal_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) #0 {
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4
// CHECK:   [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I_I]]
// CHECK:   ret <8 x i16> [[ADD_I_I]]
int16x8_t test_vmlal_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) {
  return vmlal_high_s8(a, b, c);
}
// CHECK-LABEL: define <4 x i32> @test_vmlal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) #0 {
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I_I_I]], <4 x i16> [[VMULL1_I_I_I]]) #4
// CHECK:   [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I_I]]
// CHECK:   ret <4 x i32> [[ADD_I_I]]
int32x4_t test_vmlal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
  return vmlal_high_s16(a, b, c);
}
// CHECK-LABEL: define <2 x i64> @test_vmlal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) #0 {
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I_I_I]], <2 x i32> [[VMULL1_I_I_I]]) #4
// CHECK:   [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I_I]]
// CHECK:   ret <2 x i64> [[ADD_I_I]]
int64x2_t test_vmlal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
  return vmlal_high_s32(a, b, c);
}
// CHECK-LABEL: define <8 x i16> @test_vmlal_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) #0 {
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4
// CHECK:   [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I_I]]
// CHECK:   ret <8 x i16> [[ADD_I_I]]
uint16x8_t test_vmlal_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) {
  return vmlal_high_u8(a, b, c);
}
// CHECK-LABEL: define <4 x i32> @test_vmlal_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) #0 {
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I_I_I]], <4 x i16> [[VMULL1_I_I_I]]) #4
// CHECK:   [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I_I]]
// CHECK:   ret <4 x i32> [[ADD_I_I]]
uint32x4_t test_vmlal_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) {
  return vmlal_high_u16(a, b, c);
}
// CHECK-LABEL: define <2 x i64> @test_vmlal_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) #0 {
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I_I_I]], <2 x i32> [[VMULL1_I_I_I]]) #4
// CHECK:   [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I_I]]
// CHECK:   ret <2 x i64> [[ADD_I_I]]
uint64x2_t test_vmlal_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) {
  return vmlal_high_u32(a, b, c);
}

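// vmlsl_*: widening multiply-subtract; smull/umull followed by a vector sub.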
// CHECK-LABEL: define <8 x i16> @test_vmlsl_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 {
// CHECK:   [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %b, <8 x i8> %c) #4
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I]]
// CHECK:   ret <8 x i16> [[SUB_I]]
int16x8_t test_vmlsl_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
  return vmlsl_s8(a, b, c);
}
// CHECK-LABEL: define <4 x i32> @test_vmlsl_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK:   [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
// CHECK:   ret <4 x i32> [[SUB_I]]
int32x4_t test_vmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vmlsl_s16(a, b, c);
}
// CHECK-LABEL: define <2 x i64> @test_vmlsl_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK:   [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4
// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
// CHECK:   ret <2 x i64> [[SUB_I]]
int64x2_t test_vmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vmlsl_s32(a, b, c);
}
// CHECK-LABEL: define <8 x i16> @test_vmlsl_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 {
// CHECK:   [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %b, <8 x i8> %c) #4
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I]]
// CHECK:   ret <8 x i16> [[SUB_I]]
uint16x8_t test_vmlsl_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
  return vmlsl_u8(a, b, c);
}
// CHECK-LABEL: define <4 x i32> @test_vmlsl_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK:   [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
// CHECK:   ret <4 x i32> [[SUB_I]]
uint32x4_t test_vmlsl_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmlsl_u16(a, b, c);
}
// CHECK-LABEL: define <2 x i64> @test_vmlsl_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK:   [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4
// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
// CHECK:   ret <2 x i64> [[SUB_I]]
uint64x2_t test_vmlsl_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmlsl_u32(a, b, c);
}

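// vmlsl_high_*: vmlsl on the high halves of the 128-bit inputs.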
// CHECK-LABEL: define <8 x i16> @test_vmlsl_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) #0 {
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4
// CHECK:   [[SUB_I_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I_I]]
// CHECK:   ret <8 x i16> [[SUB_I_I]]
int16x8_t test_vmlsl_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) {
  return vmlsl_high_s8(a, b, c);
}
// CHECK-LABEL: define <4 x i32> @test_vmlsl_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) #0 {
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I_I_I]], <4 x i16> [[VMULL1_I_I_I]]) #4
// CHECK:   [[SUB_I_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I_I]]
// CHECK:   ret <4 x i32> [[SUB_I_I]]
int32x4_t test_vmlsl_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
  return vmlsl_high_s16(a, b, c);
}
// CHECK-LABEL: define <2 x i64> @test_vmlsl_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) #0 {
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I_I_I]], <2 x i32> [[VMULL1_I_I_I]]) #4
// CHECK:   [[SUB_I_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I_I]]
// CHECK:   ret <2 x i64> [[SUB_I_I]]
int64x2_t test_vmlsl_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
  return vmlsl_high_s32(a, b, c);
}
// CHECK-LABEL: define <8 x i16> @test_vmlsl_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) #0 {
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4
// CHECK:   [[SUB_I_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I_I]]
// CHECK:   ret <8 x i16> [[SUB_I_I]]
uint16x8_t test_vmlsl_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) {
  return vmlsl_high_u8(a, b, c);
}
// CHECK-LABEL: define <4 x i32> @test_vmlsl_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) #0 {
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I_I_I]], <4 x i16> [[VMULL1_I_I_I]]) #4
// CHECK:   [[SUB_I_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I_I]]
// CHECK:   ret <4 x i32> [[SUB_I_I]]
uint32x4_t test_vmlsl_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) {
  return vmlsl_high_u16(a, b, c);
}
// CHECK-LABEL: define <2 x i64> @test_vmlsl_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) #0 {
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I_I_I]], <2 x i32> [[VMULL1_I_I_I]]) #4
// CHECK:   [[SUB_I_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I_I]]
// CHECK:   ret <2 x i64> [[SUB_I_I]]
uint64x2_t test_vmlsl_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) {
  return vmlsl_high_u32(a, b, c);
}

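// vqdmull_*: saturating doubling widening multiply (sqdmull). Only signed
// 16- and 32-bit element types exist; there is no 8-bit or unsigned form.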
// CHECK-LABEL: define <4 x i32> @test_vqdmull_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMULL_V_I]], <4 x i16> [[VQDMULL_V1_I]]) #4
// CHECK:   [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP2]]
int32x4_t test_vqdmull_s16(int16x4_t a, int16x4_t b) {
  return vqdmull_s16(a, b);
}
// CHECK-LABEL: define <2 x i64> @test_vqdmull_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMULL_V_I]], <2 x i32> [[VQDMULL_V1_I]]) #4
// CHECK:   [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP2]]
int64x2_t test_vqdmull_s32(int32x2_t a, int32x2_t b) {
  return vqdmull_s32(a, b);
}

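// vqdmlal_*: sqdmull followed by a saturating add (sqadd) into the accumulator.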
// CHECK-LABEL: define <4 x i32> @test_vqdmlal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK:   [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK:   [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #4
// CHECK:   [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQDMLAL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #4
// CHECK:   ret <4 x i32> [[VQDMLAL_V3_I]]
int32x4_t test_vqdmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vqdmlal_s16(a, b, c);
}

// CHECK-LABEL: define <2 x i64> @test_vqdmlal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK:   [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK:   [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #4
// CHECK:   [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQDMLAL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #4
// CHECK:   ret <2 x i64> [[VQDMLAL_V3_I]]
int64x2_t test_vqdmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vqdmlal_s32(a, b, c);
}

9143 // CHECK-LABEL: define <4 x i32> @test_vqdmlsl_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
9144 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
9145 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
9146 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
9147 // CHECK:   [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
9148 // CHECK:   [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
9149 // CHECK:   [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #4
9150 // CHECK:   [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
9151 // CHECK:   [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQDMLSL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #4
9152 // CHECK:   ret <4 x i32> [[VQDMLSL_V3_I]]
test_vqdmlsl_s16(int32x4_t a,int16x4_t b,int16x4_t c)9153 int32x4_t test_vqdmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
9154   return vqdmlsl_s16(a, b, c);
9155 }
9156 
9157 // CHECK-LABEL: define <2 x i64> @test_vqdmlsl_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
9158 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
9159 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
9160 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8>
9161 // CHECK:   [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
9162 // CHECK:   [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
9163 // CHECK:   [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #4
9164 // CHECK:   [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
9165 // CHECK:   [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQDMLSL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #4
9166 // CHECK:   ret <2 x i64> [[VQDMLSL_V3_I]]
test_vqdmlsl_s32(int64x2_t a,int32x2_t b,int32x2_t c)9167 int64x2_t test_vqdmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
9168   return vqdmlsl_s32(a, b, c);
9169 }
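// Note: the vqdmlal/vqdmlsl checks above all share one pattern: the doubling
// product is formed with @llvm.aarch64.neon.sqdmull.*, then merged into the
// accumulator with the saturating @llvm.aarch64.neon.sqadd.* (vqdmlal) or
// @llvm.aarch64.neon.sqsub.* (vqdmlsl) intrinsic.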

// CHECK-LABEL: define <4 x i32> @test_vqdmull_high_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VQDMULL_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VQDMULL_V1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VQDMULL_V2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMULL_V_I_I]], <4 x i16> [[VQDMULL_V1_I_I]]) #4
// CHECK:   [[VQDMULL_V3_I_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP2]]
int32x4_t test_vqdmull_high_s16(int16x8_t a, int16x8_t b) {
  return vqdmull_high_s16(a, b);
}
// CHECK-LABEL: define <2 x i64> @test_vqdmull_high_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VQDMULL_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VQDMULL_V1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VQDMULL_V2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMULL_V_I_I]], <2 x i32> [[VQDMULL_V1_I_I]]) #4
// CHECK:   [[VQDMULL_V3_I_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP2]]
int64x2_t test_vqdmull_high_s32(int32x4_t a, int32x4_t b) {
  return vqdmull_high_s32(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vqdmlal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) #0 {
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VQDMLAL_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VQDMLAL1_I_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK:   [[VQDMLAL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I_I]], <4 x i16> [[VQDMLAL1_I_I]]) #4
// CHECK:   [[VQDMLAL_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQDMLAL_V3_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQDMLAL_V_I_I]], <4 x i32> [[VQDMLAL2_I_I]]) #4
// CHECK:   ret <4 x i32> [[VQDMLAL_V3_I_I]]
int32x4_t test_vqdmlal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
  return vqdmlal_high_s16(a, b, c);
}

// CHECK-LABEL: define <2 x i64> @test_vqdmlal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) #0 {
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VQDMLAL_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VQDMLAL1_I_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK:   [[VQDMLAL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I_I]], <2 x i32> [[VQDMLAL1_I_I]]) #4
// CHECK:   [[VQDMLAL_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQDMLAL_V3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQDMLAL_V_I_I]], <2 x i64> [[VQDMLAL2_I_I]]) #4
// CHECK:   ret <2 x i64> [[VQDMLAL_V3_I_I]]
int64x2_t test_vqdmlal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
  return vqdmlal_high_s32(a, b, c);
}

// CHECK-LABEL: define <4 x i32> @test_vqdmlsl_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) #0 {
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VQDMLAL_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VQDMLAL1_I_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK:   [[VQDMLAL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I_I]], <4 x i16> [[VQDMLAL1_I_I]]) #4
// CHECK:   [[VQDMLSL_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQDMLSL_V3_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQDMLSL_V_I_I]], <4 x i32> [[VQDMLAL2_I_I]]) #4
// CHECK:   ret <4 x i32> [[VQDMLSL_V3_I_I]]
int32x4_t test_vqdmlsl_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
  return vqdmlsl_high_s16(a, b, c);
}

// CHECK-LABEL: define <2 x i64> @test_vqdmlsl_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) #0 {
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VQDMLAL_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VQDMLAL1_I_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK:   [[VQDMLAL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I_I]], <2 x i32> [[VQDMLAL1_I_I]]) #4
// CHECK:   [[VQDMLSL_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQDMLSL_V3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQDMLSL_V_I_I]], <2 x i64> [[VQDMLAL2_I_I]]) #4
// CHECK:   ret <2 x i64> [[VQDMLSL_V3_I_I]]
int64x2_t test_vqdmlsl_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
  return vqdmlsl_high_s32(a, b, c);
}

// CHECK-LABEL: define <8 x i16> @test_vmull_p8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK:   [[VMULL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %a, <8 x i8> %b) #4
// CHECK:   ret <8 x i16> [[VMULL_I]]
poly16x8_t test_vmull_p8(poly8x8_t a, poly8x8_t b) {
  return vmull_p8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vmull_high_p8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4
// CHECK:   ret <8 x i16> [[VMULL_I_I]]
poly16x8_t test_vmull_high_p8(poly8x16_t a, poly8x16_t b) {
  return vmull_high_p8(a, b);
}
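// Note: the polynomial multiplies map to @llvm.aarch64.neon.pmull.v8i16, and
// the _high variant first selects the upper eight lanes of each <16 x i8>
// operand with a shufflevector, as the checks above verify.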

// CHECK-LABEL: define i64 @test_vaddd_s64(i64 %a, i64 %b) #0 {
// CHECK:   [[VADDD_I:%.*]] = add i64 %a, %b
// CHECK:   ret i64 [[VADDD_I]]
int64_t test_vaddd_s64(int64_t a, int64_t b) {
  return vaddd_s64(a, b);
}

// CHECK-LABEL: define i64 @test_vaddd_u64(i64 %a, i64 %b) #0 {
// CHECK:   [[VADDD_I:%.*]] = add i64 %a, %b
// CHECK:   ret i64 [[VADDD_I]]
uint64_t test_vaddd_u64(uint64_t a, uint64_t b) {
  return vaddd_u64(a, b);
}

// CHECK-LABEL: define i64 @test_vsubd_s64(i64 %a, i64 %b) #0 {
// CHECK:   [[VSUBD_I:%.*]] = sub i64 %a, %b
// CHECK:   ret i64 [[VSUBD_I]]
int64_t test_vsubd_s64(int64_t a, int64_t b) {
  return vsubd_s64(a, b);
}

// CHECK-LABEL: define i64 @test_vsubd_u64(i64 %a, i64 %b) #0 {
// CHECK:   [[VSUBD_I:%.*]] = sub i64 %a, %b
// CHECK:   ret i64 [[VSUBD_I]]
uint64_t test_vsubd_u64(uint64_t a, uint64_t b) {
  return vsubd_u64(a, b);
}

// CHECK-LABEL: define i8 @test_vqaddb_s8(i8 %a, i8 %b) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK:   [[VQADDB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4
// CHECK:   [[TMP2:%.*]] = extractelement <8 x i8> [[VQADDB_S8_I]], i64 0
// CHECK:   ret i8 [[TMP2]]
int8_t test_vqaddb_s8(int8_t a, int8_t b) {
  return vqaddb_s8(a, b);
}

// CHECK-LABEL: define i16 @test_vqaddh_s16(i16 %a, i16 %b) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK:   [[VQADDH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
// CHECK:   [[TMP2:%.*]] = extractelement <4 x i16> [[VQADDH_S16_I]], i64 0
// CHECK:   ret i16 [[TMP2]]
int16_t test_vqaddh_s16(int16_t a, int16_t b) {
  return vqaddh_s16(a, b);
}

// CHECK-LABEL: define i32 @test_vqadds_s32(i32 %a, i32 %b) #0 {
// CHECK:   [[VQADDS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %a, i32 %b) #4
// CHECK:   ret i32 [[VQADDS_S32_I]]
int32_t test_vqadds_s32(int32_t a, int32_t b) {
  return vqadds_s32(a, b);
}

// CHECK-LABEL: define i64 @test_vqaddd_s64(i64 %a, i64 %b) #0 {
// CHECK:   [[VQADDD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %a, i64 %b) #4
// CHECK:   ret i64 [[VQADDD_S64_I]]
int64_t test_vqaddd_s64(int64_t a, int64_t b) {
  return vqaddd_s64(a, b);
}

// CHECK-LABEL: define i8 @test_vqaddb_u8(i8 %a, i8 %b) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK:   [[VQADDB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4
// CHECK:   [[TMP2:%.*]] = extractelement <8 x i8> [[VQADDB_U8_I]], i64 0
// CHECK:   ret i8 [[TMP2]]
uint8_t test_vqaddb_u8(uint8_t a, uint8_t b) {
  return vqaddb_u8(a, b);
}

// CHECK-LABEL: define i16 @test_vqaddh_u16(i16 %a, i16 %b) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK:   [[VQADDH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
// CHECK:   [[TMP2:%.*]] = extractelement <4 x i16> [[VQADDH_U16_I]], i64 0
// CHECK:   ret i16 [[TMP2]]
uint16_t test_vqaddh_u16(uint16_t a, uint16_t b) {
  return vqaddh_u16(a, b);
}

// CHECK-LABEL: define i32 @test_vqadds_u32(i32 %a, i32 %b) #0 {
// CHECK:   [[VQADDS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqadd.i32(i32 %a, i32 %b) #4
// CHECK:   ret i32 [[VQADDS_U32_I]]
uint32_t test_vqadds_u32(uint32_t a, uint32_t b) {
  return vqadds_u32(a, b);
}

// CHECK-LABEL: define i64 @test_vqaddd_u64(i64 %a, i64 %b) #0 {
// CHECK:   [[VQADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqadd.i64(i64 %a, i64 %b) #4
// CHECK:   ret i64 [[VQADDD_U64_I]]
uint64_t test_vqaddd_u64(uint64_t a, uint64_t b) {
  return vqaddd_u64(a, b);
}

// CHECK-LABEL: define i8 @test_vqsubb_s8(i8 %a, i8 %b) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK:   [[VQSUBB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqsub.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4
// CHECK:   [[TMP2:%.*]] = extractelement <8 x i8> [[VQSUBB_S8_I]], i64 0
// CHECK:   ret i8 [[TMP2]]
int8_t test_vqsubb_s8(int8_t a, int8_t b) {
  return vqsubb_s8(a, b);
}

// CHECK-LABEL: define i16 @test_vqsubh_s16(i16 %a, i16 %b) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK:   [[VQSUBH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
// CHECK:   [[TMP2:%.*]] = extractelement <4 x i16> [[VQSUBH_S16_I]], i64 0
// CHECK:   ret i16 [[TMP2]]
int16_t test_vqsubh_s16(int16_t a, int16_t b) {
  return vqsubh_s16(a, b);
}

// CHECK-LABEL: define i32 @test_vqsubs_s32(i32 %a, i32 %b) #0 {
// CHECK:   [[VQSUBS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %a, i32 %b) #4
// CHECK:   ret i32 [[VQSUBS_S32_I]]
int32_t test_vqsubs_s32(int32_t a, int32_t b) {
  return vqsubs_s32(a, b);
}

// CHECK-LABEL: define i64 @test_vqsubd_s64(i64 %a, i64 %b) #0 {
// CHECK:   [[VQSUBD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %a, i64 %b) #4
// CHECK:   ret i64 [[VQSUBD_S64_I]]
int64_t test_vqsubd_s64(int64_t a, int64_t b) {
  return vqsubd_s64(a, b);
}

// CHECK-LABEL: define i8 @test_vqsubb_u8(i8 %a, i8 %b) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK:   [[VQSUBB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqsub.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4
// CHECK:   [[TMP2:%.*]] = extractelement <8 x i8> [[VQSUBB_U8_I]], i64 0
// CHECK:   ret i8 [[TMP2]]
uint8_t test_vqsubb_u8(uint8_t a, uint8_t b) {
  return vqsubb_u8(a, b);
}

// CHECK-LABEL: define i16 @test_vqsubh_u16(i16 %a, i16 %b) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK:   [[VQSUBH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqsub.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
// CHECK:   [[TMP2:%.*]] = extractelement <4 x i16> [[VQSUBH_U16_I]], i64 0
// CHECK:   ret i16 [[TMP2]]
uint16_t test_vqsubh_u16(uint16_t a, uint16_t b) {
  return vqsubh_u16(a, b);
}

// CHECK-LABEL: define i32 @test_vqsubs_u32(i32 %a, i32 %b) #0 {
// CHECK:   [[VQSUBS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqsub.i32(i32 %a, i32 %b) #4
// CHECK:   ret i32 [[VQSUBS_U32_I]]
uint32_t test_vqsubs_u32(uint32_t a, uint32_t b) {
  return vqsubs_u32(a, b);
}

// CHECK-LABEL: define i64 @test_vqsubd_u64(i64 %a, i64 %b) #0 {
// CHECK:   [[VQSUBD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqsub.i64(i64 %a, i64 %b) #4
// CHECK:   ret i64 [[VQSUBD_U64_I]]
uint64_t test_vqsubd_u64(uint64_t a, uint64_t b) {
  return vqsubd_u64(a, b);
}
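// Note: the checks above show that the i8/i16 scalar saturating ops are
// emitted through lane 0 of a vector: the operands are inserted into
// <8 x i8>/<4 x i16> values, the vector intrinsic is called, and lane 0 is
// extracted again, while the i32/i64 variants call scalar
// sqadd/uqadd/sqsub/uqsub intrinsics directly.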

// CHECK-LABEL: define i64 @test_vshld_s64(i64 %a, i64 %b) #0 {
// CHECK:   [[VSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sshl.i64(i64 %a, i64 %b) #4
// CHECK:   ret i64 [[VSHLD_S64_I]]
int64_t test_vshld_s64(int64_t a, int64_t b) {
  return vshld_s64(a, b);
}

// CHECK-LABEL: define i64 @test_vshld_u64(i64 %a, i64 %b) #0 {
// CHECK:   [[VSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.ushl.i64(i64 %a, i64 %b) #4
// CHECK:   ret i64 [[VSHLD_U64_I]]
uint64_t test_vshld_u64(uint64_t a, uint64_t b) {
  return vshld_u64(a, b);
}

// CHECK-LABEL: define i8 @test_vqshlb_s8(i8 %a, i8 %b) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK:   [[VQSHLB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4
// CHECK:   [[TMP2:%.*]] = extractelement <8 x i8> [[VQSHLB_S8_I]], i64 0
// CHECK:   ret i8 [[TMP2]]
int8_t test_vqshlb_s8(int8_t a, int8_t b) {
  return vqshlb_s8(a, b);
}

// CHECK-LABEL: define i16 @test_vqshlh_s16(i16 %a, i16 %b) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK:   [[VQSHLH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
// CHECK:   [[TMP2:%.*]] = extractelement <4 x i16> [[VQSHLH_S16_I]], i64 0
// CHECK:   ret i16 [[TMP2]]
int16_t test_vqshlh_s16(int16_t a, int16_t b) {
  return vqshlh_s16(a, b);
}

// CHECK-LABEL: define i32 @test_vqshls_s32(i32 %a, i32 %b) #0 {
// CHECK:   [[VQSHLS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqshl.i32(i32 %a, i32 %b) #4
// CHECK:   ret i32 [[VQSHLS_S32_I]]
int32_t test_vqshls_s32(int32_t a, int32_t b) {
  return vqshls_s32(a, b);
}

// CHECK-LABEL: define i64 @test_vqshld_s64(i64 %a, i64 %b) #0 {
// CHECK:   [[VQSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqshl.i64(i64 %a, i64 %b) #4
// CHECK:   ret i64 [[VQSHLD_S64_I]]
int64_t test_vqshld_s64(int64_t a, int64_t b) {
  return vqshld_s64(a, b);
}

// CHECK-LABEL: define i8 @test_vqshlb_u8(i8 %a, i8 %b) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK:   [[VQSHLB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4
// CHECK:   [[TMP2:%.*]] = extractelement <8 x i8> [[VQSHLB_U8_I]], i64 0
// CHECK:   ret i8 [[TMP2]]
uint8_t test_vqshlb_u8(uint8_t a, uint8_t b) {
  return vqshlb_u8(a, b);
}

// CHECK-LABEL: define i16 @test_vqshlh_u16(i16 %a, i16 %b) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK:   [[VQSHLH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
// CHECK:   [[TMP2:%.*]] = extractelement <4 x i16> [[VQSHLH_U16_I]], i64 0
// CHECK:   ret i16 [[TMP2]]
uint16_t test_vqshlh_u16(uint16_t a, uint16_t b) {
  return vqshlh_u16(a, b);
}

// CHECK-LABEL: define i32 @test_vqshls_u32(i32 %a, i32 %b) #0 {
// CHECK:   [[VQSHLS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqshl.i32(i32 %a, i32 %b) #4
// CHECK:   ret i32 [[VQSHLS_U32_I]]
uint32_t test_vqshls_u32(uint32_t a, uint32_t b) {
  return vqshls_u32(a, b);
}

// CHECK-LABEL: define i64 @test_vqshld_u64(i64 %a, i64 %b) #0 {
// CHECK:   [[VQSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqshl.i64(i64 %a, i64 %b) #4
// CHECK:   ret i64 [[VQSHLD_U64_I]]
uint64_t test_vqshld_u64(uint64_t a, uint64_t b) {
  return vqshld_u64(a, b);
}

// CHECK-LABEL: define i64 @test_vrshld_s64(i64 %a, i64 %b) #0 {
// CHECK:   [[VRSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 %a, i64 %b) #4
// CHECK:   ret i64 [[VRSHLD_S64_I]]
int64_t test_vrshld_s64(int64_t a, int64_t b) {
  return vrshld_s64(a, b);
}

// CHECK-LABEL: define i64 @test_vrshld_u64(i64 %a, i64 %b) #0 {
// CHECK:   [[VRSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 %a, i64 %b) #4
// CHECK:   ret i64 [[VRSHLD_U64_I]]
uint64_t test_vrshld_u64(uint64_t a, uint64_t b) {
  return vrshld_u64(a, b);
}

// CHECK-LABEL: define i8 @test_vqrshlb_s8(i8 %a, i8 %b) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK:   [[VQRSHLB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4
// CHECK:   [[TMP2:%.*]] = extractelement <8 x i8> [[VQRSHLB_S8_I]], i64 0
// CHECK:   ret i8 [[TMP2]]
int8_t test_vqrshlb_s8(int8_t a, int8_t b) {
  return vqrshlb_s8(a, b);
}

// CHECK-LABEL: define i16 @test_vqrshlh_s16(i16 %a, i16 %b) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK:   [[VQRSHLH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
// CHECK:   [[TMP2:%.*]] = extractelement <4 x i16> [[VQRSHLH_S16_I]], i64 0
// CHECK:   ret i16 [[TMP2]]
int16_t test_vqrshlh_s16(int16_t a, int16_t b) {
  return vqrshlh_s16(a, b);
}

// CHECK-LABEL: define i32 @test_vqrshls_s32(i32 %a, i32 %b) #0 {
// CHECK:   [[VQRSHLS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrshl.i32(i32 %a, i32 %b) #4
// CHECK:   ret i32 [[VQRSHLS_S32_I]]
int32_t test_vqrshls_s32(int32_t a, int32_t b) {
  return vqrshls_s32(a, b);
}

// CHECK-LABEL: define i64 @test_vqrshld_s64(i64 %a, i64 %b) #0 {
// CHECK:   [[VQRSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqrshl.i64(i64 %a, i64 %b) #4
// CHECK:   ret i64 [[VQRSHLD_S64_I]]
int64_t test_vqrshld_s64(int64_t a, int64_t b) {
  return vqrshld_s64(a, b);
}

// CHECK-LABEL: define i8 @test_vqrshlb_u8(i8 %a, i8 %b) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK:   [[VQRSHLB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4
// CHECK:   [[TMP2:%.*]] = extractelement <8 x i8> [[VQRSHLB_U8_I]], i64 0
// CHECK:   ret i8 [[TMP2]]
uint8_t test_vqrshlb_u8(uint8_t a, uint8_t b) {
  return vqrshlb_u8(a, b);
}

// CHECK-LABEL: define i16 @test_vqrshlh_u16(i16 %a, i16 %b) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK:   [[VQRSHLH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
// CHECK:   [[TMP2:%.*]] = extractelement <4 x i16> [[VQRSHLH_U16_I]], i64 0
// CHECK:   ret i16 [[TMP2]]
uint16_t test_vqrshlh_u16(uint16_t a, uint16_t b) {
  return vqrshlh_u16(a, b);
}

// CHECK-LABEL: define i32 @test_vqrshls_u32(i32 %a, i32 %b) #0 {
// CHECK:   [[VQRSHLS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqrshl.i32(i32 %a, i32 %b) #4
// CHECK:   ret i32 [[VQRSHLS_U32_I]]
uint32_t test_vqrshls_u32(uint32_t a, uint32_t b) {
  return vqrshls_u32(a, b);
}

// CHECK-LABEL: define i64 @test_vqrshld_u64(i64 %a, i64 %b) #0 {
// CHECK:   [[VQRSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqrshl.i64(i64 %a, i64 %b) #4
// CHECK:   ret i64 [[VQRSHLD_U64_I]]
uint64_t test_vqrshld_u64(uint64_t a, uint64_t b) {
  return vqrshld_u64(a, b);
}
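// Note: the scalar shift tests above cover all four families checked here:
// plain (sshl/ushl), saturating (sqshl/uqshl), rounding (srshl/urshl), and
// saturating rounding (sqrshl/uqrshl).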

// CHECK-LABEL: define i64 @test_vpaddd_s64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VPADDD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> [[TMP1]]) #4
// CHECK:   ret i64 [[VPADDD_S64_I]]
int64_t test_vpaddd_s64(int64x2_t a) {
  return vpaddd_s64(a);
}

// CHECK-LABEL: define float @test_vpadds_f32(<2 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[LANE0_I:%.*]] = extractelement <2 x float> [[TMP1]], i64 0
// CHECK:   [[LANE1_I:%.*]] = extractelement <2 x float> [[TMP1]], i64 1
// CHECK:   [[VPADDD_I:%.*]] = fadd float [[LANE0_I]], [[LANE1_I]]
// CHECK:   ret float [[VPADDD_I]]
float32_t test_vpadds_f32(float32x2_t a) {
  return vpadds_f32(a);
}

// CHECK-LABEL: define double @test_vpaddd_f64(<2 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[LANE0_I:%.*]] = extractelement <2 x double> [[TMP1]], i64 0
// CHECK:   [[LANE1_I:%.*]] = extractelement <2 x double> [[TMP1]], i64 1
// CHECK:   [[VPADDD_I:%.*]] = fadd double [[LANE0_I]], [[LANE1_I]]
// CHECK:   ret double [[VPADDD_I]]
float64_t test_vpaddd_f64(float64x2_t a) {
  return vpaddd_f64(a);
}
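// Note: per the checks above, vpaddd_s64 reduces the two i64 lanes through
// @llvm.aarch64.neon.uaddv, while the floating-point pairwise adds extract
// both lanes and emit a plain fadd.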

// CHECK-LABEL: define float @test_vpmaxnms_f32(<2 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[VPMAXNMS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v2f32(<2 x float> [[TMP1]]) #4
// CHECK:   ret float [[VPMAXNMS_F32_I]]
float32_t test_vpmaxnms_f32(float32x2_t a) {
  return vpmaxnms_f32(a);
}

// CHECK-LABEL: define double @test_vpmaxnmqd_f64(<2 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[VPMAXNMQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double> [[TMP1]]) #4
// CHECK:   ret double [[VPMAXNMQD_F64_I]]
float64_t test_vpmaxnmqd_f64(float64x2_t a) {
  return vpmaxnmqd_f64(a);
}

// CHECK-LABEL: define float @test_vpmaxs_f32(<2 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[VPMAXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> [[TMP1]]) #4
// CHECK:   ret float [[VPMAXS_F32_I]]
float32_t test_vpmaxs_f32(float32x2_t a) {
  return vpmaxs_f32(a);
}

// CHECK-LABEL: define double @test_vpmaxqd_f64(<2 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[VPMAXQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxv.f64.v2f64(<2 x double> [[TMP1]]) #4
// CHECK:   ret double [[VPMAXQD_F64_I]]
float64_t test_vpmaxqd_f64(float64x2_t a) {
  return vpmaxqd_f64(a);
}

// CHECK-LABEL: define float @test_vpminnms_f32(<2 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[VPMINNMS_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v2f32(<2 x float> [[TMP1]]) #4
// CHECK:   ret float [[VPMINNMS_F32_I]]
float32_t test_vpminnms_f32(float32x2_t a) {
  return vpminnms_f32(a);
}

// CHECK-LABEL: define double @test_vpminnmqd_f64(<2 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[VPMINNMQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double> [[TMP1]]) #4
// CHECK:   ret double [[VPMINNMQD_F64_I]]
float64_t test_vpminnmqd_f64(float64x2_t a) {
  return vpminnmqd_f64(a);
}

// CHECK-LABEL: define float @test_vpmins_f32(<2 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[VPMINS_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v2f32(<2 x float> [[TMP1]]) #4
// CHECK:   ret float [[VPMINS_F32_I]]
float32_t test_vpmins_f32(float32x2_t a) {
  return vpmins_f32(a);
}

// CHECK-LABEL: define double @test_vpminqd_f64(<2 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[VPMINQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fminv.f64.v2f64(<2 x double> [[TMP1]]) #4
// CHECK:   ret double [[VPMINQD_F64_I]]
float64_t test_vpminqd_f64(float64x2_t a) {
  return vpminqd_f64(a);
}

// CHECK-LABEL: define i16 @test_vqdmulhh_s16(i16 %a, i16 %b) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK:   [[VQDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
// CHECK:   [[TMP2:%.*]] = extractelement <4 x i16> [[VQDMULHH_S16_I]], i64 0
// CHECK:   ret i16 [[TMP2]]
int16_t test_vqdmulhh_s16(int16_t a, int16_t b) {
  return vqdmulhh_s16(a, b);
}

// CHECK-LABEL: define i32 @test_vqdmulhs_s32(i32 %a, i32 %b) #0 {
// CHECK:   [[VQDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqdmulh.i32(i32 %a, i32 %b) #4
// CHECK:   ret i32 [[VQDMULHS_S32_I]]
int32_t test_vqdmulhs_s32(int32_t a, int32_t b) {
  return vqdmulhs_s32(a, b);
}

// CHECK-LABEL: define i16 @test_vqrdmulhh_s16(i16 %a, i16 %b) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK:   [[VQRDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
// CHECK:   [[TMP2:%.*]] = extractelement <4 x i16> [[VQRDMULHH_S16_I]], i64 0
// CHECK:   ret i16 [[TMP2]]
int16_t test_vqrdmulhh_s16(int16_t a, int16_t b) {
  return vqrdmulhh_s16(a, b);
}

// CHECK-LABEL: define i32 @test_vqrdmulhs_s32(i32 %a, i32 %b) #0 {
// CHECK:   [[VQRDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %a, i32 %b) #4
// CHECK:   ret i32 [[VQRDMULHS_S32_I]]
int32_t test_vqrdmulhs_s32(int32_t a, int32_t b) {
  return vqrdmulhs_s32(a, b);
}
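// Note: sqdmulh is the signed saturating doubling multiply returning the high
// half, and sqrdmulh is its rounding variant; as with vqadd/vqsub, the i16
// scalar forms are routed through lane 0 of the v4i16 vector intrinsic.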

// CHECK-LABEL: define float @test_vmulxs_f32(float %a, float %b) #0 {
// CHECK:   [[VMULXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmulx.f32(float %a, float %b) #4
// CHECK:   ret float [[VMULXS_F32_I]]
float32_t test_vmulxs_f32(float32_t a, float32_t b) {
  return vmulxs_f32(a, b);
}

// CHECK-LABEL: define double @test_vmulxd_f64(double %a, double %b) #0 {
// CHECK:   [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double %a, double %b) #4
// CHECK:   ret double [[VMULXD_F64_I]]
float64_t test_vmulxd_f64(float64_t a, float64_t b) {
  return vmulxd_f64(a, b);
}

// CHECK-LABEL: define <1 x double> @test_vmulx_f64(<1 x double> %a, <1 x double> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK:   [[VMULX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK:   [[VMULX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
// CHECK:   [[VMULX2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmulx.v1f64(<1 x double> [[VMULX_I]], <1 x double> [[VMULX1_I]]) #4
// CHECK:   ret <1 x double> [[VMULX2_I]]
float64x1_t test_vmulx_f64(float64x1_t a, float64x1_t b) {
  return vmulx_f64(a, b);
}

// CHECK-LABEL: define float @test_vrecpss_f32(float %a, float %b) #0 {
// CHECK:   [[VRECPS_I:%.*]] = call float @llvm.aarch64.neon.frecps.f32(float %a, float %b) #4
// CHECK:   ret float [[VRECPS_I]]
float32_t test_vrecpss_f32(float32_t a, float32_t b) {
  return vrecpss_f32(a, b);
}

// CHECK-LABEL: define double @test_vrecpsd_f64(double %a, double %b) #0 {
// CHECK:   [[VRECPS_I:%.*]] = call double @llvm.aarch64.neon.frecps.f64(double %a, double %b) #4
// CHECK:   ret double [[VRECPS_I]]
float64_t test_vrecpsd_f64(float64_t a, float64_t b) {
  return vrecpsd_f64(a, b);
}

// CHECK-LABEL: define float @test_vrsqrtss_f32(float %a, float %b) #0 {
// CHECK:   [[VRSQRTSS_F32_I:%.*]] = call float @llvm.aarch64.neon.frsqrts.f32(float %a, float %b) #4
// CHECK:   ret float [[VRSQRTSS_F32_I]]
float32_t test_vrsqrtss_f32(float32_t a, float32_t b) {
  return vrsqrtss_f32(a, b);
}

// CHECK-LABEL: define double @test_vrsqrtsd_f64(double %a, double %b) #0 {
// CHECK:   [[VRSQRTSD_F64_I:%.*]] = call double @llvm.aarch64.neon.frsqrts.f64(double %a, double %b) #4
// CHECK:   ret double [[VRSQRTSD_F64_I]]
float64_t test_vrsqrtsd_f64(float64_t a, float64_t b) {
  return vrsqrtsd_f64(a, b);
}

// CHECK-LABEL: define float @test_vcvts_f32_s32(i32 %a) #0 {
// CHECK:   [[TMP0:%.*]] = sitofp i32 %a to float
// CHECK:   ret float [[TMP0]]
float32_t test_vcvts_f32_s32(int32_t a) {
  return vcvts_f32_s32(a);
}

// CHECK-LABEL: define double @test_vcvtd_f64_s64(i64 %a) #0 {
// CHECK:   [[TMP0:%.*]] = sitofp i64 %a to double
// CHECK:   ret double [[TMP0]]
float64_t test_vcvtd_f64_s64(int64_t a) {
  return vcvtd_f64_s64(a);
}

// CHECK-LABEL: define float @test_vcvts_f32_u32(i32 %a) #0 {
// CHECK:   [[TMP0:%.*]] = uitofp i32 %a to float
// CHECK:   ret float [[TMP0]]
float32_t test_vcvts_f32_u32(uint32_t a) {
  return vcvts_f32_u32(a);
}

// CHECK-LABEL: define double @test_vcvtd_f64_u64(i64 %a) #0 {
// CHECK:   [[TMP0:%.*]] = uitofp i64 %a to double
// CHECK:   ret double [[TMP0]]
float64_t test_vcvtd_f64_u64(uint64_t a) {
  return vcvtd_f64_u64(a);
}
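// Note: unlike the surrounding intrinsics, the scalar int-to-float conversions
// above lower to plain sitofp/uitofp instructions rather than target calls.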

// CHECK-LABEL: define float @test_vrecpes_f32(float %a) #0 {
// CHECK:   [[VRECPES_F32_I:%.*]] = call float @llvm.aarch64.neon.frecpe.f32(float %a) #4
// CHECK:   ret float [[VRECPES_F32_I]]
float32_t test_vrecpes_f32(float32_t a) {
  return vrecpes_f32(a);
}

// CHECK-LABEL: define double @test_vrecped_f64(double %a) #0 {
// CHECK:   [[VRECPED_F64_I:%.*]] = call double @llvm.aarch64.neon.frecpe.f64(double %a) #4
// CHECK:   ret double [[VRECPED_F64_I]]
float64_t test_vrecped_f64(float64_t a) {
  return vrecped_f64(a);
}

// CHECK-LABEL: define float @test_vrecpxs_f32(float %a) #0 {
// CHECK:   [[VRECPXS_F32_I:%.*]] = call float @llvm.aarch64.neon.frecpx.f32(float %a) #4
// CHECK:   ret float [[VRECPXS_F32_I]]
float32_t test_vrecpxs_f32(float32_t a) {
  return vrecpxs_f32(a);
}

// CHECK-LABEL: define double @test_vrecpxd_f64(double %a) #0 {
// CHECK:   [[VRECPXD_F64_I:%.*]] = call double @llvm.aarch64.neon.frecpx.f64(double %a) #4
// CHECK:   ret double [[VRECPXD_F64_I]]
float64_t test_vrecpxd_f64(float64_t a) {
  return vrecpxd_f64(a);
}

// CHECK-LABEL: define <2 x i32> @test_vrsqrte_u32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VRSQRTE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VRSQRTE_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.ursqrte.v2i32(<2 x i32> [[VRSQRTE_V_I]]) #4
// CHECK:   ret <2 x i32> [[VRSQRTE_V1_I]]
uint32x2_t test_vrsqrte_u32(uint32x2_t a) {
  return vrsqrte_u32(a);
}

// CHECK-LABEL: define <4 x i32> @test_vrsqrteq_u32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VRSQRTEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VRSQRTEQ_V1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.ursqrte.v4i32(<4 x i32> [[VRSQRTEQ_V_I]]) #4
// CHECK:   ret <4 x i32> [[VRSQRTEQ_V1_I]]
uint32x4_t test_vrsqrteq_u32(uint32x4_t a) {
  return vrsqrteq_u32(a);
}

// CHECK-LABEL: define float @test_vrsqrtes_f32(float %a) #0 {
// CHECK:   [[VRSQRTES_F32_I:%.*]] = call float @llvm.aarch64.neon.frsqrte.f32(float %a) #4
// CHECK:   ret float [[VRSQRTES_F32_I]]
float32_t test_vrsqrtes_f32(float32_t a) {
  return vrsqrtes_f32(a);
}

// CHECK-LABEL: define double @test_vrsqrted_f64(double %a) #0 {
// CHECK:   [[VRSQRTED_F64_I:%.*]] = call double @llvm.aarch64.neon.frsqrte.f64(double %a) #4
// CHECK:   ret double [[VRSQRTED_F64_I]]
float64_t test_vrsqrted_f64(float64_t a) {
  return vrsqrted_f64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vld1q_u8(i8* %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK:   [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP0]]
// CHECK:   ret <16 x i8> [[TMP1]]
uint8x16_t test_vld1q_u8(uint8_t const *a) {
  return vld1q_u8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vld1q_u16(i16* %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK:   [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]]
// CHECK:   ret <8 x i16> [[TMP2]]
uint16x8_t test_vld1q_u16(uint16_t const *a) {
  return vld1q_u16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vld1q_u32(i32* %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
// CHECK:   [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]]
// CHECK:   ret <4 x i32> [[TMP2]]
uint32x4_t test_vld1q_u32(uint32_t const *a) {
  return vld1q_u32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vld1q_u64(i64* %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>*
// CHECK:   [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]]
// CHECK:   ret <2 x i64> [[TMP2]]
uint64x2_t test_vld1q_u64(uint64_t const *a) {
  return vld1q_u64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vld1q_s8(i8* %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK:   [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP0]]
// CHECK:   ret <16 x i8> [[TMP1]]
int8x16_t test_vld1q_s8(int8_t const *a) {
  return vld1q_s8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vld1q_s16(i16* %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK:   [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]]
// CHECK:   ret <8 x i16> [[TMP2]]
int16x8_t test_vld1q_s16(int16_t const *a) {
  return vld1q_s16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vld1q_s32(i32* %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
// CHECK:   [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]]
// CHECK:   ret <4 x i32> [[TMP2]]
int32x4_t test_vld1q_s32(int32_t const *a) {
  return vld1q_s32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vld1q_s64(i64* %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>*
// CHECK:   [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]]
// CHECK:   ret <2 x i64> [[TMP2]]
int64x2_t test_vld1q_s64(int64_t const *a) {
  return vld1q_s64(a);
}

// CHECK-LABEL: define <8 x half> @test_vld1q_f16(half* %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK:   [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]]
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <8 x half>
// CHECK:   ret <8 x half> [[TMP3]]
float16x8_t test_vld1q_f16(float16_t const *a) {
  return vld1q_f16(a);
}

// CHECK-LABEL: define <4 x float> @test_vld1q_f32(float* %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x float>*
// CHECK:   [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]]
// CHECK:   ret <4 x float> [[TMP2]]
float32x4_t test_vld1q_f32(float32_t const *a) {
  return vld1q_f32(a);
}

// CHECK-LABEL: define <2 x double> @test_vld1q_f64(double* %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast double* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x double>*
// CHECK:   [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]]
// CHECK:   ret <2 x double> [[TMP2]]
float64x2_t test_vld1q_f64(float64_t const *a) {
  return vld1q_f64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vld1q_p8(i8* %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK:   [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP0]]
// CHECK:   ret <16 x i8> [[TMP1]]
poly8x16_t test_vld1q_p8(poly8_t const *a) {
  return vld1q_p8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vld1q_p16(i16* %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK:   [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]]
// CHECK:   ret <8 x i16> [[TMP2]]
poly16x8_t test_vld1q_p16(poly16_t const *a) {
  return vld1q_p16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vld1_u8(i8* %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK:   [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]]
// CHECK:   ret <8 x i8> [[TMP1]]
uint8x8_t test_vld1_u8(uint8_t const *a) {
  return vld1_u8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vld1_u16(i16* %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
// CHECK:   [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]]
// CHECK:   ret <4 x i16> [[TMP2]]
uint16x4_t test_vld1_u16(uint16_t const *a) {
  return vld1_u16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vld1_u32(i32* %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
// CHECK:   [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]]
// CHECK:   ret <2 x i32> [[TMP2]]
uint32x2_t test_vld1_u32(uint32_t const *a) {
  return vld1_u32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vld1_u64(i64* %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <1 x i64>*
// CHECK:   [[TMP2:%.*]] = load <1 x i64>, <1 x i64>* [[TMP1]]
// CHECK:   ret <1 x i64> [[TMP2]]
uint64x1_t test_vld1_u64(uint64_t const *a) {
  return vld1_u64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vld1_s8(i8* %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK:   [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]]
// CHECK:   ret <8 x i8> [[TMP1]]
int8x8_t test_vld1_s8(int8_t const *a) {
  return vld1_s8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vld1_s16(i16* %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
// CHECK:   [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]]
// CHECK:   ret <4 x i16> [[TMP2]]
int16x4_t test_vld1_s16(int16_t const *a) {
  return vld1_s16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vld1_s32(i32* %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
// CHECK:   [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]]
// CHECK:   ret <2 x i32> [[TMP2]]
int32x2_t test_vld1_s32(int32_t const *a) {
  return vld1_s32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vld1_s64(i64* %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <1 x i64>*
// CHECK:   [[TMP2:%.*]] = load <1 x i64>, <1 x i64>* [[TMP1]]
// CHECK:   ret <1 x i64> [[TMP2]]
int64x1_t test_vld1_s64(int64_t const *a) {
  return vld1_s64(a);
}

// CHECK-LABEL: define <4 x half> @test_vld1_f16(half* %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
// CHECK:   [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]]
// CHECK:   [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to <4 x half>
// CHECK:   ret <4 x half> [[TMP3]]
float16x4_t test_vld1_f16(float16_t const *a) {
  return vld1_f16(a);
}

// CHECK-LABEL: define <2 x float> @test_vld1_f32(float* %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x float>*
// CHECK:   [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]]
// CHECK:   ret <2 x float> [[TMP2]]
float32x2_t test_vld1_f32(float32_t const *a) {
  return vld1_f32(a);
}

// CHECK-LABEL: define <1 x double> @test_vld1_f64(double* %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast double* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <1 x double>*
// CHECK:   [[TMP2:%.*]] = load <1 x double>, <1 x double>* [[TMP1]]
// CHECK:   ret <1 x double> [[TMP2]]
float64x1_t test_vld1_f64(float64_t const *a) {
  return vld1_f64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vld1_p8(i8* %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK:   [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]]
// CHECK:   ret <8 x i8> [[TMP1]]
poly8x8_t test_vld1_p8(poly8_t const *a) {
  return vld1_p8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vld1_p16(i16* %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
// CHECK:   [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]]
// CHECK:   ret <4 x i16> [[TMP2]]
poly16x4_t test_vld1_p16(poly16_t const *a) {
  return vld1_p16(a);
}
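// Note: vld1/vld1q lower to an ordinary vector load through a bitcast pointer,
// while the multi-vector vld2q forms below call @llvm.aarch64.neon.ld2, store
// the returned pair into the result struct, and copy it out via llvm.memcpy.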
10116 
// CHECK-LABEL: define %struct.uint8x16x2_t @test_vld2q_u8(i8* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.uint8x16x2_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK:   [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }*
// CHECK:   store { <16 x i8>, <16 x i8> } [[VLD2]], { <16 x i8>, <16 x i8> }* [[TMP2]]
// CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false)
// CHECK:   [[TMP5:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], align 16
// CHECK:   ret %struct.uint8x16x2_t [[TMP5]]
uint8x16x2_t test_vld2q_u8(uint8_t const *a) {
  return vld2q_u8(a);
}

// CHECK-LABEL: define %struct.uint16x8x2_t @test_vld2q_u16(i16* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
// CHECK:   [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
// CHECK:   store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], align 16
// CHECK:   ret %struct.uint16x8x2_t [[TMP6]]
uint16x8x2_t test_vld2q_u16(uint16_t const *a) {
  return vld2q_u16(a);
}

// CHECK-LABEL: define %struct.uint32x4x2_t @test_vld2q_u32(i32* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>*
// CHECK:   [[VLD2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0v4i32(<4 x i32>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32> }*
// CHECK:   store { <4 x i32>, <4 x i32> } [[VLD2]], { <4 x i32>, <4 x i32> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], align 16
// CHECK:   ret %struct.uint32x4x2_t [[TMP6]]
uint32x4x2_t test_vld2q_u32(uint32_t const *a) {
  return vld2q_u32(a);
}

// CHECK-LABEL: define %struct.uint64x2x2_t @test_vld2q_u64(i64* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint64x2x2_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.uint64x2x2_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x2x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
// CHECK:   [[VLD2:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64> }*
// CHECK:   store { <2 x i64>, <2 x i64> } [[VLD2]], { <2 x i64>, <2 x i64> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint64x2x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint64x2x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[RETVAL]], align 16
// CHECK:   ret %struct.uint64x2x2_t [[TMP6]]
uint64x2x2_t test_vld2q_u64(uint64_t const *a) {
  return vld2q_u64(a);
}

// CHECK-LABEL: define %struct.int8x16x2_t @test_vld2q_s8(i8* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.int8x16x2_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK:   [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }*
// CHECK:   store { <16 x i8>, <16 x i8> } [[VLD2]], { <16 x i8>, <16 x i8> }* [[TMP2]]
// CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false)
// CHECK:   [[TMP5:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], align 16
// CHECK:   ret %struct.int8x16x2_t [[TMP5]]
int8x16x2_t test_vld2q_s8(int8_t const *a) {
  return vld2q_s8(a);
}

// CHECK-LABEL: define %struct.int16x8x2_t @test_vld2q_s16(i16* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.int16x8x2_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
// CHECK:   [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
// CHECK:   store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], align 16
// CHECK:   ret %struct.int16x8x2_t [[TMP6]]
int16x8x2_t test_vld2q_s16(int16_t const *a) {
  return vld2q_s16(a);
}

// CHECK-LABEL: define %struct.int32x4x2_t @test_vld2q_s32(i32* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>*
// CHECK:   [[VLD2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0v4i32(<4 x i32>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32> }*
// CHECK:   store { <4 x i32>, <4 x i32> } [[VLD2]], { <4 x i32>, <4 x i32> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], align 16
// CHECK:   ret %struct.int32x4x2_t [[TMP6]]
int32x4x2_t test_vld2q_s32(int32_t const *a) {
  return vld2q_s32(a);
}

// CHECK-LABEL: define %struct.int64x2x2_t @test_vld2q_s64(i64* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int64x2x2_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.int64x2x2_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x2x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
// CHECK:   [[VLD2:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64> }*
// CHECK:   store { <2 x i64>, <2 x i64> } [[VLD2]], { <2 x i64>, <2 x i64> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int64x2x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int64x2x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int64x2x2_t, %struct.int64x2x2_t* [[RETVAL]], align 16
// CHECK:   ret %struct.int64x2x2_t [[TMP6]]
int64x2x2_t test_vld2q_s64(int64_t const *a) {
  return vld2q_s64(a);
}

// CHECK-LABEL: define %struct.float16x8x2_t @test_vld2q_f16(half* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
// CHECK:   [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
// CHECK:   store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x8x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float16x8x2_t, %struct.float16x8x2_t* [[RETVAL]], align 16
// CHECK:   ret %struct.float16x8x2_t [[TMP6]]
float16x8x2_t test_vld2q_f16(float16_t const *a) {
  return vld2q_f16(a);
}

// CHECK-LABEL: define %struct.float32x4x2_t @test_vld2q_f32(float* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x float>*
// CHECK:   [[VLD2:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0v4f32(<4 x float>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float> }*
// CHECK:   store { <4 x float>, <4 x float> } [[VLD2]], { <4 x float>, <4 x float> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], align 16
// CHECK:   ret %struct.float32x4x2_t [[TMP6]]
float32x4x2_t test_vld2q_f32(float32_t const *a) {
  return vld2q_f32(a);
}

// CHECK-LABEL: define %struct.float64x2x2_t @test_vld2q_f64(double* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x2x2_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.float64x2x2_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast double* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x double>*
// CHECK:   [[VLD2:%.*]] = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2.v2f64.p0v2f64(<2 x double>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double> }*
// CHECK:   store { <2 x double>, <2 x double> } [[VLD2]], { <2 x double>, <2 x double> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x2x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float64x2x2_t, %struct.float64x2x2_t* [[RETVAL]], align 16
// CHECK:   ret %struct.float64x2x2_t [[TMP6]]
float64x2x2_t test_vld2q_f64(float64_t const *a) {
  return vld2q_f64(a);
}

// CHECK-LABEL: define %struct.poly8x16x2_t @test_vld2q_p8(i8* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.poly8x16x2_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK:   [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }*
// CHECK:   store { <16 x i8>, <16 x i8> } [[VLD2]], { <16 x i8>, <16 x i8> }* [[TMP2]]
// CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false)
// CHECK:   [[TMP5:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], align 16
// CHECK:   ret %struct.poly8x16x2_t [[TMP5]]
poly8x16x2_t test_vld2q_p8(poly8_t const *a) {
  return vld2q_p8(a);
}

// CHECK-LABEL: define %struct.poly16x8x2_t @test_vld2q_p16(i16* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
// CHECK:   [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
// CHECK:   store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], align 16
// CHECK:   ret %struct.poly16x8x2_t [[TMP6]]
poly16x8x2_t test_vld2q_p16(poly16_t const *a) {
  return vld2q_p16(a);
}

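// The 64-bit (d-register) vld2 variants follow the same lowering pattern
// as the q-register forms above, but each struct holds two 8-byte
// vectors, so the copy back to the caller shrinks to a 16-byte memcpy at
// 8-byte alignment.
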
// CHECK-LABEL: define %struct.uint8x8x2_t @test_vld2_u8(i8* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK:   [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
// CHECK:   store { <8 x i8>, <8 x i8> } [[VLD2]], { <8 x i8>, <8 x i8> }* [[TMP2]]
// CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP5:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.uint8x8x2_t [[TMP5]]
uint8x8x2_t test_vld2_u8(uint8_t const *a) {
  return vld2_u8(a);
}

// CHECK-LABEL: define %struct.uint16x4x2_t @test_vld2_u16(i16* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
// CHECK:   [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
// CHECK:   store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.uint16x4x2_t [[TMP6]]
uint16x4x2_t test_vld2_u16(uint16_t const *a) {
  return vld2_u16(a);
}

// CHECK-LABEL: define %struct.uint32x2x2_t @test_vld2_u32(i32* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>*
// CHECK:   [[VLD2:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0v2i32(<2 x i32>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32> }*
// CHECK:   store { <2 x i32>, <2 x i32> } [[VLD2]], { <2 x i32>, <2 x i32> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.uint32x2x2_t [[TMP6]]
uint32x2x2_t test_vld2_u32(uint32_t const *a) {
  return vld2_u32(a);
}

// CHECK-LABEL: define %struct.uint64x1x2_t @test_vld2_u64(i64* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint64x1x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint64x1x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
// CHECK:   [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
// CHECK:   store { <1 x i64>, <1 x i64> } [[VLD2]], { <1 x i64>, <1 x i64> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint64x1x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.uint64x1x2_t [[TMP6]]
uint64x1x2_t test_vld2_u64(uint64_t const *a) {
  return vld2_u64(a);
}

// CHECK-LABEL: define %struct.int8x8x2_t @test_vld2_s8(i8* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK:   [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
// CHECK:   store { <8 x i8>, <8 x i8> } [[VLD2]], { <8 x i8>, <8 x i8> }* [[TMP2]]
// CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP5:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.int8x8x2_t [[TMP5]]
int8x8x2_t test_vld2_s8(int8_t const *a) {
  return vld2_s8(a);
}

// CHECK-LABEL: define %struct.int16x4x2_t @test_vld2_s16(i16* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
// CHECK:   [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
// CHECK:   store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.int16x4x2_t [[TMP6]]
int16x4x2_t test_vld2_s16(int16_t const *a) {
  return vld2_s16(a);
}

// CHECK-LABEL: define %struct.int32x2x2_t @test_vld2_s32(i32* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>*
// CHECK:   [[VLD2:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0v2i32(<2 x i32>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32> }*
// CHECK:   store { <2 x i32>, <2 x i32> } [[VLD2]], { <2 x i32>, <2 x i32> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.int32x2x2_t [[TMP6]]
int32x2x2_t test_vld2_s32(int32_t const *a) {
  return vld2_s32(a);
}

// CHECK-LABEL: define %struct.int64x1x2_t @test_vld2_s64(i64* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int64x1x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int64x1x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
// CHECK:   [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
// CHECK:   store { <1 x i64>, <1 x i64> } [[VLD2]], { <1 x i64>, <1 x i64> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int64x1x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int64x1x2_t, %struct.int64x1x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.int64x1x2_t [[TMP6]]
int64x1x2_t test_vld2_s64(int64_t const *a) {
  return vld2_s64(a);
}

// CHECK-LABEL: define %struct.float16x4x2_t @test_vld2_f16(half* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
// CHECK:   [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
// CHECK:   store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x4x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float16x4x2_t, %struct.float16x4x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.float16x4x2_t [[TMP6]]
float16x4x2_t test_vld2_f16(float16_t const *a) {
  return vld2_f16(a);
}

// CHECK-LABEL: define %struct.float32x2x2_t @test_vld2_f32(float* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x float>*
// CHECK:   [[VLD2:%.*]] = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2.v2f32.p0v2f32(<2 x float>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float> }*
// CHECK:   store { <2 x float>, <2 x float> } [[VLD2]], { <2 x float>, <2 x float> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.float32x2x2_t [[TMP6]]
float32x2x2_t test_vld2_f32(float32_t const *a) {
  return vld2_f32(a);
}

// CHECK-LABEL: define %struct.float64x1x2_t @test_vld2_f64(double* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x1x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.float64x1x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast double* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x double>*
// CHECK:   [[VLD2:%.*]] = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2.v1f64.p0v1f64(<1 x double>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double> }*
// CHECK:   store { <1 x double>, <1 x double> } [[VLD2]], { <1 x double>, <1 x double> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x1x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float64x1x2_t, %struct.float64x1x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.float64x1x2_t [[TMP6]]
float64x1x2_t test_vld2_f64(float64_t const *a) {
  return vld2_f64(a);
}

// CHECK-LABEL: define %struct.poly8x8x2_t @test_vld2_p8(i8* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK:   [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
// CHECK:   store { <8 x i8>, <8 x i8> } [[VLD2]], { <8 x i8>, <8 x i8> }* [[TMP2]]
// CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP5:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.poly8x8x2_t [[TMP5]]
poly8x8x2_t test_vld2_p8(poly8_t const *a) {
  return vld2_p8(a);
}

// CHECK-LABEL: define %struct.poly16x4x2_t @test_vld2_p16(i16* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
// CHECK:   [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
// CHECK:   store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.poly16x4x2_t [[TMP6]]
poly16x4x2_t test_vld2_p16(poly16_t const *a) {
  return vld2_p16(a);
}

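// vld3/vld3q de-interleave three-element structures through
// llvm.aarch64.neon.ld3.*; with three 16-byte q-register vectors the
// aggregate copied back to the caller grows to 48 bytes. Illustrative
// use only (px is a hypothetical pointer to packed RGB data):
//   uint8x16x3_t rgb = vld3q_u8(px); // rgb.val[0]=R, rgb.val[1]=G, rgb.val[2]=B
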
// CHECK-LABEL: define %struct.uint8x16x3_t @test_vld3q_u8(i8* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x16x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.uint8x16x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK:   [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }*
// CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
// CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x16x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 48, i32 16, i1 false)
// CHECK:   [[TMP5:%.*]] = load %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.uint8x16x3_t [[TMP5]]
uint8x16x3_t test_vld3q_u8(uint8_t const *a) {
  return vld3q_u8(a);
}

// CHECK-LABEL: define %struct.uint16x8x3_t @test_vld3q_u16(i16* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
// CHECK:   [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint16x8x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.uint16x8x3_t [[TMP6]]
uint16x8x3_t test_vld3q_u16(uint16_t const *a) {
  return vld3q_u16(a);
}

// CHECK-LABEL: define %struct.uint32x4x3_t @test_vld3q_u32(i32* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>*
// CHECK:   [[VLD3:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0v4i32(<4 x i32>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32> }*
// CHECK:   store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint32x4x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.uint32x4x3_t [[TMP6]]
uint32x4x3_t test_vld3q_u32(uint32_t const *a) {
  return vld3q_u32(a);
}

// CHECK-LABEL: define %struct.uint64x2x3_t @test_vld3q_u64(i64* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint64x2x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.uint64x2x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x2x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
// CHECK:   [[VLD3:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64> }*
// CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint64x2x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint64x2x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.uint64x2x3_t [[TMP6]]
uint64x2x3_t test_vld3q_u64(uint64_t const *a) {
  return vld3q_u64(a);
}

// CHECK-LABEL: define %struct.int8x16x3_t @test_vld3q_s8(i8* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x16x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.int8x16x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK:   [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }*
// CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
// CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x16x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 48, i32 16, i1 false)
// CHECK:   [[TMP5:%.*]] = load %struct.int8x16x3_t, %struct.int8x16x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.int8x16x3_t [[TMP5]]
int8x16x3_t test_vld3q_s8(int8_t const *a) {
  return vld3q_s8(a);
}

// CHECK-LABEL: define %struct.int16x8x3_t @test_vld3q_s16(i16* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
// CHECK:   [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int16x8x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int16x8x3_t, %struct.int16x8x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.int16x8x3_t [[TMP6]]
int16x8x3_t test_vld3q_s16(int16_t const *a) {
  return vld3q_s16(a);
}

// CHECK-LABEL: define %struct.int32x4x3_t @test_vld3q_s32(i32* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>*
// CHECK:   [[VLD3:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0v4i32(<4 x i32>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32> }*
// CHECK:   store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int32x4x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int32x4x3_t, %struct.int32x4x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.int32x4x3_t [[TMP6]]
int32x4x3_t test_vld3q_s32(int32_t const *a) {
  return vld3q_s32(a);
}

// CHECK-LABEL: define %struct.int64x2x3_t @test_vld3q_s64(i64* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int64x2x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.int64x2x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x2x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
// CHECK:   [[VLD3:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64> }*
// CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int64x2x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int64x2x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int64x2x3_t, %struct.int64x2x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.int64x2x3_t [[TMP6]]
int64x2x3_t test_vld3q_s64(int64_t const *a) {
  return vld3q_s64(a);
}

// CHECK-LABEL: define %struct.float16x8x3_t @test_vld3q_f16(half* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
// CHECK:   [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x8x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float16x8x3_t, %struct.float16x8x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.float16x8x3_t [[TMP6]]
float16x8x3_t test_vld3q_f16(float16_t const *a) {
  return vld3q_f16(a);
}

// CHECK-LABEL: define %struct.float32x4x3_t @test_vld3q_f32(float* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x4x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.float32x4x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x float>*
// CHECK:   [[VLD3:%.*]] = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0v4f32(<4 x float>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float>, <4 x float> }*
// CHECK:   store { <4 x float>, <4 x float>, <4 x float> } [[VLD3]], { <4 x float>, <4 x float>, <4 x float> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float32x4x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float32x4x3_t, %struct.float32x4x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.float32x4x3_t [[TMP6]]
float32x4x3_t test_vld3q_f32(float32_t const *a) {
  return vld3q_f32(a);
}

// CHECK-LABEL: define %struct.float64x2x3_t @test_vld3q_f64(double* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x2x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.float64x2x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast double* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x double>*
// CHECK:   [[VLD3:%.*]] = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3.v2f64.p0v2f64(<2 x double>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double>, <2 x double> }*
// CHECK:   store { <2 x double>, <2 x double>, <2 x double> } [[VLD3]], { <2 x double>, <2 x double>, <2 x double> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x2x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float64x2x3_t, %struct.float64x2x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.float64x2x3_t [[TMP6]]
float64x2x3_t test_vld3q_f64(float64_t const *a) {
  return vld3q_f64(a);
}

// CHECK-LABEL: define %struct.poly8x16x3_t @test_vld3q_p8(i8* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x16x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.poly8x16x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK:   [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }*
// CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
// CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x16x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 48, i32 16, i1 false)
// CHECK:   [[TMP5:%.*]] = load %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.poly8x16x3_t [[TMP5]]
poly8x16x3_t test_vld3q_p8(poly8_t const *a) {
  return vld3q_p8(a);
}

// CHECK-LABEL: define %struct.poly16x8x3_t @test_vld3q_p16(i16* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x8x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
// CHECK:   [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly16x8x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.poly16x8x3_t [[TMP6]]
poly16x8x3_t test_vld3q_p16(poly16_t const *a) {
  return vld3q_p16(a);
}

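// The 64-bit vld3 variants mirror the q-register tests above with three
// 8-byte vectors, giving a 24-byte memcpy at 8-byte alignment.
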
10810 // CHECK-LABEL: define %struct.uint8x8x3_t @test_vld3_u8(i8* %a) #0 {
10811 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x8x3_t, align 8
10812 // CHECK:   [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8
10813 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
10814 // CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
10815 // CHECK:   [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
10816 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
10817 // CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
10818 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x8x3_t* [[RETVAL]] to i8*
10819 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
10820 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 24, i32 8, i1 false)
10821 // CHECK:   [[TMP5:%.*]] = load %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[RETVAL]], align 8
10822 // CHECK:   ret %struct.uint8x8x3_t [[TMP5]]
test_vld3_u8(uint8_t const * a)10823 uint8x8x3_t test_vld3_u8(uint8_t const *a) {
10824   return vld3_u8(a);
10825 }
10826 
10827 // CHECK-LABEL: define %struct.uint16x4x3_t @test_vld3_u16(i16* %a) #0 {
10828 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x4x3_t, align 8
10829 // CHECK:   [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8
10830 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
10831 // CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
10832 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
10833 // CHECK:   [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
10834 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
10835 // CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
10836 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint16x4x3_t* [[RETVAL]] to i8*
10837 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
10838 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
10839 // CHECK:   [[TMP6:%.*]] = load %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[RETVAL]], align 8
10840 // CHECK:   ret %struct.uint16x4x3_t [[TMP6]]
test_vld3_u16(uint16_t const * a)10841 uint16x4x3_t test_vld3_u16(uint16_t const *a) {
10842   return vld3_u16(a);
10843 }
10844 
10845 // CHECK-LABEL: define %struct.uint32x2x3_t @test_vld3_u32(i32* %a) #0 {
10846 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x2x3_t, align 8
10847 // CHECK:   [[__RET:%.*]] = alloca %struct.uint32x2x3_t, align 8
10848 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
10849 // CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
10850 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>*
10851 // CHECK:   [[VLD3:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0v2i32(<2 x i32>* [[TMP2]])
10852 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32> }*
10853 // CHECK:   store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
10854 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint32x2x3_t* [[RETVAL]] to i8*
10855 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
10856 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
10857 // CHECK:   [[TMP6:%.*]] = load %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[RETVAL]], align 8
10858 // CHECK:   ret %struct.uint32x2x3_t [[TMP6]]
test_vld3_u32(uint32_t const * a)10859 uint32x2x3_t test_vld3_u32(uint32_t const *a) {
10860   return vld3_u32(a);
10861 }
10862 
10863 // CHECK-LABEL: define %struct.uint64x1x3_t @test_vld3_u64(i64* %a) #0 {
10864 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint64x1x3_t, align 8
10865 // CHECK:   [[__RET:%.*]] = alloca %struct.uint64x1x3_t, align 8
10866 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8*
10867 // CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
10868 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
10869 // CHECK:   [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
10870 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }*
10871 // CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
10872 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint64x1x3_t* [[RETVAL]] to i8*
10873 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8*
10874 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
10875 // CHECK:   [[TMP6:%.*]] = load %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[RETVAL]], align 8
10876 // CHECK:   ret %struct.uint64x1x3_t [[TMP6]]
test_vld3_u64(uint64_t const * a)10877 uint64x1x3_t test_vld3_u64(uint64_t const *a) {
10878   return vld3_u64(a);
10879 }
10880 
// CHECK-LABEL: define %struct.int8x8x3_t @test_vld3_s8(i8* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK:   [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
// CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x8x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 24, i32 8, i1 false)
// CHECK:   [[TMP5:%.*]] = load %struct.int8x8x3_t, %struct.int8x8x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.int8x8x3_t [[TMP5]]
int8x8x3_t test_vld3_s8(int8_t const *a) {
  return vld3_s8(a);
}

// CHECK-LABEL: define %struct.int16x4x3_t @test_vld3_s16(i16* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
// CHECK:   [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int16x4x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int16x4x3_t, %struct.int16x4x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.int16x4x3_t [[TMP6]]
int16x4x3_t test_vld3_s16(int16_t const *a) {
  return vld3_s16(a);
}

// CHECK-LABEL: define %struct.int32x2x3_t @test_vld3_s32(i32* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>*
// CHECK:   [[VLD3:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0v2i32(<2 x i32>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32> }*
// CHECK:   store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int32x2x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int32x2x3_t, %struct.int32x2x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.int32x2x3_t [[TMP6]]
int32x2x3_t test_vld3_s32(int32_t const *a) {
  return vld3_s32(a);
}

// CHECK-LABEL: define %struct.int64x1x3_t @test_vld3_s64(i64* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int64x1x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int64x1x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
// CHECK:   [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }*
// CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int64x1x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int64x1x3_t, %struct.int64x1x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.int64x1x3_t [[TMP6]]
int64x1x3_t test_vld3_s64(int64_t const *a) {
  return vld3_s64(a);
}

// CHECK-LABEL: define %struct.float16x4x3_t @test_vld3_f16(half* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
// CHECK:   [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x4x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float16x4x3_t, %struct.float16x4x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.float16x4x3_t [[TMP6]]
float16x4x3_t test_vld3_f16(float16_t const *a) {
  return vld3_f16(a);
}

// CHECK-LABEL: define %struct.float32x2x3_t @test_vld3_f32(float* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x float>*
// CHECK:   [[VLD3:%.*]] = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3.v2f32.p0v2f32(<2 x float>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float>, <2 x float> }*
// CHECK:   store { <2 x float>, <2 x float>, <2 x float> } [[VLD3]], { <2 x float>, <2 x float>, <2 x float> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float32x2x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float32x2x3_t, %struct.float32x2x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.float32x2x3_t [[TMP6]]
float32x2x3_t test_vld3_f32(float32_t const *a) {
  return vld3_f32(a);
}

// CHECK-LABEL: define %struct.float64x1x3_t @test_vld3_f64(double* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x1x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.float64x1x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast double* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x double>*
// CHECK:   [[VLD3:%.*]] = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3.v1f64.p0v1f64(<1 x double>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double>, <1 x double> }*
// CHECK:   store { <1 x double>, <1 x double>, <1 x double> } [[VLD3]], { <1 x double>, <1 x double>, <1 x double> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x1x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x1x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float64x1x3_t, %struct.float64x1x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.float64x1x3_t [[TMP6]]
float64x1x3_t test_vld3_f64(float64_t const *a) {
  return vld3_f64(a);
}

// CHECK-LABEL: define %struct.poly8x8x3_t @test_vld3_p8(i8* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK:   [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
// CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x8x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 24, i32 8, i1 false)
// CHECK:   [[TMP5:%.*]] = load %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.poly8x8x3_t [[TMP5]]
poly8x8x3_t test_vld3_p8(poly8_t const *a) {
  return vld3_p8(a);
}

// CHECK-LABEL: define %struct.poly16x4x3_t @test_vld3_p16(i16* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
// CHECK:   [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly16x4x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.poly16x4x3_t [[TMP6]]
poly16x4x3_t test_vld3_p16(poly16_t const *a) {
  return vld3_p16(a);
}

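// vld4q tests: the Q-register forms load four 128-bit vectors of
// de-interleaved data, so the result struct copied out below is 64 bytes
// with 16-byte alignment. As an illustrative (hypothetical, not
// FileCheck-verified) use of one of these intrinsics:
//   uint8x16x4_t rgba = vld4q_u8(pixels);  // split 64 interleaved bytes into 4 planes
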
// CHECK-LABEL: define %struct.uint8x16x4_t @test_vld4q_u8(i8* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x16x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.uint8x16x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK:   [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }*
// CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
// CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x16x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 64, i32 16, i1 false)
// CHECK:   [[TMP5:%.*]] = load %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[RETVAL]], align 16
// CHECK:   ret %struct.uint8x16x4_t [[TMP5]]
uint8x16x4_t test_vld4q_u8(uint8_t const *a) {
  return vld4q_u8(a);
}

// CHECK-LABEL: define %struct.uint16x8x4_t @test_vld4q_u16(i16* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
// CHECK:   [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint16x8x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[RETVAL]], align 16
// CHECK:   ret %struct.uint16x8x4_t [[TMP6]]
uint16x8x4_t test_vld4q_u16(uint16_t const *a) {
  return vld4q_u16(a);
}

// CHECK-LABEL: define %struct.uint32x4x4_t @test_vld4q_u32(i32* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x4x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>*
// CHECK:   [[VLD4:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0v4i32(<4 x i32>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }*
// CHECK:   store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint32x4x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[RETVAL]], align 16
// CHECK:   ret %struct.uint32x4x4_t [[TMP6]]
uint32x4x4_t test_vld4q_u32(uint32_t const *a) {
  return vld4q_u32(a);
}

// CHECK-LABEL: define %struct.uint64x2x4_t @test_vld4q_u64(i64* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint64x2x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.uint64x2x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x2x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
// CHECK:   [[VLD4:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }*
// CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint64x2x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint64x2x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[RETVAL]], align 16
// CHECK:   ret %struct.uint64x2x4_t [[TMP6]]
uint64x2x4_t test_vld4q_u64(uint64_t const *a) {
  return vld4q_u64(a);
}

// CHECK-LABEL: define %struct.int8x16x4_t @test_vld4q_s8(i8* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x16x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.int8x16x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK:   [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }*
// CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
// CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x16x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 64, i32 16, i1 false)
// CHECK:   [[TMP5:%.*]] = load %struct.int8x16x4_t, %struct.int8x16x4_t* [[RETVAL]], align 16
// CHECK:   ret %struct.int8x16x4_t [[TMP5]]
int8x16x4_t test_vld4q_s8(int8_t const *a) {
  return vld4q_s8(a);
}

// CHECK-LABEL: define %struct.int16x8x4_t @test_vld4q_s16(i16* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x8x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.int16x8x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
// CHECK:   [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int16x8x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int16x8x4_t, %struct.int16x8x4_t* [[RETVAL]], align 16
// CHECK:   ret %struct.int16x8x4_t [[TMP6]]
int16x8x4_t test_vld4q_s16(int16_t const *a) {
  return vld4q_s16(a);
}

// CHECK-LABEL: define %struct.int32x4x4_t @test_vld4q_s32(i32* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>*
// CHECK:   [[VLD4:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0v4i32(<4 x i32>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }*
// CHECK:   store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int32x4x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int32x4x4_t, %struct.int32x4x4_t* [[RETVAL]], align 16
// CHECK:   ret %struct.int32x4x4_t [[TMP6]]
int32x4x4_t test_vld4q_s32(int32_t const *a) {
  return vld4q_s32(a);
}

// CHECK-LABEL: define %struct.int64x2x4_t @test_vld4q_s64(i64* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int64x2x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.int64x2x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x2x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
// CHECK:   [[VLD4:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }*
// CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int64x2x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int64x2x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int64x2x4_t, %struct.int64x2x4_t* [[RETVAL]], align 16
// CHECK:   ret %struct.int64x2x4_t [[TMP6]]
int64x2x4_t test_vld4q_s64(int64_t const *a) {
  return vld4q_s64(a);
}

// CHECK-LABEL: define %struct.float16x8x4_t @test_vld4q_f16(half* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float16x8x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
// CHECK:   [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x8x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float16x8x4_t, %struct.float16x8x4_t* [[RETVAL]], align 16
// CHECK:   ret %struct.float16x8x4_t [[TMP6]]
float16x8x4_t test_vld4q_f16(float16_t const *a) {
  return vld4q_f16(a);
}

// CHECK-LABEL: define %struct.float32x4x4_t @test_vld4q_f32(float* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x4x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.float32x4x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x float>*
// CHECK:   [[VLD4:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4.v4f32.p0v4f32(<4 x float>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float>, <4 x float>, <4 x float> }*
// CHECK:   store { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD4]], { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float32x4x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float32x4x4_t, %struct.float32x4x4_t* [[RETVAL]], align 16
// CHECK:   ret %struct.float32x4x4_t [[TMP6]]
float32x4x4_t test_vld4q_f32(float32_t const *a) {
  return vld4q_f32(a);
}

// CHECK-LABEL: define %struct.float64x2x4_t @test_vld4q_f64(double* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x2x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.float64x2x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast double* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x double>*
// CHECK:   [[VLD4:%.*]] = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4.v2f64.p0v2f64(<2 x double>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double>, <2 x double>, <2 x double> }*
// CHECK:   store { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD4]], { <2 x double>, <2 x double>, <2 x double>, <2 x double> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x2x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x2x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float64x2x4_t, %struct.float64x2x4_t* [[RETVAL]], align 16
// CHECK:   ret %struct.float64x2x4_t [[TMP6]]
float64x2x4_t test_vld4q_f64(float64_t const *a) {
  return vld4q_f64(a);
}

// CHECK-LABEL: define %struct.poly8x16x4_t @test_vld4q_p8(i8* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x16x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.poly8x16x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK:   [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }*
// CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
// CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x16x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 64, i32 16, i1 false)
// CHECK:   [[TMP5:%.*]] = load %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[RETVAL]], align 16
// CHECK:   ret %struct.poly8x16x4_t [[TMP5]]
poly8x16x4_t test_vld4q_p8(poly8_t const *a) {
  return vld4q_p8(a);
}

// CHECK-LABEL: define %struct.poly16x8x4_t @test_vld4q_p16(i16* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
// CHECK:   [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly16x8x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[RETVAL]], align 16
// CHECK:   ret %struct.poly16x8x4_t [[TMP6]]
poly16x8x4_t test_vld4q_p16(poly16_t const *a) {
  return vld4q_p16(a);
}

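// vld4 tests: the D-register forms load four 64-bit vectors, so the result
// struct copied out below is 32 bytes with 8-byte alignment.
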
// CHECK-LABEL: define %struct.uint8x8x4_t @test_vld4_u8(i8* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK:   [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
// CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x8x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 8, i1 false)
// CHECK:   [[TMP5:%.*]] = load %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[RETVAL]], align 8
// CHECK:   ret %struct.uint8x8x4_t [[TMP5]]
uint8x8x4_t test_vld4_u8(uint8_t const *a) {
  return vld4_u8(a);
}

// CHECK-LABEL: define %struct.uint16x4x4_t @test_vld4_u16(i16* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
// CHECK:   [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint16x4x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[RETVAL]], align 8
// CHECK:   ret %struct.uint16x4x4_t [[TMP6]]
uint16x4x4_t test_vld4_u16(uint16_t const *a) {
  return vld4_u16(a);
}

// CHECK-LABEL: define %struct.uint32x2x4_t @test_vld4_u32(i32* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>*
// CHECK:   [[VLD4:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0v2i32(<2 x i32>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }*
// CHECK:   store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint32x2x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[RETVAL]], align 8
// CHECK:   ret %struct.uint32x2x4_t [[TMP6]]
uint32x2x4_t test_vld4_u32(uint32_t const *a) {
  return vld4_u32(a);
}

// CHECK-LABEL: define %struct.uint64x1x4_t @test_vld4_u64(i64* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint64x1x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint64x1x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
// CHECK:   [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }*
// CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint64x1x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[RETVAL]], align 8
// CHECK:   ret %struct.uint64x1x4_t [[TMP6]]
uint64x1x4_t test_vld4_u64(uint64_t const *a) {
  return vld4_u64(a);
}

// CHECK-LABEL: define %struct.int8x8x4_t @test_vld4_s8(i8* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK:   [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
// CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x8x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 8, i1 false)
// CHECK:   [[TMP5:%.*]] = load %struct.int8x8x4_t, %struct.int8x8x4_t* [[RETVAL]], align 8
// CHECK:   ret %struct.int8x8x4_t [[TMP5]]
int8x8x4_t test_vld4_s8(int8_t const *a) {
  return vld4_s8(a);
}

// CHECK-LABEL: define %struct.int16x4x4_t @test_vld4_s16(i16* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
// CHECK:   [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int16x4x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int16x4x4_t, %struct.int16x4x4_t* [[RETVAL]], align 8
// CHECK:   ret %struct.int16x4x4_t [[TMP6]]
int16x4x4_t test_vld4_s16(int16_t const *a) {
  return vld4_s16(a);
}

// CHECK-LABEL: define %struct.int32x2x4_t @test_vld4_s32(i32* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>*
// CHECK:   [[VLD4:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0v2i32(<2 x i32>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }*
// CHECK:   store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int32x2x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int32x2x4_t, %struct.int32x2x4_t* [[RETVAL]], align 8
// CHECK:   ret %struct.int32x2x4_t [[TMP6]]
int32x2x4_t test_vld4_s32(int32_t const *a) {
  return vld4_s32(a);
}

// CHECK-LABEL: define %struct.int64x1x4_t @test_vld4_s64(i64* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int64x1x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int64x1x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
// CHECK:   [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }*
// CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int64x1x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int64x1x4_t, %struct.int64x1x4_t* [[RETVAL]], align 8
// CHECK:   ret %struct.int64x1x4_t [[TMP6]]
int64x1x4_t test_vld4_s64(int64_t const *a) {
  return vld4_s64(a);
}

// CHECK-LABEL: define %struct.float16x4x4_t @test_vld4_f16(half* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
// CHECK:   [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x4x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float16x4x4_t, %struct.float16x4x4_t* [[RETVAL]], align 8
// CHECK:   ret %struct.float16x4x4_t [[TMP6]]
float16x4x4_t test_vld4_f16(float16_t const *a) {
  return vld4_f16(a);
}

// CHECK-LABEL: define %struct.float32x2x4_t @test_vld4_f32(float* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x float>*
// CHECK:   [[VLD4:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4.v2f32.p0v2f32(<2 x float>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float>, <2 x float>, <2 x float> }*
// CHECK:   store { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD4]], { <2 x float>, <2 x float>, <2 x float>, <2 x float> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float32x2x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float32x2x4_t, %struct.float32x2x4_t* [[RETVAL]], align 8
// CHECK:   ret %struct.float32x2x4_t [[TMP6]]
float32x2x4_t test_vld4_f32(float32_t const *a) {
  return vld4_f32(a);
}

// CHECK-LABEL: define %struct.float64x1x4_t @test_vld4_f64(double* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x1x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.float64x1x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast double* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x double>*
// CHECK:   [[VLD4:%.*]] = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4.v1f64.p0v1f64(<1 x double>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double>, <1 x double>, <1 x double> }*
// CHECK:   store { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD4]], { <1 x double>, <1 x double>, <1 x double>, <1 x double> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x1x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x1x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float64x1x4_t, %struct.float64x1x4_t* [[RETVAL]], align 8
// CHECK:   ret %struct.float64x1x4_t [[TMP6]]
float64x1x4_t test_vld4_f64(float64_t const *a) {
  return vld4_f64(a);
}

// CHECK-LABEL: define %struct.poly8x8x4_t @test_vld4_p8(i8* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK:   [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
// CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x8x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 8, i1 false)
// CHECK:   [[TMP5:%.*]] = load %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[RETVAL]], align 8
// CHECK:   ret %struct.poly8x8x4_t [[TMP5]]
poly8x8x4_t test_vld4_p8(poly8_t const *a) {
  return vld4_p8(a);
}

// CHECK-LABEL: define %struct.poly16x4x4_t @test_vld4_p16(i16* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
// CHECK:   [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly16x4x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[RETVAL]], align 8
// CHECK:   ret %struct.poly16x4x4_t [[TMP6]]
poly16x4x4_t test_vld4_p16(poly16_t const *a) {
  return vld4_p16(a);
}

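// vst1q tests: a single 128-bit register is stored to memory. For element
// types wider than i8 the value is round-tripped through <16 x i8> (and the
// pointer through i8*), which accounts for the paired bitcasts in each
// CHECK block below.
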
// CHECK-LABEL: define void @test_vst1q_u8(i8* %a, <16 x i8> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK:   store <16 x i8> %b, <16 x i8>* [[TMP0]]
// CHECK:   ret void
void test_vst1q_u8(uint8_t *a, uint8x16_t b) {
  vst1q_u8(a, b);
}

// CHECK-LABEL: define void @test_vst1q_u16(i16* %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   store <8 x i16> [[TMP3]], <8 x i16>* [[TMP2]]
// CHECK:   ret void
void test_vst1q_u16(uint16_t *a, uint16x8_t b) {
  vst1q_u16(a, b);
}

// CHECK-LABEL: define void @test_vst1q_u32(i32* %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   store <4 x i32> [[TMP3]], <4 x i32>* [[TMP2]]
// CHECK:   ret void
void test_vst1q_u32(uint32_t *a, uint32x4_t b) {
  vst1q_u32(a, b);
}

// CHECK-LABEL: define void @test_vst1q_u64(i64* %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>*
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   store <2 x i64> [[TMP3]], <2 x i64>* [[TMP2]]
// CHECK:   ret void
void test_vst1q_u64(uint64_t *a, uint64x2_t b) {
  vst1q_u64(a, b);
}

// CHECK-LABEL: define void @test_vst1q_s8(i8* %a, <16 x i8> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK:   store <16 x i8> %b, <16 x i8>* [[TMP0]]
// CHECK:   ret void
void test_vst1q_s8(int8_t *a, int8x16_t b) {
  vst1q_s8(a, b);
}

// CHECK-LABEL: define void @test_vst1q_s16(i16* %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   store <8 x i16> [[TMP3]], <8 x i16>* [[TMP2]]
// CHECK:   ret void
void test_vst1q_s16(int16_t *a, int16x8_t b) {
  vst1q_s16(a, b);
}

// CHECK-LABEL: define void @test_vst1q_s32(i32* %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   store <4 x i32> [[TMP3]], <4 x i32>* [[TMP2]]
// CHECK:   ret void
void test_vst1q_s32(int32_t *a, int32x4_t b) {
  vst1q_s32(a, b);
}

// CHECK-LABEL: define void @test_vst1q_s64(i64* %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>*
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   store <2 x i64> [[TMP3]], <2 x i64>* [[TMP2]]
// CHECK:   ret void
void test_vst1q_s64(int64_t *a, int64x2_t b) {
  vst1q_s64(a, b);
}

// CHECK-LABEL: define void @test_vst1q_f16(half* %a, <8 x half> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   store <8 x i16> [[TMP3]], <8 x i16>* [[TMP2]]
// CHECK:   ret void
void test_vst1q_f16(float16_t *a, float16x8_t b) {
  vst1q_f16(a, b);
}

// CHECK-LABEL: define void @test_vst1q_f32(float* %a, <4 x float> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x float>*
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK:   store <4 x float> [[TMP3]], <4 x float>* [[TMP2]]
// CHECK:   ret void
void test_vst1q_f32(float32_t *a, float32x4_t b) {
  vst1q_f32(a, b);
}

// CHECK-LABEL: define void @test_vst1q_f64(double* %a, <2 x double> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast double* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x double>*
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CHECK:   store <2 x double> [[TMP3]], <2 x double>* [[TMP2]]
// CHECK:   ret void
void test_vst1q_f64(float64_t *a, float64x2_t b) {
  vst1q_f64(a, b);
}

// CHECK-LABEL: define void @test_vst1q_p8(i8* %a, <16 x i8> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK:   store <16 x i8> %b, <16 x i8>* [[TMP0]]
// CHECK:   ret void
void test_vst1q_p8(poly8_t *a, poly8x16_t b) {
  vst1q_p8(a, b);
}

// CHECK-LABEL: define void @test_vst1q_p16(i16* %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   store <8 x i16> [[TMP3]], <8 x i16>* [[TMP2]]
// CHECK:   ret void
void test_vst1q_p16(poly16_t *a, poly16x8_t b) {
  vst1q_p16(a, b);
}

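// vst1 tests: the 64-bit D-register forms mirror vst1q, with <8 x i8> as
// the intermediate value type.
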
11637 // CHECK-LABEL: define void @test_vst1_u8(i8* %a, <8 x i8> %b) #0 {
11638 // CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
11639 // CHECK:   store <8 x i8> %b, <8 x i8>* [[TMP0]]
11640 // CHECK:   ret void
test_vst1_u8(uint8_t * a,uint8x8_t b)11641 void test_vst1_u8(uint8_t *a, uint8x8_t b) {
11642   vst1_u8(a, b);
11643 }
11644 
11645 // CHECK-LABEL: define void @test_vst1_u16(i16* %a, <4 x i16> %b) #0 {
11646 // CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
11647 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
11648 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
11649 // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
11650 // CHECK:   store <4 x i16> [[TMP3]], <4 x i16>* [[TMP2]]
11651 // CHECK:   ret void
test_vst1_u16(uint16_t * a,uint16x4_t b)11652 void test_vst1_u16(uint16_t *a, uint16x4_t b) {
11653   vst1_u16(a, b);
11654 }
11655 
11656 // CHECK-LABEL: define void @test_vst1_u32(i32* %a, <2 x i32> %b) #0 {
11657 // CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
11658 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
11659 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
11660 // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
11661 // CHECK:   store <2 x i32> [[TMP3]], <2 x i32>* [[TMP2]]
11662 // CHECK:   ret void
test_vst1_u32(uint32_t * a,uint32x2_t b)11663 void test_vst1_u32(uint32_t *a, uint32x2_t b) {
11664   vst1_u32(a, b);
11665 }
11666 
11667 // CHECK-LABEL: define void @test_vst1_u64(i64* %a, <1 x i64> %b) #0 {
11668 // CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
11669 // CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
11670 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <1 x i64>*
11671 // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
11672 // CHECK:   store <1 x i64> [[TMP3]], <1 x i64>* [[TMP2]]
11673 // CHECK:   ret void
test_vst1_u64(uint64_t * a,uint64x1_t b)11674 void test_vst1_u64(uint64_t *a, uint64x1_t b) {
11675   vst1_u64(a, b);
11676 }
11677 
11678 // CHECK-LABEL: define void @test_vst1_s8(i8* %a, <8 x i8> %b) #0 {
11679 // CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
11680 // CHECK:   store <8 x i8> %b, <8 x i8>* [[TMP0]]
11681 // CHECK:   ret void
test_vst1_s8(int8_t * a,int8x8_t b)11682 void test_vst1_s8(int8_t *a, int8x8_t b) {
11683   vst1_s8(a, b);
11684 }
11685 
11686 // CHECK-LABEL: define void @test_vst1_s16(i16* %a, <4 x i16> %b) #0 {
11687 // CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
11688 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
11689 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
11690 // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
11691 // CHECK:   store <4 x i16> [[TMP3]], <4 x i16>* [[TMP2]]
11692 // CHECK:   ret void
test_vst1_s16(int16_t * a,int16x4_t b)11693 void test_vst1_s16(int16_t *a, int16x4_t b) {
11694   vst1_s16(a, b);
11695 }
11696 
11697 // CHECK-LABEL: define void @test_vst1_s32(i32* %a, <2 x i32> %b) #0 {
11698 // CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
11699 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
11700 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
11701 // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
11702 // CHECK:   store <2 x i32> [[TMP3]], <2 x i32>* [[TMP2]]
11703 // CHECK:   ret void
test_vst1_s32(int32_t * a,int32x2_t b)11704 void test_vst1_s32(int32_t *a, int32x2_t b) {
11705   vst1_s32(a, b);
11706 }
11707 
11708 // CHECK-LABEL: define void @test_vst1_s64(i64* %a, <1 x i64> %b) #0 {
11709 // CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
11710 // CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
11711 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <1 x i64>*
11712 // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
11713 // CHECK:   store <1 x i64> [[TMP3]], <1 x i64>* [[TMP2]]
11714 // CHECK:   ret void
test_vst1_s64(int64_t * a,int64x1_t b)11715 void test_vst1_s64(int64_t *a, int64x1_t b) {
11716   vst1_s64(a, b);
11717 }
11718 
11719 // CHECK-LABEL: define void @test_vst1_f16(half* %a, <4 x half> %b) #0 {
11720 // CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
11721 // CHECK:   [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
11722 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
11723 // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
11724 // CHECK:   store <4 x i16> [[TMP3]], <4 x i16>* [[TMP2]]
11725 // CHECK:   ret void
test_vst1_f16(float16_t * a,float16x4_t b)11726 void test_vst1_f16(float16_t *a, float16x4_t b) {
11727   vst1_f16(a, b);
11728 }
11729 
11730 // CHECK-LABEL: define void @test_vst1_f32(float* %a, <2 x float> %b) #0 {
11731 // CHECK:   [[TMP0:%.*]] = bitcast float* %a to i8*
11732 // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
11733 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x float>*
11734 // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
11735 // CHECK:   store <2 x float> [[TMP3]], <2 x float>* [[TMP2]]
11736 // CHECK:   ret void
test_vst1_f32(float32_t * a,float32x2_t b)11737 void test_vst1_f32(float32_t *a, float32x2_t b) {
11738   vst1_f32(a, b);
11739 }
11740 
// CHECK-LABEL: define void @test_vst1_f64(double* %a, <1 x double> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast double* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <1 x double>*
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
// CHECK:   store <1 x double> [[TMP3]], <1 x double>* [[TMP2]]
// CHECK:   ret void
void test_vst1_f64(float64_t *a, float64x1_t b) {
  vst1_f64(a, b);
}

// CHECK-LABEL: define void @test_vst1_p8(i8* %a, <8 x i8> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK:   store <8 x i8> %b, <8 x i8>* [[TMP0]]
// CHECK:   ret void
void test_vst1_p8(poly8_t *a, poly8x8_t b) {
  vst1_p8(a, b);
}

// CHECK-LABEL: define void @test_vst1_p16(i16* %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   store <4 x i16> [[TMP3]], <4 x i16>* [[TMP2]]
// CHECK:   ret void
void test_vst1_p16(poly16_t *a, poly16x4_t b) {
  vst1_p16(a, b);
}

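// The vst2/vst3 tests below also exercise the AArch64 argument passing of
// NEON struct-of-vector types: the {u,s,f,p}NxMxK_t argument is coerced to an
// IR array ([2 x <...>] or [3 x <...>]), spilled to a local alloca, copied
// with llvm.memcpy, and each vector is reloaded before the call to the
// llvm.aarch64.neon.stN intrinsic. For i8 element types the destination
// pointer is passed as-is; wider element types first bitcast it to i8*.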
// CHECK-LABEL: define void @test_vst2q_u8(i8* %a, [2 x <16 x i8>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.uint8x16x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint8x16x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x16x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK:   call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i8* %a)
// CHECK:   ret void
void test_vst2q_u8(uint8_t *a, uint8x16x2_t b) {
  vst2q_u8(a, b);
}

// CHECK-LABEL: define void @test_vst2q_u16(i16* %a, [2 x <8 x i16>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x8x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK:   call void @llvm.aarch64.neon.st2.v8i16.p0i8(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst2q_u16(uint16_t *a, uint16x8x2_t b) {
  vst2q_u16(a, b);
}

// CHECK-LABEL: define void @test_vst2q_u32(i32* %a, [2 x <4 x i32>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <4 x i32>] [[B]].coerce, [2 x <4 x i32>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x4x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK:   call void @llvm.aarch64.neon.st2.v4i32.p0i8(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst2q_u32(uint32_t *a, uint32x4x2_t b) {
  vst2q_u32(a, b);
}

// CHECK-LABEL: define void @test_vst2q_u64(i64* %a, [2 x <2 x i64>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.uint64x2x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint64x2x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x2x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x2x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
// CHECK:   call void @llvm.aarch64.neon.st2.v2i64.p0i8(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst2q_u64(uint64_t *a, uint64x2x2_t b) {
  vst2q_u64(a, b);
}

// CHECK-LABEL: define void @test_vst2q_s8(i8* %a, [2 x <16 x i8>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.int8x16x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int8x16x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x16x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK:   call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i8* %a)
// CHECK:   ret void
void test_vst2q_s8(int8_t *a, int8x16x2_t b) {
  vst2q_s8(a, b);
}

// CHECK-LABEL: define void @test_vst2q_s16(i16* %a, [2 x <8 x i16>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.int16x8x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x8x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK:   call void @llvm.aarch64.neon.st2.v8i16.p0i8(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst2q_s16(int16_t *a, int16x8x2_t b) {
  vst2q_s16(a, b);
}

// CHECK-LABEL: define void @test_vst2q_s32(i32* %a, [2 x <4 x i32>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <4 x i32>] [[B]].coerce, [2 x <4 x i32>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x4x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK:   call void @llvm.aarch64.neon.st2.v4i32.p0i8(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst2q_s32(int32_t *a, int32x4x2_t b) {
  vst2q_s32(a, b);
}

// CHECK-LABEL: define void @test_vst2q_s64(i64* %a, [2 x <2 x i64>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.int64x2x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int64x2x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x2x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x2x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
// CHECK:   call void @llvm.aarch64.neon.st2.v2i64.p0i8(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst2q_s64(int64_t *a, int64x2x2_t b) {
  vst2q_s64(a, b);
}

// CHECK-LABEL: define void @test_vst2q_f16(half* %a, [2 x <8 x half>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <8 x half>] [[B]].coerce, [2 x <8 x half>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x8x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast half* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK:   call void @llvm.aarch64.neon.st2.v8i16.p0i8(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst2q_f16(float16_t *a, float16x8x2_t b) {
  vst2q_f16(a, b);
}

// CHECK-LABEL: define void @test_vst2q_f32(float* %a, [2 x <4 x float>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <4 x float>] [[B]].coerce, [2 x <4 x float>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x4x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast float* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
// CHECK:   call void @llvm.aarch64.neon.st2.v4f32.p0i8(<4 x float> [[TMP7]], <4 x float> [[TMP8]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst2q_f32(float32_t *a, float32x4x2_t b) {
  vst2q_f32(a, b);
}

// CHECK-LABEL: define void @test_vst2q_f64(double* %a, [2 x <2 x double>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.float64x2x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.float64x2x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <2 x double>] [[B]].coerce, [2 x <2 x double>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x2x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x double>], [2 x <2 x double>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x double>], [2 x <2 x double>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
// CHECK:   call void @llvm.aarch64.neon.st2.v2f64.p0i8(<2 x double> [[TMP7]], <2 x double> [[TMP8]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst2q_f64(float64_t *a, float64x2x2_t b) {
  vst2q_f64(a, b);
}

// CHECK-LABEL: define void @test_vst2q_p8(i8* %a, [2 x <16 x i8>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.poly8x16x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.poly8x16x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x16x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK:   call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i8* %a)
// CHECK:   ret void
void test_vst2q_p8(poly8_t *a, poly8x16x2_t b) {
  vst2q_p8(a, b);
}

// CHECK-LABEL: define void @test_vst2q_p16(i16* %a, [2 x <8 x i16>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x8x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK:   call void @llvm.aarch64.neon.st2.v8i16.p0i8(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst2q_p16(poly16_t *a, poly16x8x2_t b) {
  vst2q_p16(a, b);
}

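// The 64-bit (d-register) vst2 variants below follow the same pattern with
// 8-byte alignment: the pair is coerced to [2 x <8 x ...>] or equivalent,
// copied with a 16-byte memcpy, and passed to the matching st2 intrinsic.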
// CHECK-LABEL: define void @test_vst2_u8(i8* %a, [2 x <8 x i8>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x8x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i8* %a)
// CHECK:   ret void
void test_vst2_u8(uint8_t *a, uint8x8x2_t b) {
  vst2_u8(a, b);
}

// CHECK-LABEL: define void @test_vst2_u16(i16* %a, [2 x <4 x i16>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x4x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK:   call void @llvm.aarch64.neon.st2.v4i16.p0i8(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst2_u16(uint16_t *a, uint16x4x2_t b) {
  vst2_u16(a, b);
}

// CHECK-LABEL: define void @test_vst2_u32(i32* %a, [2 x <2 x i32>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <2 x i32>] [[B]].coerce, [2 x <2 x i32>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x2x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK:   call void @llvm.aarch64.neon.st2.v2i32.p0i8(<2 x i32> [[TMP7]], <2 x i32> [[TMP8]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst2_u32(uint32_t *a, uint32x2x2_t b) {
  vst2_u32(a, b);
}

// CHECK-LABEL: define void @test_vst2_u64(i64* %a, [2 x <1 x i64>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.uint64x1x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint64x1x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x1x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK:   call void @llvm.aarch64.neon.st2.v1i64.p0i8(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst2_u64(uint64_t *a, uint64x1x2_t b) {
  vst2_u64(a, b);
}

// CHECK-LABEL: define void @test_vst2_s8(i8* %a, [2 x <8 x i8>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x8x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i8* %a)
// CHECK:   ret void
void test_vst2_s8(int8_t *a, int8x8x2_t b) {
  vst2_s8(a, b);
}

// CHECK-LABEL: define void @test_vst2_s16(i16* %a, [2 x <4 x i16>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x4x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK:   call void @llvm.aarch64.neon.st2.v4i16.p0i8(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst2_s16(int16_t *a, int16x4x2_t b) {
  vst2_s16(a, b);
}

// CHECK-LABEL: define void @test_vst2_s32(i32* %a, [2 x <2 x i32>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <2 x i32>] [[B]].coerce, [2 x <2 x i32>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x2x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK:   call void @llvm.aarch64.neon.st2.v2i32.p0i8(<2 x i32> [[TMP7]], <2 x i32> [[TMP8]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst2_s32(int32_t *a, int32x2x2_t b) {
  vst2_s32(a, b);
}

// CHECK-LABEL: define void @test_vst2_s64(i64* %a, [2 x <1 x i64>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.int64x1x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int64x1x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x1x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK:   call void @llvm.aarch64.neon.st2.v1i64.p0i8(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst2_s64(int64_t *a, int64x1x2_t b) {
  vst2_s64(a, b);
}

// CHECK-LABEL: define void @test_vst2_f16(half* %a, [2 x <4 x half>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <4 x half>] [[B]].coerce, [2 x <4 x half>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x4x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast half* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK:   call void @llvm.aarch64.neon.st2.v4i16.p0i8(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst2_f16(float16_t *a, float16x4x2_t b) {
  vst2_f16(a, b);
}

// CHECK-LABEL: define void @test_vst2_f32(float* %a, [2 x <2 x float>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <2 x float>] [[B]].coerce, [2 x <2 x float>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x2x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast float* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
// CHECK:   call void @llvm.aarch64.neon.st2.v2f32.p0i8(<2 x float> [[TMP7]], <2 x float> [[TMP8]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst2_f32(float32_t *a, float32x2x2_t b) {
  vst2_f32(a, b);
}

// CHECK-LABEL: define void @test_vst2_f64(double* %a, [2 x <1 x double>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.float64x1x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float64x1x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <1 x double>] [[B]].coerce, [2 x <1 x double>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x1x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x double>], [2 x <1 x double>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x double>], [2 x <1 x double>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
// CHECK:   call void @llvm.aarch64.neon.st2.v1f64.p0i8(<1 x double> [[TMP7]], <1 x double> [[TMP8]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst2_f64(float64_t *a, float64x1x2_t b) {
  vst2_f64(a, b);
}

// CHECK-LABEL: define void @test_vst2_p8(i8* %a, [2 x <8 x i8>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x8x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i8* %a)
// CHECK:   ret void
void test_vst2_p8(poly8_t *a, poly8x8x2_t b) {
  vst2_p8(a, b);
}

// CHECK-LABEL: define void @test_vst2_p16(i16* %a, [2 x <4 x i16>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x4x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK:   call void @llvm.aarch64.neon.st2.v4i16.p0i8(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst2_p16(poly16_t *a, poly16x4x2_t b) {
  vst2_p16(a, b);
}

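// vst3 stores three registers of interleaved elements; the checks mirror the
// vst2 cases with a [3 x <...>] coerced array, a 48-byte memcpy for the q
// forms, and a call to the three-operand llvm.aarch64.neon.st3 intrinsic.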
// CHECK-LABEL: define void @test_vst3q_u8(i8* %a, [3 x <16 x i8>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.uint8x16x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint8x16x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x16x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK:   call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i8* %a)
// CHECK:   ret void
void test_vst3q_u8(uint8_t *a, uint8x16x3_t b) {
  vst3q_u8(a, b);
}

// CHECK-LABEL: define void @test_vst3q_u16(i16* %a, [3 x <8 x i16>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x8x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK:   call void @llvm.aarch64.neon.st3.v8i16.p0i8(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst3q_u16(uint16_t *a, uint16x8x3_t b) {
  vst3q_u16(a, b);
}

// CHECK-LABEL: define void @test_vst3q_u32(i32* %a, [3 x <4 x i32>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <4 x i32>] [[B]].coerce, [3 x <4 x i32>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x4x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
// CHECK:   call void @llvm.aarch64.neon.st3.v4i32.p0i8(<4 x i32> [[TMP9]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst3q_u32(uint32_t *a, uint32x4x3_t b) {
  vst3q_u32(a, b);
}

12474 // CHECK-LABEL: define void @test_vst3q_u64(i64* %a, [3 x <2 x i64>] %b.coerce) #0 {
12475 // CHECK:   [[B:%.*]] = alloca %struct.uint64x2x3_t, align 16
12476 // CHECK:   [[__S1:%.*]] = alloca %struct.uint64x2x3_t, align 16
12477 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[B]], i32 0, i32 0
12478 // CHECK:   store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16
12479 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x2x3_t* [[__S1]] to i8*
12480 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x2x3_t* [[B]] to i8*
12481 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
12482 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
12483 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0
12484 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0
12485 // CHECK:   [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
12486 // CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
12487 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0
12488 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], i64 0, i64 1
12489 // CHECK:   [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
12490 // CHECK:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
12491 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0
12492 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL3]], i64 0, i64 2
12493 // CHECK:   [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
12494 // CHECK:   [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
12495 // CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
12496 // CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
12497 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
12498 // CHECK:   call void @llvm.aarch64.neon.st3.v2i64.p0i8(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i8* [[TMP2]])
12499 // CHECK:   ret void
test_vst3q_u64(uint64_t * a,uint64x2x3_t b)12500 void test_vst3q_u64(uint64_t *a, uint64x2x3_t b) {
12501   vst3q_u64(a, b);
12502 }
12503 
12504 // CHECK-LABEL: define void @test_vst3q_s8(i8* %a, [3 x <16 x i8>] %b.coerce) #0 {
12505 // CHECK:   [[B:%.*]] = alloca %struct.int8x16x3_t, align 16
12506 // CHECK:   [[__S1:%.*]] = alloca %struct.int8x16x3_t, align 16
12507 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[B]], i32 0, i32 0
12508 // CHECK:   store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
12509 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__S1]] to i8*
12510 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x16x3_t* [[B]] to i8*
12511 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
12512 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
12513 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0
12514 // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
12515 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
12516 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i64 0, i64 1
12517 // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
12518 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
12519 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i64 0, i64 2
12520 // CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
12521 // CHECK:   call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i8* %a)
12522 // CHECK:   ret void
test_vst3q_s8(int8_t * a,int8x16x3_t b)12523 void test_vst3q_s8(int8_t *a, int8x16x3_t b) {
12524   vst3q_s8(a, b);
12525 }
12526 
12527 // CHECK-LABEL: define void @test_vst3q_s16(i16* %a, [3 x <8 x i16>] %b.coerce) #0 {
12528 // CHECK:   [[B:%.*]] = alloca %struct.int16x8x3_t, align 16
12529 // CHECK:   [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16
12530 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[B]], i32 0, i32 0
12531 // CHECK:   store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16
12532 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__S1]] to i8*
12533 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x8x3_t* [[B]] to i8*
12534 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
12535 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
12536 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
12537 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0
12538 // CHECK:   [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
12539 // CHECK:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
12540 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
12541 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i64 0, i64 1
12542 // CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
12543 // CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
12544 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
12545 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i64 0, i64 2
12546 // CHECK:   [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
12547 // CHECK:   [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
12548 // CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
12549 // CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
12550 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
12551 // CHECK:   call void @llvm.aarch64.neon.st3.v8i16.p0i8(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i8* [[TMP2]])
12552 // CHECK:   ret void
test_vst3q_s16(int16_t * a,int16x8x3_t b)12553 void test_vst3q_s16(int16_t *a, int16x8x3_t b) {
12554   vst3q_s16(a, b);
12555 }
12556 
12557 // CHECK-LABEL: define void @test_vst3q_s32(i32* %a, [3 x <4 x i32>] %b.coerce) #0 {
12558 // CHECK:   [[B:%.*]] = alloca %struct.int32x4x3_t, align 16
12559 // CHECK:   [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16
12560 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[B]], i32 0, i32 0
12561 // CHECK:   store [3 x <4 x i32>] [[B]].coerce, [3 x <4 x i32>]* [[COERCE_DIVE]], align 16
12562 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__S1]] to i8*
12563 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x4x3_t* [[B]] to i8*
12564 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
12565 // CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
12566 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
12567 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i64 0, i64 0
12568 // CHECK:   [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
12569 // CHECK:   [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
12570 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
12571 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i64 0, i64 1
12572 // CHECK:   [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
12573 // CHECK:   [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
12574 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
12575 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i64 0, i64 2
12576 // CHECK:   [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
12577 // CHECK:   [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
12578 // CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
12579 // CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
12580 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
12581 // CHECK:   call void @llvm.aarch64.neon.st3.v4i32.p0i8(<4 x i32> [[TMP9]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], i8* [[TMP2]])
12582 // CHECK:   ret void
test_vst3q_s32(int32_t * a,int32x4x3_t b)12583 void test_vst3q_s32(int32_t *a, int32x4x3_t b) {
12584   vst3q_s32(a, b);
12585 }
12586 
12587 // CHECK-LABEL: define void @test_vst3q_s64(i64* %a, [3 x <2 x i64>] %b.coerce) #0 {
12588 // CHECK:   [[B:%.*]] = alloca %struct.int64x2x3_t, align 16
12589 // CHECK:   [[__S1:%.*]] = alloca %struct.int64x2x3_t, align 16
12590 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[B]], i32 0, i32 0
12591 // CHECK:   store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16
12592 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x2x3_t* [[__S1]] to i8*
12593 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x2x3_t* [[B]] to i8*
12594 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
12595 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
12596 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0
12597 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0
12598 // CHECK:   [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
12599 // CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
12600 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0
12601 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], i64 0, i64 1
12602 // CHECK:   [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
12603 // CHECK:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
12604 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0
12605 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL3]], i64 0, i64 2
12606 // CHECK:   [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
12607 // CHECK:   [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
12608 // CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
12609 // CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
12610 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
12611 // CHECK:   call void @llvm.aarch64.neon.st3.v2i64.p0i8(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i8* [[TMP2]])
12612 // CHECK:   ret void
test_vst3q_s64(int64_t * a,int64x2x3_t b)12613 void test_vst3q_s64(int64_t *a, int64x2x3_t b) {
12614   vst3q_s64(a, b);
12615 }
12616 
12617 // CHECK-LABEL: define void @test_vst3q_f16(half* %a, [3 x <8 x half>] %b.coerce) #0 {
12618 // CHECK:   [[B:%.*]] = alloca %struct.float16x8x3_t, align 16
12619 // CHECK:   [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16
12620 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[B]], i32 0, i32 0
12621 // CHECK:   store [3 x <8 x half>] [[B]].coerce, [3 x <8 x half>]* [[COERCE_DIVE]], align 16
12622 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__S1]] to i8*
12623 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x8x3_t* [[B]] to i8*
12624 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
12625 // CHECK:   [[TMP2:%.*]] = bitcast half* %a to i8*
12626 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
12627 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]], i64 0, i64 0
12628 // CHECK:   [[TMP3:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
12629 // CHECK:   [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8>
12630 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
12631 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL1]], i64 0, i64 1
12632 // CHECK:   [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
12633 // CHECK:   [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
12634 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
12635 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL3]], i64 0, i64 2
12636 // CHECK:   [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
12637 // CHECK:   [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
12638 // CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
12639 // CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
12640 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
12641 // CHECK:   call void @llvm.aarch64.neon.st3.v8i16.p0i8(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i8* [[TMP2]])
12642 // CHECK:   ret void
test_vst3q_f16(float16_t * a,float16x8x3_t b)12643 void test_vst3q_f16(float16_t *a, float16x8x3_t b) {
12644   vst3q_f16(a, b);
12645 }
12646 
12647 // CHECK-LABEL: define void @test_vst3q_f32(float* %a, [3 x <4 x float>] %b.coerce) #0 {
12648 // CHECK:   [[B:%.*]] = alloca %struct.float32x4x3_t, align 16
12649 // CHECK:   [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16
12650 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[B]], i32 0, i32 0
12651 // CHECK:   store [3 x <4 x float>] [[B]].coerce, [3 x <4 x float>]* [[COERCE_DIVE]], align 16
12652 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__S1]] to i8*
12653 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x4x3_t* [[B]] to i8*
12654 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
12655 // CHECK:   [[TMP2:%.*]] = bitcast float* %a to i8*
12656 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
12657 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]], i64 0, i64 0
12658 // CHECK:   [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
12659 // CHECK:   [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8>
12660 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
12661 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL1]], i64 0, i64 1
12662 // CHECK:   [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
12663 // CHECK:   [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
12664 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
12665 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL3]], i64 0, i64 2
12666 // CHECK:   [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
12667 // CHECK:   [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
12668 // CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
12669 // CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
12670 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
12671 // CHECK:   call void @llvm.aarch64.neon.st3.v4f32.p0i8(<4 x float> [[TMP9]], <4 x float> [[TMP10]], <4 x float> [[TMP11]], i8* [[TMP2]])
12672 // CHECK:   ret void
test_vst3q_f32(float32_t * a,float32x4x3_t b)12673 void test_vst3q_f32(float32_t *a, float32x4x3_t b) {
12674   vst3q_f32(a, b);
12675 }
12676 
12677 // CHECK-LABEL: define void @test_vst3q_f64(double* %a, [3 x <2 x double>] %b.coerce) #0 {
12678 // CHECK:   [[B:%.*]] = alloca %struct.float64x2x3_t, align 16
12679 // CHECK:   [[__S1:%.*]] = alloca %struct.float64x2x3_t, align 16
12680 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[B]], i32 0, i32 0
12681 // CHECK:   store [3 x <2 x double>] [[B]].coerce, [3 x <2 x double>]* [[COERCE_DIVE]], align 16
12682 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x3_t* [[__S1]] to i8*
12683 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x2x3_t* [[B]] to i8*
12684 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
12685 // CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
12686 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0
12687 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL]], i64 0, i64 0
12688 // CHECK:   [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16
12689 // CHECK:   [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
12690 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0
12691 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL1]], i64 0, i64 1
12692 // CHECK:   [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16
12693 // CHECK:   [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
12694 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0
12695 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL3]], i64 0, i64 2
12696 // CHECK:   [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX4]], align 16
12697 // CHECK:   [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8>
12698 // CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
12699 // CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
12700 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double>
12701 // CHECK:   call void @llvm.aarch64.neon.st3.v2f64.p0i8(<2 x double> [[TMP9]], <2 x double> [[TMP10]], <2 x double> [[TMP11]], i8* [[TMP2]])
12702 // CHECK:   ret void
test_vst3q_f64(float64_t * a,float64x2x3_t b)12703 void test_vst3q_f64(float64_t *a, float64x2x3_t b) {
12704   vst3q_f64(a, b);
12705 }
12706 
12707 // CHECK-LABEL: define void @test_vst3q_p8(i8* %a, [3 x <16 x i8>] %b.coerce) #0 {
12708 // CHECK:   [[B:%.*]] = alloca %struct.poly8x16x3_t, align 16
12709 // CHECK:   [[__S1:%.*]] = alloca %struct.poly8x16x3_t, align 16
12710 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[B]], i32 0, i32 0
12711 // CHECK:   store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
12712 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__S1]] to i8*
12713 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x16x3_t* [[B]] to i8*
12714 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
12715 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
12716 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0
12717 // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
12718 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
12719 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i64 0, i64 1
12720 // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
12721 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
12722 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i64 0, i64 2
12723 // CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
12724 // CHECK:   call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i8* %a)
12725 // CHECK:   ret void
test_vst3q_p8(poly8_t * a,poly8x16x3_t b)12726 void test_vst3q_p8(poly8_t *a, poly8x16x3_t b) {
12727   vst3q_p8(a, b);
12728 }
12729 
12730 // CHECK-LABEL: define void @test_vst3q_p16(i16* %a, [3 x <8 x i16>] %b.coerce) #0 {
12731 // CHECK:   [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16
12732 // CHECK:   [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16
12733 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[B]], i32 0, i32 0
12734 // CHECK:   store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16
12735 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__S1]] to i8*
12736 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x8x3_t* [[B]] to i8*
12737 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
12738 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
12739 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
12740 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0
12741 // CHECK:   [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
12742 // CHECK:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
12743 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
12744 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i64 0, i64 1
12745 // CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
12746 // CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
12747 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
12748 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i64 0, i64 2
12749 // CHECK:   [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
12750 // CHECK:   [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
12751 // CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
12752 // CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
12753 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
12754 // CHECK:   call void @llvm.aarch64.neon.st3.v8i16.p0i8(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i8* [[TMP2]])
12755 // CHECK:   ret void
test_vst3q_p16(poly16_t * a,poly16x8x3_t b)12756 void test_vst3q_p16(poly16_t *a, poly16x8x3_t b) {
12757   vst3q_p16(a, b);
12758 }
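
// The vst3 tests below exercise the 64-bit (d-register) forms: the tuple
// structs are 8-byte aligned and 24 bytes long, so the alloca alignment and
// memcpy size drop from 16/48 to 8/24 relative to the q-forms above.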

// CHECK-LABEL: define void @test_vst3_u8(i8* %a, [3 x <8 x i8>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x8x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK:   call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i8* %a)
// CHECK:   ret void
void test_vst3_u8(uint8_t *a, uint8x8x3_t b) {
  vst3_u8(a, b);
}

// CHECK-LABEL: define void @test_vst3_u16(i16* %a, [3 x <4 x i16>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x4x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK:   call void @llvm.aarch64.neon.st3.v4i16.p0i8(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst3_u16(uint16_t *a, uint16x4x3_t b) {
  vst3_u16(a, b);
}

// CHECK-LABEL: define void @test_vst3_u32(i32* %a, [3 x <2 x i32>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <2 x i32>] [[B]].coerce, [3 x <2 x i32>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x2x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
// CHECK:   call void @llvm.aarch64.neon.st3.v2i32.p0i8(<2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst3_u32(uint32_t *a, uint32x2x3_t b) {
  vst3_u32(a, b);
}

// CHECK-LABEL: define void @test_vst3_u64(i64* %a, [3 x <1 x i64>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.uint64x1x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint64x1x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x1x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
// CHECK:   call void @llvm.aarch64.neon.st3.v1i64.p0i8(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst3_u64(uint64_t *a, uint64x1x3_t b) {
  vst3_u64(a, b);
}

// CHECK-LABEL: define void @test_vst3_s8(i8* %a, [3 x <8 x i8>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x8x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK:   call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i8* %a)
// CHECK:   ret void
void test_vst3_s8(int8_t *a, int8x8x3_t b) {
  vst3_s8(a, b);
}

// CHECK-LABEL: define void @test_vst3_s16(i16* %a, [3 x <4 x i16>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x4x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK:   call void @llvm.aarch64.neon.st3.v4i16.p0i8(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst3_s16(int16_t *a, int16x4x3_t b) {
  vst3_s16(a, b);
}

// CHECK-LABEL: define void @test_vst3_s32(i32* %a, [3 x <2 x i32>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <2 x i32>] [[B]].coerce, [3 x <2 x i32>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x2x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
// CHECK:   call void @llvm.aarch64.neon.st3.v2i32.p0i8(<2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst3_s32(int32_t *a, int32x2x3_t b) {
  vst3_s32(a, b);
}

// CHECK-LABEL: define void @test_vst3_s64(i64* %a, [3 x <1 x i64>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.int64x1x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int64x1x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x1x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
// CHECK:   call void @llvm.aarch64.neon.st3.v1i64.p0i8(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst3_s64(int64_t *a, int64x1x3_t b) {
  vst3_s64(a, b);
}

// CHECK-LABEL: define void @test_vst3_f16(half* %a, [3 x <4 x half>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <4 x half>] [[B]].coerce, [3 x <4 x half>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x4x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast half* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK:   call void @llvm.aarch64.neon.st3.v4i16.p0i8(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst3_f16(float16_t *a, float16x4x3_t b) {
  vst3_f16(a, b);
}

// CHECK-LABEL: define void @test_vst3_f32(float* %a, [3 x <2 x float>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <2 x float>] [[B]].coerce, [3 x <2 x float>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x2x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast float* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
// CHECK:   call void @llvm.aarch64.neon.st3.v2f32.p0i8(<2 x float> [[TMP9]], <2 x float> [[TMP10]], <2 x float> [[TMP11]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst3_f32(float32_t *a, float32x2x3_t b) {
  vst3_f32(a, b);
}

// CHECK-LABEL: define void @test_vst3_f64(double* %a, [3 x <1 x double>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.float64x1x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float64x1x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <1 x double>] [[B]].coerce, [3 x <1 x double>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x1x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double>
// CHECK:   call void @llvm.aarch64.neon.st3.v1f64.p0i8(<1 x double> [[TMP9]], <1 x double> [[TMP10]], <1 x double> [[TMP11]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst3_f64(float64_t *a, float64x1x3_t b) {
  vst3_f64(a, b);
}

// CHECK-LABEL: define void @test_vst3_p8(i8* %a, [3 x <8 x i8>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x8x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK:   call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i8* %a)
// CHECK:   ret void
void test_vst3_p8(poly8_t *a, poly8x8x3_t b) {
  vst3_p8(a, b);
}

// CHECK-LABEL: define void @test_vst3_p16(i16* %a, [3 x <4 x i16>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x4x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK:   call void @llvm.aarch64.neon.st3.v4i16.p0i8(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst3_p16(poly16_t *a, poly16x4x3_t b) {
  vst3_p16(a, b);
}
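
// The vst4 tests follow: same lowering shape as vst3, but with four-element
// tuples ([4 x <vector>], a 64-byte copy for the q-forms) lowered to
// llvm.aarch64.neon.st4.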
13128 
// CHECK-LABEL: define void @test_vst4q_u8(i8* %a, [4 x <16 x i8>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.uint8x16x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint8x16x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x16x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
// CHECK:   call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i8* %a)
// CHECK:   ret void
void test_vst4q_u8(uint8_t *a, uint8x16x4_t b) {
  vst4q_u8(a, b);
}

// CHECK-LABEL: define void @test_vst4q_u16(i16* %a, [4 x <8 x i16>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x8x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
// CHECK:   call void @llvm.aarch64.neon.st4.v8i16.p0i8(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst4q_u16(uint16_t *a, uint16x8x4_t b) {
  vst4q_u16(a, b);
}

// CHECK-LABEL: define void @test_vst4q_u32(i32* %a, [4 x <4 x i32>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <4 x i32>] [[B]].coerce, [4 x <4 x i32>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x4x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
// CHECK:   call void @llvm.aarch64.neon.st4.v4i32.p0i8(<4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst4q_u32(uint32_t *a, uint32x4x4_t b) {
  vst4q_u32(a, b);
}

// CHECK-LABEL: define void @test_vst4q_u64(i64* %a, [4 x <2 x i64>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.uint64x2x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint64x2x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x2x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x2x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
// CHECK:   call void @llvm.aarch64.neon.st4.v2i64.p0i8(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst4q_u64(uint64_t *a, uint64x2x4_t b) {
  vst4q_u64(a, b);
}

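// The signed-integer tests below expect IR identical to their unsigned
// counterparts above; only the NEON struct type names differ, since
// signedness does not affect the st4 lowering.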
// CHECK-LABEL: define void @test_vst4q_s8(i8* %a, [4 x <16 x i8>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.int8x16x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int8x16x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x16x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
// CHECK:   call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i8* %a)
// CHECK:   ret void
void test_vst4q_s8(int8_t *a, int8x16x4_t b) {
  vst4q_s8(a, b);
}

// CHECK-LABEL: define void @test_vst4q_s16(i16* %a, [4 x <8 x i16>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.int16x8x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x8x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
// CHECK:   call void @llvm.aarch64.neon.st4.v8i16.p0i8(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst4q_s16(int16_t *a, int16x8x4_t b) {
  vst4q_s16(a, b);
}

// CHECK-LABEL: define void @test_vst4q_s32(i32* %a, [4 x <4 x i32>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <4 x i32>] [[B]].coerce, [4 x <4 x i32>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x4x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
// CHECK:   call void @llvm.aarch64.neon.st4.v4i32.p0i8(<4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst4q_s32(int32_t *a, int32x4x4_t b) {
  vst4q_s32(a, b);
}

// CHECK-LABEL: define void @test_vst4q_s64(i64* %a, [4 x <2 x i64>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.int64x2x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int64x2x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x2x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x2x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
// CHECK:   call void @llvm.aarch64.neon.st4.v2i64.p0i8(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst4q_s64(int64_t *a, int64x2x4_t b) {
  vst4q_s64(a, b);
}

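// Note: this lowering scheme has no half-precision st4 overload, so the
// float16 test below expects the <8 x half> values to be bitcast to
// <8 x i16> and stored through @llvm.aarch64.neon.st4.v8i16.p0i8.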
// CHECK-LABEL: define void @test_vst4q_f16(half* %a, [4 x <8 x half>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.float16x8x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <8 x half>] [[B]].coerce, [4 x <8 x half>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x8x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast half* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP9:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP10:%.*]] = bitcast <8 x half> [[TMP9]] to <16 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
// CHECK:   call void @llvm.aarch64.neon.st4.v8i16.p0i8(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst4q_f16(float16_t *a, float16x8x4_t b) {
  vst4q_f16(a, b);
}

// CHECK-LABEL: define void @test_vst4q_f32(float* %a, [4 x <4 x float>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.float32x4x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <4 x float>] [[B]].coerce, [4 x <4 x float>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x4x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast float* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP9:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP10:%.*]] = bitcast <4 x float> [[TMP9]] to <16 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x float>
// CHECK:   call void @llvm.aarch64.neon.st4.v4f32.p0i8(<4 x float> [[TMP11]], <4 x float> [[TMP12]], <4 x float> [[TMP13]], <4 x float> [[TMP14]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst4q_f32(float32_t *a, float32x4x4_t b) {
  vst4q_f32(a, b);
}

// CHECK-LABEL: define void @test_vst4q_f64(double* %a, [4 x <2 x double>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.float64x2x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.float64x2x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <2 x double>] [[B]].coerce, [4 x <2 x double>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x2x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP9:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP10:%.*]] = bitcast <2 x double> [[TMP9]] to <16 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x double>
// CHECK:   call void @llvm.aarch64.neon.st4.v2f64.p0i8(<2 x double> [[TMP11]], <2 x double> [[TMP12]], <2 x double> [[TMP13]], <2 x double> [[TMP14]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst4q_f64(float64_t *a, float64x2x4_t b) {
  vst4q_f64(a, b);
}

// CHECK-LABEL: define void @test_vst4q_p8(i8* %a, [4 x <16 x i8>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.poly8x16x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.poly8x16x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x16x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
// CHECK:   call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i8* %a)
// CHECK:   ret void
void test_vst4q_p8(poly8_t *a, poly8x16x4_t b) {
  vst4q_p8(a, b);
}

// CHECK-LABEL: define void @test_vst4q_p16(i16* %a, [4 x <8 x i16>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x8x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
// CHECK:   call void @llvm.aarch64.neon.st4.v8i16.p0i8(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst4q_p16(poly16_t *a, poly16x8x4_t b) {
  vst4q_p16(a, b);
}

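// The remaining vst4 tests cover the 64-bit (d-register) variants: the same
// pattern as above, but with 8-byte alignment and a 32-byte memcpy. As with
// the q-register forms, 8-bit element types pass their vectors and the i8*
// pointer %a straight to the intrinsic with no intermediate bitcasts.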
13557 // CHECK-LABEL: define void @test_vst4_u8(i8* %a, [4 x <8 x i8>] %b.coerce) #0 {
13558 // CHECK:   [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
13559 // CHECK:   [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8
13560 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0
13561 // CHECK:   store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
13562 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__S1]] to i8*
13563 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x8x4_t* [[B]] to i8*
13564 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
13565 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
13566 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0
13567 // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
13568 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
13569 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i64 0, i64 1
13570 // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
13571 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
13572 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i64 0, i64 2
13573 // CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
13574 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
13575 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i64 0, i64 3
13576 // CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
13577 // CHECK:   call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i8* %a)
13578 // CHECK:   ret void
test_vst4_u8(uint8_t * a,uint8x8x4_t b)13579 void test_vst4_u8(uint8_t *a, uint8x8x4_t b) {
13580   vst4_u8(a, b);
13581 }
13582 
13583 // CHECK-LABEL: define void @test_vst4_u16(i16* %a, [4 x <4 x i16>] %b.coerce) #0 {
13584 // CHECK:   [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8
13585 // CHECK:   [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8
13586 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[B]], i32 0, i32 0
13587 // CHECK:   store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8
13588 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__S1]] to i8*
13589 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x4x4_t* [[B]] to i8*
13590 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
13591 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
13592 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
13593 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0
13594 // CHECK:   [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
13595 // CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
13596 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
13597 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i64 0, i64 1
13598 // CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
13599 // CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
13600 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
13601 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i64 0, i64 2
13602 // CHECK:   [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
13603 // CHECK:   [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
13604 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
13605 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i64 0, i64 3
13606 // CHECK:   [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
13607 // CHECK:   [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
13608 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
13609 // CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
13610 // CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
13611 // CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
13612 // CHECK:   call void @llvm.aarch64.neon.st4.v4i16.p0i8(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i8* [[TMP2]])
13613 // CHECK:   ret void
test_vst4_u16(uint16_t * a,uint16x4x4_t b)13614 void test_vst4_u16(uint16_t *a, uint16x4x4_t b) {
13615   vst4_u16(a, b);
13616 }
13617 
13618 // CHECK-LABEL: define void @test_vst4_u32(i32* %a, [4 x <2 x i32>] %b.coerce) #0 {
13619 // CHECK:   [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8
13620 // CHECK:   [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8
13621 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[B]], i32 0, i32 0
13622 // CHECK:   store [4 x <2 x i32>] [[B]].coerce, [4 x <2 x i32>]* [[COERCE_DIVE]], align 8
13623 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__S1]] to i8*
13624 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x2x4_t* [[B]] to i8*
13625 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
13626 // CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
13627 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
13628 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i64 0, i64 0
13629 // CHECK:   [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
13630 // CHECK:   [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
13631 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
13632 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i64 0, i64 1
13633 // CHECK:   [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
13634 // CHECK:   [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
13635 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
13636 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i64 0, i64 2
13637 // CHECK:   [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
13638 // CHECK:   [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
13639 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
13640 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i64 0, i64 3
13641 // CHECK:   [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
13642 // CHECK:   [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
13643 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
13644 // CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
13645 // CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
13646 // CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
13647 // CHECK:   call void @llvm.aarch64.neon.st4.v2i32.p0i8(<2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], i8* [[TMP2]])
13648 // CHECK:   ret void
test_vst4_u32(uint32_t * a,uint32x2x4_t b)13649 void test_vst4_u32(uint32_t *a, uint32x2x4_t b) {
13650   vst4_u32(a, b);
13651 }
13652 
13653 // CHECK-LABEL: define void @test_vst4_u64(i64* %a, [4 x <1 x i64>] %b.coerce) #0 {
13654 // CHECK:   [[B:%.*]] = alloca %struct.uint64x1x4_t, align 8
13655 // CHECK:   [[__S1:%.*]] = alloca %struct.uint64x1x4_t, align 8
13656 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[B]], i32 0, i32 0
13657 // CHECK:   store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8
13658 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__S1]] to i8*
13659 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x1x4_t* [[B]] to i8*
13660 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
13661 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
13662 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
13663 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0
13664 // CHECK:   [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
13665 // CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
13666 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
13667 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i64 0, i64 1
13668 // CHECK:   [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
13669 // CHECK:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
13670 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
13671 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i64 0, i64 2
13672 // CHECK:   [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
13673 // CHECK:   [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
13674 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
13675 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i64 0, i64 3
13676 // CHECK:   [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8
13677 // CHECK:   [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
13678 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
13679 // CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
13680 // CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
13681 // CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64>
13682 // CHECK:   call void @llvm.aarch64.neon.st4.v1i64.p0i8(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], i8* [[TMP2]])
13683 // CHECK:   ret void
test_vst4_u64(uint64_t * a,uint64x1x4_t b)13684 void test_vst4_u64(uint64_t *a, uint64x1x4_t b) {
13685   vst4_u64(a, b);
13686 }
13687 
13688 // CHECK-LABEL: define void @test_vst4_s8(i8* %a, [4 x <8 x i8>] %b.coerce) #0 {
13689 // CHECK:   [[B:%.*]] = alloca %struct.int8x8x4_t, align 8
13690 // CHECK:   [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8
13691 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0
13692 // CHECK:   store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
13693 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__S1]] to i8*
13694 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x8x4_t* [[B]] to i8*
13695 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
13696 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
13697 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0
13698 // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
13699 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
13700 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i64 0, i64 1
13701 // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
13702 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
13703 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i64 0, i64 2
13704 // CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
13705 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
13706 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i64 0, i64 3
13707 // CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
13708 // CHECK:   call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i8* %a)
13709 // CHECK:   ret void
test_vst4_s8(int8_t * a,int8x8x4_t b)13710 void test_vst4_s8(int8_t *a, int8x8x4_t b) {
13711   vst4_s8(a, b);
13712 }
13713 
13714 // CHECK-LABEL: define void @test_vst4_s16(i16* %a, [4 x <4 x i16>] %b.coerce) #0 {
13715 // CHECK:   [[B:%.*]] = alloca %struct.int16x4x4_t, align 8
13716 // CHECK:   [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8
13717 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[B]], i32 0, i32 0
13718 // CHECK:   store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8
13719 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__S1]] to i8*
13720 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x4x4_t* [[B]] to i8*
13721 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
13722 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
13723 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
13724 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0
13725 // CHECK:   [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
13726 // CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
13727 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
13728 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i64 0, i64 1
13729 // CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
13730 // CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
13731 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
13732 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i64 0, i64 2
13733 // CHECK:   [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
13734 // CHECK:   [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
13735 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
13736 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i64 0, i64 3
13737 // CHECK:   [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
13738 // CHECK:   [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
13739 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
13740 // CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
13741 // CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
13742 // CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
13743 // CHECK:   call void @llvm.aarch64.neon.st4.v4i16.p0i8(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i8* [[TMP2]])
13744 // CHECK:   ret void
test_vst4_s16(int16_t * a,int16x4x4_t b)13745 void test_vst4_s16(int16_t *a, int16x4x4_t b) {
13746   vst4_s16(a, b);
13747 }
13748 
13749 // CHECK-LABEL: define void @test_vst4_s32(i32* %a, [4 x <2 x i32>] %b.coerce) #0 {
13750 // CHECK:   [[B:%.*]] = alloca %struct.int32x2x4_t, align 8
13751 // CHECK:   [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8
13752 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[B]], i32 0, i32 0
13753 // CHECK:   store [4 x <2 x i32>] [[B]].coerce, [4 x <2 x i32>]* [[COERCE_DIVE]], align 8
13754 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__S1]] to i8*
13755 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x2x4_t* [[B]] to i8*
13756 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
13757 // CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
13758 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
13759 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i64 0, i64 0
13760 // CHECK:   [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
13761 // CHECK:   [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
13762 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
13763 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i64 0, i64 1
13764 // CHECK:   [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
13765 // CHECK:   [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
13766 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
13767 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i64 0, i64 2
13768 // CHECK:   [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
13769 // CHECK:   [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
13770 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
13771 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i64 0, i64 3
13772 // CHECK:   [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
13773 // CHECK:   [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
13774 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
13775 // CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
13776 // CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
13777 // CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
13778 // CHECK:   call void @llvm.aarch64.neon.st4.v2i32.p0i8(<2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], i8* [[TMP2]])
13779 // CHECK:   ret void
test_vst4_s32(int32_t * a,int32x2x4_t b)13780 void test_vst4_s32(int32_t *a, int32x2x4_t b) {
13781   vst4_s32(a, b);
13782 }
13783 
// CHECK-LABEL: define void @test_vst4_s64(i64* %a, [4 x <1 x i64>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.int64x1x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int64x1x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x1x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8
// CHECK:   [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64>
// CHECK:   call void @llvm.aarch64.neon.st4.v1i64.p0i8(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst4_s64(int64_t *a, int64x1x4_t b) {
  vst4_s64(a, b);
}

// CHECK-LABEL: define void @test_vst4_f16(half* %a, [4 x <4 x half>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <4 x half>] [[B]].coerce, [4 x <4 x half>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x4x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast half* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP9:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX6]], align 8
// CHECK:   [[TMP10:%.*]] = bitcast <4 x half> [[TMP9]] to <8 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
// CHECK:   call void @llvm.aarch64.neon.st4.v4i16.p0i8(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst4_f16(float16_t *a, float16x4x4_t b) {
  vst4_f16(a, b);
}

// CHECK-LABEL: define void @test_vst4_f32(float* %a, [4 x <2 x float>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <2 x float>] [[B]].coerce, [4 x <2 x float>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x2x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast float* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP9:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX6]], align 8
// CHECK:   [[TMP10:%.*]] = bitcast <2 x float> [[TMP9]] to <8 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x float>
// CHECK:   call void @llvm.aarch64.neon.st4.v2f32.p0i8(<2 x float> [[TMP11]], <2 x float> [[TMP12]], <2 x float> [[TMP13]], <2 x float> [[TMP14]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst4_f32(float32_t *a, float32x2x4_t b) {
  vst4_f32(a, b);
}

// CHECK-LABEL: define void @test_vst4_f64(double* %a, [4 x <1 x double>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.float64x1x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float64x1x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <1 x double>] [[B]].coerce, [4 x <1 x double>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x1x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP9:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX6]], align 8
// CHECK:   [[TMP10:%.*]] = bitcast <1 x double> [[TMP9]] to <8 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double>
// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x double>
// CHECK:   call void @llvm.aarch64.neon.st4.v1f64.p0i8(<1 x double> [[TMP11]], <1 x double> [[TMP12]], <1 x double> [[TMP13]], <1 x double> [[TMP14]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst4_f64(float64_t *a, float64x1x4_t b) {
  vst4_f64(a, b);
}

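// The poly8 variant below is the degenerate case of the same pattern:
// its lanes are already <8 x i8> and the destination pointer is already
// i8*, so no bitcasts are expected between the lane loads and the
// @llvm.aarch64.neon.st4.v8i8 call.
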
// CHECK-LABEL: define void @test_vst4_p8(i8* %a, [4 x <8 x i8>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x8x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
// CHECK:   call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i8* %a)
// CHECK:   ret void
void test_vst4_p8(poly8_t *a, poly8x8x4_t b) {
  vst4_p8(a, b);
}

// CHECK-LABEL: define void @test_vst4_p16(i16* %a, [4 x <4 x i16>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x4x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
// CHECK:   [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
// CHECK:   call void @llvm.aarch64.neon.st4.v4i16.p0i8(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst4_p16(poly16_t *a, poly16x4x4_t b) {
  vst4_p16(a, b);
}

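// The vld1*_xN tests that follow cover the multi-register LD1 forms.
// Unlike vld2/vld3/vld4, these load N consecutive vectors with no
// de-interleaving; the IR calls @llvm.aarch64.neon.ld1xN.*, stores the
// returned aggregate, and memcpys it into the returned struct (32 bytes
// for the Q-register _x2 forms). Sketch (illustrative only, `p` is a
// hypothetical pointer to at least 32 readable bytes):
//   uint8x16x2_t pair = vld1q_u8_x2(p); // pair.val[0] = p[0..15],
//                                       // pair.val[1] = p[16..31]
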
// CHECK-LABEL: define %struct.uint8x16x2_t @test_vld1q_u8_x2(i8* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.uint8x16x2_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8*
// CHECK:   [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8* %a)
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }*
// CHECK:   store { <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8> }* [[TMP1]]
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x16x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 16, i1 false)
// CHECK:   [[TMP4:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], align 16
// CHECK:   ret %struct.uint8x16x2_t [[TMP4]]
uint8x16x2_t test_vld1q_u8_x2(uint8_t const *a) {
  return vld1q_u8_x2(a);
}

// CHECK-LABEL: define %struct.uint16x8x2_t @test_vld1q_u16_x2(i16* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK:   [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
// CHECK:   store { <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], align 16
// CHECK:   ret %struct.uint16x8x2_t [[TMP6]]
uint16x8x2_t test_vld1q_u16_x2(uint16_t const *a) {
  return vld1q_u16_x2(a);
}

// CHECK-LABEL: define %struct.uint32x4x2_t @test_vld1q_u32_x2(i32* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
// CHECK:   [[VLD1XN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x2.v4i32.p0i32(i32* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32> }*
// CHECK:   store { <4 x i32>, <4 x i32> } [[VLD1XN]], { <4 x i32>, <4 x i32> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], align 16
// CHECK:   ret %struct.uint32x4x2_t [[TMP6]]
uint32x4x2_t test_vld1q_u32_x2(uint32_t const *a) {
  return vld1q_u32_x2(a);
}

// CHECK-LABEL: define %struct.uint64x2x2_t @test_vld1q_u64_x2(i64* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint64x2x2_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.uint64x2x2_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x2x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
// CHECK:   [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64> }*
// CHECK:   store { <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint64x2x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint64x2x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[RETVAL]], align 16
// CHECK:   ret %struct.uint64x2x2_t [[TMP6]]
uint64x2x2_t test_vld1q_u64_x2(uint64_t const *a) {
  return vld1q_u64_x2(a);
}

// CHECK-LABEL: define %struct.int8x16x2_t @test_vld1q_s8_x2(i8* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.int8x16x2_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8*
// CHECK:   [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8* %a)
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }*
// CHECK:   store { <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8> }* [[TMP1]]
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x16x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 16, i1 false)
// CHECK:   [[TMP4:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], align 16
// CHECK:   ret %struct.int8x16x2_t [[TMP4]]
int8x16x2_t test_vld1q_s8_x2(int8_t const *a) {
  return vld1q_s8_x2(a);
}

// CHECK-LABEL: define %struct.int16x8x2_t @test_vld1q_s16_x2(i16* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.int16x8x2_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK:   [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
// CHECK:   store { <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], align 16
// CHECK:   ret %struct.int16x8x2_t [[TMP6]]
int16x8x2_t test_vld1q_s16_x2(int16_t const *a) {
  return vld1q_s16_x2(a);
}

// CHECK-LABEL: define %struct.int32x4x2_t @test_vld1q_s32_x2(i32* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
// CHECK:   [[VLD1XN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x2.v4i32.p0i32(i32* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32> }*
// CHECK:   store { <4 x i32>, <4 x i32> } [[VLD1XN]], { <4 x i32>, <4 x i32> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], align 16
// CHECK:   ret %struct.int32x4x2_t [[TMP6]]
int32x4x2_t test_vld1q_s32_x2(int32_t const *a) {
  return vld1q_s32_x2(a);
}

// CHECK-LABEL: define %struct.int64x2x2_t @test_vld1q_s64_x2(i64* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int64x2x2_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.int64x2x2_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x2x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
// CHECK:   [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64> }*
// CHECK:   store { <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int64x2x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int64x2x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int64x2x2_t, %struct.int64x2x2_t* [[RETVAL]], align 16
// CHECK:   ret %struct.int64x2x2_t [[TMP6]]
int64x2x2_t test_vld1q_s64_x2(int64_t const *a) {
  return vld1q_s64_x2(a);
}

// CHECK-LABEL: define %struct.float16x8x2_t @test_vld1q_f16_x2(half* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK:   [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
// CHECK:   store { <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x8x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float16x8x2_t, %struct.float16x8x2_t* [[RETVAL]], align 16
// CHECK:   ret %struct.float16x8x2_t [[TMP6]]
float16x8x2_t test_vld1q_f16_x2(float16_t const *a) {
  return vld1q_f16_x2(a);
}

// CHECK-LABEL: define %struct.float32x4x2_t @test_vld1q_f32_x2(float* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float*
// CHECK:   [[VLD1XN:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x2.v4f32.p0f32(float* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float> }*
// CHECK:   store { <4 x float>, <4 x float> } [[VLD1XN]], { <4 x float>, <4 x float> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], align 16
// CHECK:   ret %struct.float32x4x2_t [[TMP6]]
float32x4x2_t test_vld1q_f32_x2(float32_t const *a) {
  return vld1q_f32_x2(a);
}

// CHECK-LABEL: define %struct.float64x2x2_t @test_vld1q_f64_x2(double* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x2x2_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.float64x2x2_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast double* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double*
// CHECK:   [[VLD1XN:%.*]] = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x2.v2f64.p0f64(double* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double> }*
// CHECK:   store { <2 x double>, <2 x double> } [[VLD1XN]], { <2 x double>, <2 x double> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x2x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float64x2x2_t, %struct.float64x2x2_t* [[RETVAL]], align 16
// CHECK:   ret %struct.float64x2x2_t [[TMP6]]
float64x2x2_t test_vld1q_f64_x2(float64_t const *a) {
  return vld1q_f64_x2(a);
}

// CHECK-LABEL: define %struct.poly8x16x2_t @test_vld1q_p8_x2(i8* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.poly8x16x2_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8*
// CHECK:   [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8* %a)
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }*
// CHECK:   store { <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8> }* [[TMP1]]
// CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x16x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 16, i1 false)
// CHECK:   [[TMP4:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], align 16
// CHECK:   ret %struct.poly8x16x2_t [[TMP4]]
poly8x16x2_t test_vld1q_p8_x2(poly8_t const *a) {
  return vld1q_p8_x2(a);
}

// CHECK-LABEL: define %struct.poly16x8x2_t @test_vld1q_p16_x2(i16* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK:   [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
// CHECK:   store { <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], align 16
// CHECK:   ret %struct.poly16x8x2_t [[TMP6]]
poly16x8x2_t test_vld1q_p16_x2(poly16_t const *a) {
  return vld1q_p16_x2(a);
}

// CHECK-LABEL: define %struct.poly64x2x2_t @test_vld1q_p64_x2(i64* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x2x2_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.poly64x2x2_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x2x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
// CHECK:   [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64> }*
// CHECK:   store { <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly64x2x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.poly64x2x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[RETVAL]], align 16
// CHECK:   ret %struct.poly64x2x2_t [[TMP6]]
poly64x2x2_t test_vld1q_p64_x2(poly64_t const *a) {
  return vld1q_p64_x2(a);
}

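// D-register (64-bit) _x2 variants: the same lowering through
// @llvm.aarch64.neon.ld1x2.*, but the structs are 8-byte aligned and the
// result copy shrinks to 16 bytes (two 8-byte vectors).
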
// CHECK-LABEL: define %struct.uint8x8x2_t @test_vld1_u8_x2(i8* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
// CHECK:   [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8* %a)
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
// CHECK:   store { <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8> }* [[TMP1]]
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x8x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP4:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.uint8x8x2_t [[TMP4]]
uint8x8x2_t test_vld1_u8_x2(uint8_t const *a) {
  return vld1_u8_x2(a);
}

// CHECK-LABEL: define %struct.uint16x4x2_t @test_vld1_u16_x2(i16* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK:   [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
// CHECK:   store { <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.uint16x4x2_t [[TMP6]]
uint16x4x2_t test_vld1_u16_x2(uint16_t const *a) {
  return vld1_u16_x2(a);
}

// CHECK-LABEL: define %struct.uint32x2x2_t @test_vld1_u32_x2(i32* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
// CHECK:   [[VLD1XN:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x2.v2i32.p0i32(i32* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32> }*
// CHECK:   store { <2 x i32>, <2 x i32> } [[VLD1XN]], { <2 x i32>, <2 x i32> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.uint32x2x2_t [[TMP6]]
uint32x2x2_t test_vld1_u32_x2(uint32_t const *a) {
  return vld1_u32_x2(a);
}

// CHECK-LABEL: define %struct.uint64x1x2_t @test_vld1_u64_x2(i64* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint64x1x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint64x1x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
// CHECK:   [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
// CHECK:   store { <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint64x1x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.uint64x1x2_t [[TMP6]]
uint64x1x2_t test_vld1_u64_x2(uint64_t const *a) {
  return vld1_u64_x2(a);
}

// CHECK-LABEL: define %struct.int8x8x2_t @test_vld1_s8_x2(i8* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
// CHECK:   [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8* %a)
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
// CHECK:   store { <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8> }* [[TMP1]]
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x8x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP4:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.int8x8x2_t [[TMP4]]
int8x8x2_t test_vld1_s8_x2(int8_t const *a) {
  return vld1_s8_x2(a);
}

// CHECK-LABEL: define %struct.int16x4x2_t @test_vld1_s16_x2(i16* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK:   [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
// CHECK:   store { <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.int16x4x2_t [[TMP6]]
int16x4x2_t test_vld1_s16_x2(int16_t const *a) {
  return vld1_s16_x2(a);
}

// CHECK-LABEL: define %struct.int32x2x2_t @test_vld1_s32_x2(i32* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
// CHECK:   [[VLD1XN:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x2.v2i32.p0i32(i32* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32> }*
// CHECK:   store { <2 x i32>, <2 x i32> } [[VLD1XN]], { <2 x i32>, <2 x i32> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.int32x2x2_t [[TMP6]]
int32x2x2_t test_vld1_s32_x2(int32_t const *a) {
  return vld1_s32_x2(a);
}

// CHECK-LABEL: define %struct.int64x1x2_t @test_vld1_s64_x2(i64* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int64x1x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int64x1x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
// CHECK:   [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
// CHECK:   store { <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int64x1x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int64x1x2_t, %struct.int64x1x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.int64x1x2_t [[TMP6]]
int64x1x2_t test_vld1_s64_x2(int64_t const *a) {
  return vld1_s64_x2(a);
}

// CHECK-LABEL: define %struct.float16x4x2_t @test_vld1_f16_x2(half* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK:   [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
// CHECK:   store { <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x4x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float16x4x2_t, %struct.float16x4x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.float16x4x2_t [[TMP6]]
float16x4x2_t test_vld1_f16_x2(float16_t const *a) {
  return vld1_f16_x2(a);
}

// CHECK-LABEL: define %struct.float32x2x2_t @test_vld1_f32_x2(float* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float*
// CHECK:   [[VLD1XN:%.*]] = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x2.v2f32.p0f32(float* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float> }*
// CHECK:   store { <2 x float>, <2 x float> } [[VLD1XN]], { <2 x float>, <2 x float> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.float32x2x2_t [[TMP6]]
float32x2x2_t test_vld1_f32_x2(float32_t const *a) {
  return vld1_f32_x2(a);
}

// CHECK-LABEL: define %struct.float64x1x2_t @test_vld1_f64_x2(double* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x1x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.float64x1x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast double* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double*
// CHECK:   [[VLD1XN:%.*]] = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x2.v1f64.p0f64(double* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double> }*
// CHECK:   store { <1 x double>, <1 x double> } [[VLD1XN]], { <1 x double>, <1 x double> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x1x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float64x1x2_t, %struct.float64x1x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.float64x1x2_t [[TMP6]]
float64x1x2_t test_vld1_f64_x2(float64_t const *a) {
  return vld1_f64_x2(a);
}

// CHECK-LABEL: define %struct.poly8x8x2_t @test_vld1_p8_x2(i8* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
// CHECK:   [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8* %a)
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
// CHECK:   store { <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8> }* [[TMP1]]
// CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x8x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP4:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.poly8x8x2_t [[TMP4]]
poly8x8x2_t test_vld1_p8_x2(poly8_t const *a) {
  return vld1_p8_x2(a);
}

// CHECK-LABEL: define %struct.poly16x4x2_t @test_vld1_p16_x2(i16* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK:   [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
// CHECK:   store { <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.poly16x4x2_t [[TMP6]]
poly16x4x2_t test_vld1_p16_x2(poly16_t const *a) {
  return vld1_p16_x2(a);
}

// CHECK-LABEL: define %struct.poly64x1x2_t @test_vld1_p64_x2(i64* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x1x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.poly64x1x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x1x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
// CHECK:   [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
// CHECK:   store { <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly64x1x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.poly64x1x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.poly64x1x2_t [[TMP6]]
poly64x1x2_t test_vld1_p64_x2(poly64_t const *a) {
  return vld1_p64_x2(a);
}

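// _x3 variants: three consecutive Q registers per load via
// @llvm.aarch64.neon.ld1x3.*, so the struct copy grows to 48 bytes;
// otherwise the checked pattern matches the _x2 tests above.
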
// CHECK-LABEL: define %struct.uint8x16x3_t @test_vld1q_u8_x3(i8* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x16x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.uint8x16x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8*
// CHECK:   [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8* %a)
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }*
// CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]]
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x16x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 48, i32 16, i1 false)
// CHECK:   [[TMP4:%.*]] = load %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.uint8x16x3_t [[TMP4]]
uint8x16x3_t test_vld1q_u8_x3(uint8_t const *a) {
  return vld1q_u8_x3(a);
}

// CHECK-LABEL: define %struct.uint16x8x3_t @test_vld1q_u16_x3(i16* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK:   [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint16x8x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.uint16x8x3_t [[TMP6]]
uint16x8x3_t test_vld1q_u16_x3(uint16_t const *a) {
  return vld1q_u16_x3(a);
}

// CHECK-LABEL: define %struct.uint32x4x3_t @test_vld1q_u32_x3(i32* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
// CHECK:   [[VLD1XN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x3.v4i32.p0i32(i32* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32> }*
// CHECK:   store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD1XN]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint32x4x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.uint32x4x3_t [[TMP6]]
uint32x4x3_t test_vld1q_u32_x3(uint32_t const *a) {
  return vld1q_u32_x3(a);
}

// CHECK-LABEL: define %struct.uint64x2x3_t @test_vld1q_u64_x3(i64* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint64x2x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.uint64x2x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x2x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
// CHECK:   [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64> }*
// CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint64x2x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint64x2x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.uint64x2x3_t [[TMP6]]
uint64x2x3_t test_vld1q_u64_x3(uint64_t const *a) {
  return vld1q_u64_x3(a);
}

// CHECK-LABEL: define %struct.int8x16x3_t @test_vld1q_s8_x3(i8* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x16x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.int8x16x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8*
// CHECK:   [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8* %a)
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }*
// CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]]
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x16x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 48, i32 16, i1 false)
// CHECK:   [[TMP4:%.*]] = load %struct.int8x16x3_t, %struct.int8x16x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.int8x16x3_t [[TMP4]]
int8x16x3_t test_vld1q_s8_x3(int8_t const *a) {
  return vld1q_s8_x3(a);
}

// CHECK-LABEL: define %struct.int16x8x3_t @test_vld1q_s16_x3(i16* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK:   [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int16x8x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int16x8x3_t, %struct.int16x8x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.int16x8x3_t [[TMP6]]
int16x8x3_t test_vld1q_s16_x3(int16_t const *a) {
  return vld1q_s16_x3(a);
}

// CHECK-LABEL: define %struct.int32x4x3_t @test_vld1q_s32_x3(i32* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
// CHECK:   [[VLD1XN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x3.v4i32.p0i32(i32* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32> }*
// CHECK:   store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD1XN]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int32x4x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int32x4x3_t, %struct.int32x4x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.int32x4x3_t [[TMP6]]
int32x4x3_t test_vld1q_s32_x3(int32_t const *a) {
  return vld1q_s32_x3(a);
}

// CHECK-LABEL: define %struct.int64x2x3_t @test_vld1q_s64_x3(i64* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int64x2x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.int64x2x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x2x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
// CHECK:   [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64> }*
// CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int64x2x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int64x2x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int64x2x3_t, %struct.int64x2x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.int64x2x3_t [[TMP6]]
test_vld1q_s64_x3(int64_t const * a)14613 int64x2x3_t test_vld1q_s64_x3(int64_t const *a) {
14614   return vld1q_s64_x3(a);
14615 }
14616 
14617 // CHECK-LABEL: define %struct.float16x8x3_t @test_vld1q_f16_x3(half* %a) #0 {
14618 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float16x8x3_t, align 16
14619 // CHECK:   [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 16
14620 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
14621 // CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
14622 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
14623 // CHECK:   [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16* [[TMP2]])
14624 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
14625 // CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
14626 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x8x3_t* [[RETVAL]] to i8*
14627 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
14628 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
14629 // CHECK:   [[TMP6:%.*]] = load %struct.float16x8x3_t, %struct.float16x8x3_t* [[RETVAL]], align 16
14630 // CHECK:   ret %struct.float16x8x3_t [[TMP6]]
test_vld1q_f16_x3(float16_t const * a)14631 float16x8x3_t test_vld1q_f16_x3(float16_t const *a) {
14632   return vld1q_f16_x3(a);
14633 }
14634 
14635 // CHECK-LABEL: define %struct.float32x4x3_t @test_vld1q_f32_x3(float* %a) #0 {
14636 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x4x3_t, align 16
14637 // CHECK:   [[__RET:%.*]] = alloca %struct.float32x4x3_t, align 16
14638 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
14639 // CHECK:   [[TMP1:%.*]] = bitcast float* %a to i8*
14640 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float*
14641 // CHECK:   [[VLD1XN:%.*]] = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x3.v4f32.p0f32(float* [[TMP2]])
14642 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float>, <4 x float> }*
14643 // CHECK:   store { <4 x float>, <4 x float>, <4 x float> } [[VLD1XN]], { <4 x float>, <4 x float>, <4 x float> }* [[TMP3]]
14644 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float32x4x3_t* [[RETVAL]] to i8*
14645 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
14646 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
14647 // CHECK:   [[TMP6:%.*]] = load %struct.float32x4x3_t, %struct.float32x4x3_t* [[RETVAL]], align 16
14648 // CHECK:   ret %struct.float32x4x3_t [[TMP6]]
test_vld1q_f32_x3(float32_t const * a)14649 float32x4x3_t test_vld1q_f32_x3(float32_t const *a) {
14650   return vld1q_f32_x3(a);
14651 }
14652 
14653 // CHECK-LABEL: define %struct.float64x2x3_t @test_vld1q_f64_x3(double* %a) #0 {
14654 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x2x3_t, align 16
14655 // CHECK:   [[__RET:%.*]] = alloca %struct.float64x2x3_t, align 16
14656 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8*
14657 // CHECK:   [[TMP1:%.*]] = bitcast double* %a to i8*
14658 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double*
14659 // CHECK:   [[VLD1XN:%.*]] = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x3.v2f64.p0f64(double* [[TMP2]])
14660 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double>, <2 x double> }*
14661 // CHECK:   store { <2 x double>, <2 x double>, <2 x double> } [[VLD1XN]], { <2 x double>, <2 x double>, <2 x double> }* [[TMP3]]
14662 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x2x3_t* [[RETVAL]] to i8*
14663 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8*
14664 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
14665 // CHECK:   [[TMP6:%.*]] = load %struct.float64x2x3_t, %struct.float64x2x3_t* [[RETVAL]], align 16
14666 // CHECK:   ret %struct.float64x2x3_t [[TMP6]]
test_vld1q_f64_x3(float64_t const * a)14667 float64x2x3_t test_vld1q_f64_x3(float64_t const *a) {
14668   return vld1q_f64_x3(a);
14669 }
14670 
14671 // CHECK-LABEL: define %struct.poly8x16x3_t @test_vld1q_p8_x3(i8* %a) #0 {
14672 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x16x3_t, align 16
14673 // CHECK:   [[__RET:%.*]] = alloca %struct.poly8x16x3_t, align 16
14674 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8*
14675 // CHECK:   [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8* %a)
14676 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }*
14677 // CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]]
14678 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x16x3_t* [[RETVAL]] to i8*
14679 // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8*
14680 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 48, i32 16, i1 false)
14681 // CHECK:   [[TMP4:%.*]] = load %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[RETVAL]], align 16
14682 // CHECK:   ret %struct.poly8x16x3_t [[TMP4]]
test_vld1q_p8_x3(poly8_t const * a)14683 poly8x16x3_t test_vld1q_p8_x3(poly8_t const *a) {
14684   return vld1q_p8_x3(a);
14685 }
14686 
14687 // CHECK-LABEL: define %struct.poly16x8x3_t @test_vld1q_p16_x3(i16* %a) #0 {
14688 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x8x3_t, align 16
14689 // CHECK:   [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align 16
14690 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
14691 // CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
14692 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
14693 // CHECK:   [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16* [[TMP2]])
14694 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
14695 // CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
14696 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly16x8x3_t* [[RETVAL]] to i8*
14697 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
14698 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
14699 // CHECK:   [[TMP6:%.*]] = load %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[RETVAL]], align 16
14700 // CHECK:   ret %struct.poly16x8x3_t [[TMP6]]
test_vld1q_p16_x3(poly16_t const * a)14701 poly16x8x3_t test_vld1q_p16_x3(poly16_t const *a) {
14702   return vld1q_p16_x3(a);
14703 }
14704 
14705 // CHECK-LABEL: define %struct.poly64x2x3_t @test_vld1q_p64_x3(i64* %a) #0 {
14706 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x2x3_t, align 16
14707 // CHECK:   [[__RET:%.*]] = alloca %struct.poly64x2x3_t, align 16
14708 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x2x3_t* [[__RET]] to i8*
14709 // CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
14710 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
14711 // CHECK:   [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64* [[TMP2]])
14712 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64> }*
14713 // CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
14714 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly64x2x3_t* [[RETVAL]] to i8*
14715 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly64x2x3_t* [[__RET]] to i8*
14716 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
14717 // CHECK:   [[TMP6:%.*]] = load %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[RETVAL]], align 16
14718 // CHECK:   ret %struct.poly64x2x3_t [[TMP6]]
test_vld1q_p64_x3(poly64_t const * a)14719 poly64x2x3_t test_vld1q_p64_x3(poly64_t const *a) {
14720   return vld1q_p64_x3(a);
14721 }
14722 
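// The remaining vld1_*_x3 tests exercise the 64-bit (D-register) forms: the
// returned struct holds three 8-byte vectors, so the memcpy of the aggregate
// below copies 24 bytes with 8-byte alignment.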
// CHECK-LABEL: define %struct.uint8x8x3_t @test_vld1_u8_x3(i8* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
// CHECK:   [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8* %a)
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x8x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 24, i32 8, i1 false)
// CHECK:   [[TMP4:%.*]] = load %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.uint8x8x3_t [[TMP4]]
uint8x8x3_t test_vld1_u8_x3(uint8_t const *a) {
  return vld1_u8_x3(a);
}

// CHECK-LABEL: define %struct.uint16x4x3_t @test_vld1_u16_x3(i16* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK:   [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint16x4x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.uint16x4x3_t [[TMP6]]
uint16x4x3_t test_vld1_u16_x3(uint16_t const *a) {
  return vld1_u16_x3(a);
}

// CHECK-LABEL: define %struct.uint32x2x3_t @test_vld1_u32_x3(i32* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
// CHECK:   [[VLD1XN:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x3.v2i32.p0i32(i32* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32> }*
// CHECK:   store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD1XN]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint32x2x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.uint32x2x3_t [[TMP6]]
uint32x2x3_t test_vld1_u32_x3(uint32_t const *a) {
  return vld1_u32_x3(a);
}

// CHECK-LABEL: define %struct.uint64x1x3_t @test_vld1_u64_x3(i64* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint64x1x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint64x1x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
// CHECK:   [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }*
// CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint64x1x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.uint64x1x3_t [[TMP6]]
uint64x1x3_t test_vld1_u64_x3(uint64_t const *a) {
  return vld1_u64_x3(a);
}

// CHECK-LABEL: define %struct.int8x8x3_t @test_vld1_s8_x3(i8* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
// CHECK:   [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8* %a)
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x8x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 24, i32 8, i1 false)
// CHECK:   [[TMP4:%.*]] = load %struct.int8x8x3_t, %struct.int8x8x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.int8x8x3_t [[TMP4]]
int8x8x3_t test_vld1_s8_x3(int8_t const *a) {
  return vld1_s8_x3(a);
}

// CHECK-LABEL: define %struct.int16x4x3_t @test_vld1_s16_x3(i16* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK:   [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int16x4x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int16x4x3_t, %struct.int16x4x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.int16x4x3_t [[TMP6]]
int16x4x3_t test_vld1_s16_x3(int16_t const *a) {
  return vld1_s16_x3(a);
}

// CHECK-LABEL: define %struct.int32x2x3_t @test_vld1_s32_x3(i32* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
// CHECK:   [[VLD1XN:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x3.v2i32.p0i32(i32* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32> }*
// CHECK:   store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD1XN]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int32x2x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int32x2x3_t, %struct.int32x2x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.int32x2x3_t [[TMP6]]
int32x2x3_t test_vld1_s32_x3(int32_t const *a) {
  return vld1_s32_x3(a);
}

// CHECK-LABEL: define %struct.int64x1x3_t @test_vld1_s64_x3(i64* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int64x1x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int64x1x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
// CHECK:   [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }*
// CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int64x1x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int64x1x3_t, %struct.int64x1x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.int64x1x3_t [[TMP6]]
int64x1x3_t test_vld1_s64_x3(int64_t const *a) {
  return vld1_s64_x3(a);
}

// CHECK-LABEL: define %struct.float16x4x3_t @test_vld1_f16_x3(half* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK:   [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x4x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float16x4x3_t, %struct.float16x4x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.float16x4x3_t [[TMP6]]
float16x4x3_t test_vld1_f16_x3(float16_t const *a) {
  return vld1_f16_x3(a);
}

// CHECK-LABEL: define %struct.float32x2x3_t @test_vld1_f32_x3(float* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float*
// CHECK:   [[VLD1XN:%.*]] = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x3.v2f32.p0f32(float* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float>, <2 x float> }*
// CHECK:   store { <2 x float>, <2 x float>, <2 x float> } [[VLD1XN]], { <2 x float>, <2 x float>, <2 x float> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float32x2x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float32x2x3_t, %struct.float32x2x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.float32x2x3_t [[TMP6]]
float32x2x3_t test_vld1_f32_x3(float32_t const *a) {
  return vld1_f32_x3(a);
}

// CHECK-LABEL: define %struct.float64x1x3_t @test_vld1_f64_x3(double* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x1x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.float64x1x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast double* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double*
// CHECK:   [[VLD1XN:%.*]] = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x3.v1f64.p0f64(double* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double>, <1 x double> }*
// CHECK:   store { <1 x double>, <1 x double>, <1 x double> } [[VLD1XN]], { <1 x double>, <1 x double>, <1 x double> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x1x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x1x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float64x1x3_t, %struct.float64x1x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.float64x1x3_t [[TMP6]]
float64x1x3_t test_vld1_f64_x3(float64_t const *a) {
  return vld1_f64_x3(a);
}

// CHECK-LABEL: define %struct.poly8x8x3_t @test_vld1_p8_x3(i8* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
// CHECK:   [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8* %a)
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
// CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x8x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 24, i32 8, i1 false)
// CHECK:   [[TMP4:%.*]] = load %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.poly8x8x3_t [[TMP4]]
poly8x8x3_t test_vld1_p8_x3(poly8_t const *a) {
  return vld1_p8_x3(a);
}

// CHECK-LABEL: define %struct.poly16x4x3_t @test_vld1_p16_x3(i16* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK:   [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly16x4x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.poly16x4x3_t [[TMP6]]
poly16x4x3_t test_vld1_p16_x3(poly16_t const *a) {
  return vld1_p16_x3(a);
}

// CHECK-LABEL: define %struct.poly64x1x3_t @test_vld1_p64_x3(i64* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x1x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.poly64x1x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x1x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
// CHECK:   [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }*
// CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly64x1x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.poly64x1x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.poly64x1x3_t [[TMP6]]
poly64x1x3_t test_vld1_p64_x3(poly64_t const *a) {
  return vld1_p64_x3(a);
}

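// The vld1q_*_x4 tests load four consecutive Q-register vectors: the returned
// struct is four 16-byte vectors, so the memcpy of the aggregate below copies
// 64 bytes with 16-byte alignment.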
// CHECK-LABEL: define %struct.uint8x16x4_t @test_vld1q_u8_x4(i8* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x16x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.uint8x16x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8*
// CHECK:   [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x4.v16i8.p0i8(i8* %a)
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }*
// CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]]
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x16x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 64, i32 16, i1 false)
// CHECK:   [[TMP4:%.*]] = load %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[RETVAL]], align 16
// CHECK:   ret %struct.uint8x16x4_t [[TMP4]]
uint8x16x4_t test_vld1q_u8_x4(uint8_t const *a) {
  return vld1q_u8_x4(a);
}

// CHECK-LABEL: define %struct.uint16x8x4_t @test_vld1q_u16_x4(i16* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK:   [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint16x8x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[RETVAL]], align 16
// CHECK:   ret %struct.uint16x8x4_t [[TMP6]]
uint16x8x4_t test_vld1q_u16_x4(uint16_t const *a) {
  return vld1q_u16_x4(a);
}

// CHECK-LABEL: define %struct.uint32x4x4_t @test_vld1q_u32_x4(i32* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x4x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
// CHECK:   [[VLD1XN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x4.v4i32.p0i32(i32* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }*
// CHECK:   store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD1XN]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint32x4x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[RETVAL]], align 16
// CHECK:   ret %struct.uint32x4x4_t [[TMP6]]
uint32x4x4_t test_vld1q_u32_x4(uint32_t const *a) {
  return vld1q_u32_x4(a);
}

// CHECK-LABEL: define %struct.uint64x2x4_t @test_vld1q_u64_x4(i64* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint64x2x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.uint64x2x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x2x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
// CHECK:   [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.v2i64.p0i64(i64* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }*
// CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint64x2x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint64x2x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[RETVAL]], align 16
// CHECK:   ret %struct.uint64x2x4_t [[TMP6]]
uint64x2x4_t test_vld1q_u64_x4(uint64_t const *a) {
  return vld1q_u64_x4(a);
}

// CHECK-LABEL: define %struct.int8x16x4_t @test_vld1q_s8_x4(i8* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x16x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.int8x16x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8*
// CHECK:   [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x4.v16i8.p0i8(i8* %a)
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }*
// CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]]
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x16x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 64, i32 16, i1 false)
// CHECK:   [[TMP4:%.*]] = load %struct.int8x16x4_t, %struct.int8x16x4_t* [[RETVAL]], align 16
// CHECK:   ret %struct.int8x16x4_t [[TMP4]]
int8x16x4_t test_vld1q_s8_x4(int8_t const *a) {
  return vld1q_s8_x4(a);
}

// CHECK-LABEL: define %struct.int16x8x4_t @test_vld1q_s16_x4(i16* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x8x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.int16x8x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK:   [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int16x8x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int16x8x4_t, %struct.int16x8x4_t* [[RETVAL]], align 16
// CHECK:   ret %struct.int16x8x4_t [[TMP6]]
int16x8x4_t test_vld1q_s16_x4(int16_t const *a) {
  return vld1q_s16_x4(a);
}

// CHECK-LABEL: define %struct.int32x4x4_t @test_vld1q_s32_x4(i32* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
// CHECK:   [[VLD1XN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x4.v4i32.p0i32(i32* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }*
// CHECK:   store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD1XN]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int32x4x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int32x4x4_t, %struct.int32x4x4_t* [[RETVAL]], align 16
// CHECK:   ret %struct.int32x4x4_t [[TMP6]]
int32x4x4_t test_vld1q_s32_x4(int32_t const *a) {
  return vld1q_s32_x4(a);
}

// CHECK-LABEL: define %struct.int64x2x4_t @test_vld1q_s64_x4(i64* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int64x2x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.int64x2x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x2x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
// CHECK:   [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.v2i64.p0i64(i64* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }*
// CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int64x2x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int64x2x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int64x2x4_t, %struct.int64x2x4_t* [[RETVAL]], align 16
// CHECK:   ret %struct.int64x2x4_t [[TMP6]]
int64x2x4_t test_vld1q_s64_x4(int64_t const *a) {
  return vld1q_s64_x4(a);
}

// CHECK-LABEL: define %struct.float16x8x4_t @test_vld1q_f16_x4(half* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float16x8x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK:   [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x8x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float16x8x4_t, %struct.float16x8x4_t* [[RETVAL]], align 16
// CHECK:   ret %struct.float16x8x4_t [[TMP6]]
float16x8x4_t test_vld1q_f16_x4(float16_t const *a) {
  return vld1q_f16_x4(a);
}

// CHECK-LABEL: define %struct.float32x4x4_t @test_vld1q_f32_x4(float* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x4x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.float32x4x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float*
// CHECK:   [[VLD1XN:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x4.v4f32.p0f32(float* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float>, <4 x float>, <4 x float> }*
// CHECK:   store { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD1XN]], { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float32x4x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float32x4x4_t, %struct.float32x4x4_t* [[RETVAL]], align 16
// CHECK:   ret %struct.float32x4x4_t [[TMP6]]
float32x4x4_t test_vld1q_f32_x4(float32_t const *a) {
  return vld1q_f32_x4(a);
}

// CHECK-LABEL: define %struct.float64x2x4_t @test_vld1q_f64_x4(double* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x2x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.float64x2x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast double* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double*
// CHECK:   [[VLD1XN:%.*]] = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x4.v2f64.p0f64(double* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double>, <2 x double>, <2 x double> }*
// CHECK:   store { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD1XN]], { <2 x double>, <2 x double>, <2 x double>, <2 x double> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x2x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x2x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float64x2x4_t, %struct.float64x2x4_t* [[RETVAL]], align 16
// CHECK:   ret %struct.float64x2x4_t [[TMP6]]
float64x2x4_t test_vld1q_f64_x4(float64_t const *a) {
  return vld1q_f64_x4(a);
}

// CHECK-LABEL: define %struct.poly8x16x4_t @test_vld1q_p8_x4(i8* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x16x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.poly8x16x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8*
// CHECK:   [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x4.v16i8.p0i8(i8* %a)
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }*
// CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]]
// CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x16x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 64, i32 16, i1 false)
// CHECK:   [[TMP4:%.*]] = load %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[RETVAL]], align 16
// CHECK:   ret %struct.poly8x16x4_t [[TMP4]]
poly8x16x4_t test_vld1q_p8_x4(poly8_t const *a) {
  return vld1q_p8_x4(a);
}

// CHECK-LABEL: define %struct.poly16x8x4_t @test_vld1q_p16_x4(i16* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK:   [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly16x8x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[RETVAL]], align 16
// CHECK:   ret %struct.poly16x8x4_t [[TMP6]]
poly16x8x4_t test_vld1q_p16_x4(poly16_t const *a) {
  return vld1q_p16_x4(a);
}

// CHECK-LABEL: define %struct.poly64x2x4_t @test_vld1q_p64_x4(i64* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x2x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.poly64x2x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x2x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
// CHECK:   [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.v2i64.p0i64(i64* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }*
// CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly64x2x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.poly64x2x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[RETVAL]], align 16
// CHECK:   ret %struct.poly64x2x4_t [[TMP6]]
poly64x2x4_t test_vld1q_p64_x4(poly64_t const *a) {
  return vld1q_p64_x4(a);
}

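// The vld1_*_x4 tests cover the 64-bit forms of the x4 loads: four 8-byte
// vectors give a 32-byte aggregate copied with 8-byte alignment.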
// CHECK-LABEL: define %struct.uint8x8x4_t @test_vld1_u8_x4(i8* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
// CHECK:   [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x4.v8i8.p0i8(i8* %a)
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x8x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 8, i1 false)
// CHECK:   [[TMP4:%.*]] = load %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[RETVAL]], align 8
// CHECK:   ret %struct.uint8x8x4_t [[TMP4]]
uint8x8x4_t test_vld1_u8_x4(uint8_t const *a) {
  return vld1_u8_x4(a);
}

// CHECK-LABEL: define %struct.uint16x4x4_t @test_vld1_u16_x4(i16* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK:   [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint16x4x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[RETVAL]], align 8
// CHECK:   ret %struct.uint16x4x4_t [[TMP6]]
uint16x4x4_t test_vld1_u16_x4(uint16_t const *a) {
  return vld1_u16_x4(a);
}

// CHECK-LABEL: define %struct.uint32x2x4_t @test_vld1_u32_x4(i32* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
// CHECK:   [[VLD1XN:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x4.v2i32.p0i32(i32* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }*
// CHECK:   store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD1XN]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint32x2x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[RETVAL]], align 8
// CHECK:   ret %struct.uint32x2x4_t [[TMP6]]
uint32x2x4_t test_vld1_u32_x4(uint32_t const *a) {
  return vld1_u32_x4(a);
}

// CHECK-LABEL: define %struct.uint64x1x4_t @test_vld1_u64_x4(i64* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint64x1x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint64x1x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
// CHECK:   [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4.v1i64.p0i64(i64* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }*
// CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint64x1x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[RETVAL]], align 8
// CHECK:   ret %struct.uint64x1x4_t [[TMP6]]
uint64x1x4_t test_vld1_u64_x4(uint64_t const *a) {
  return vld1_u64_x4(a);
}

// CHECK-LABEL: define %struct.int8x8x4_t @test_vld1_s8_x4(i8* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8*
// CHECK:   [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x4.v8i8.p0i8(i8* %a)
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x8x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 8, i1 false)
// CHECK:   [[TMP4:%.*]] = load %struct.int8x8x4_t, %struct.int8x8x4_t* [[RETVAL]], align 8
// CHECK:   ret %struct.int8x8x4_t [[TMP4]]
int8x8x4_t test_vld1_s8_x4(int8_t const *a) {
  return vld1_s8_x4(a);
}

// CHECK-LABEL: define %struct.int16x4x4_t @test_vld1_s16_x4(i16* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK:   [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int16x4x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int16x4x4_t, %struct.int16x4x4_t* [[RETVAL]], align 8
// CHECK:   ret %struct.int16x4x4_t [[TMP6]]
int16x4x4_t test_vld1_s16_x4(int16_t const *a) {
  return vld1_s16_x4(a);
}

// CHECK-LABEL: define %struct.int32x2x4_t @test_vld1_s32_x4(i32* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
// CHECK:   [[VLD1XN:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x4.v2i32.p0i32(i32* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }*
// CHECK:   store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD1XN]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int32x2x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int32x2x4_t, %struct.int32x2x4_t* [[RETVAL]], align 8
// CHECK:   ret %struct.int32x2x4_t [[TMP6]]
int32x2x4_t test_vld1_s32_x4(int32_t const *a) {
  return vld1_s32_x4(a);
}

// CHECK-LABEL: define %struct.int64x1x4_t @test_vld1_s64_x4(i64* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int64x1x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int64x1x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
// CHECK:   [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4.v1i64.p0i64(i64* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }*
// CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int64x1x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int64x1x4_t, %struct.int64x1x4_t* [[RETVAL]], align 8
// CHECK:   ret %struct.int64x1x4_t [[TMP6]]
int64x1x4_t test_vld1_s64_x4(int64_t const *a) {
  return vld1_s64_x4(a);
}

// CHECK-LABEL: define %struct.float16x4x4_t @test_vld1_f16_x4(half* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK:   [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x4x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
15367 // CHECK:   [[TMP6:%.*]] = load %struct.float16x4x4_t, %struct.float16x4x4_t* [[RETVAL]], align 8
15368 // CHECK:   ret %struct.float16x4x4_t [[TMP6]]
test_vld1_f16_x4(float16_t const * a)15369 float16x4x4_t test_vld1_f16_x4(float16_t const *a) {
15370   return vld1_f16_x4(a);
15371 }
15372 
15373 // CHECK-LABEL: define %struct.float32x2x4_t @test_vld1_f32_x4(float* %a) #0 {
15374 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x2x4_t, align 8
15375 // CHECK:   [[__RET:%.*]] = alloca %struct.float32x2x4_t, align 8
15376 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8*
15377 // CHECK:   [[TMP1:%.*]] = bitcast float* %a to i8*
15378 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float*
15379 // CHECK:   [[VLD1XN:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x4.v2f32.p0f32(float* [[TMP2]])
15380 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float>, <2 x float>, <2 x float> }*
15381 // CHECK:   store { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD1XN]], { <2 x float>, <2 x float>, <2 x float>, <2 x float> }* [[TMP3]]
15382 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float32x2x4_t* [[RETVAL]] to i8*
15383 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8*
15384 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
15385 // CHECK:   [[TMP6:%.*]] = load %struct.float32x2x4_t, %struct.float32x2x4_t* [[RETVAL]], align 8
15386 // CHECK:   ret %struct.float32x2x4_t [[TMP6]]
test_vld1_f32_x4(float32_t const * a)15387 float32x2x4_t test_vld1_f32_x4(float32_t const *a) {
15388   return vld1_f32_x4(a);
15389 }
15390 
15391 // CHECK-LABEL: define %struct.float64x1x4_t @test_vld1_f64_x4(double* %a) #0 {
15392 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x1x4_t, align 8
15393 // CHECK:   [[__RET:%.*]] = alloca %struct.float64x1x4_t, align 8
15394 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x4_t* [[__RET]] to i8*
15395 // CHECK:   [[TMP1:%.*]] = bitcast double* %a to i8*
15396 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double*
15397 // CHECK:   [[VLD1XN:%.*]] = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x4.v1f64.p0f64(double* [[TMP2]])
15398 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double>, <1 x double>, <1 x double> }*
15399 // CHECK:   store { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD1XN]], { <1 x double>, <1 x double>, <1 x double>, <1 x double> }* [[TMP3]]
15400 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x1x4_t* [[RETVAL]] to i8*
15401 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x1x4_t* [[__RET]] to i8*
15402 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
15403 // CHECK:   [[TMP6:%.*]] = load %struct.float64x1x4_t, %struct.float64x1x4_t* [[RETVAL]], align 8
15404 // CHECK:   ret %struct.float64x1x4_t [[TMP6]]
test_vld1_f64_x4(float64_t const * a)15405 float64x1x4_t test_vld1_f64_x4(float64_t const *a) {
15406   return vld1_f64_x4(a);
15407 }
15408 
15409 // CHECK-LABEL: define %struct.poly8x8x4_t @test_vld1_p8_x4(i8* %a) #0 {
15410 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x8x4_t, align 8
15411 // CHECK:   [[__RET:%.*]] = alloca %struct.poly8x8x4_t, align 8
15412 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8*
15413 // CHECK:   [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x4.v8i8.p0i8(i8* %a)
15414 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
15415 // CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
15416 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x8x4_t* [[RETVAL]] to i8*
15417 // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8*
15418 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 8, i1 false)
15419 // CHECK:   [[TMP4:%.*]] = load %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[RETVAL]], align 8
15420 // CHECK:   ret %struct.poly8x8x4_t [[TMP4]]
test_vld1_p8_x4(poly8_t const * a)15421 poly8x8x4_t test_vld1_p8_x4(poly8_t const *a) {
15422   return vld1_p8_x4(a);
15423 }
15424 
15425 // CHECK-LABEL: define %struct.poly16x4x4_t @test_vld1_p16_x4(i16* %a) #0 {
15426 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x4x4_t, align 8
15427 // CHECK:   [[__RET:%.*]] = alloca %struct.poly16x4x4_t, align 8
15428 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8*
15429 // CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
15430 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
15431 // CHECK:   [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16* [[TMP2]])
15432 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
15433 // CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
15434 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly16x4x4_t* [[RETVAL]] to i8*
15435 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8*
15436 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
15437 // CHECK:   [[TMP6:%.*]] = load %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[RETVAL]], align 8
15438 // CHECK:   ret %struct.poly16x4x4_t [[TMP6]]
test_vld1_p16_x4(poly16_t const * a)15439 poly16x4x4_t test_vld1_p16_x4(poly16_t const *a) {
15440   return vld1_p16_x4(a);
15441 }
15442 
15443 // CHECK-LABEL: define %struct.poly64x1x4_t @test_vld1_p64_x4(i64* %a) #0 {
15444 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x1x4_t, align 8
15445 // CHECK:   [[__RET:%.*]] = alloca %struct.poly64x1x4_t, align 8
15446 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x1x4_t* [[__RET]] to i8*
15447 // CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
15448 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
15449 // CHECK:   [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4.v1i64.p0i64(i64* [[TMP2]])
15450 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }*
15451 // CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
15452 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly64x1x4_t* [[RETVAL]] to i8*
15453 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly64x1x4_t* [[__RET]] to i8*
15454 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
15455 // CHECK:   [[TMP6:%.*]] = load %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[RETVAL]], align 8
15456 // CHECK:   ret %struct.poly64x1x4_t [[TMP6]]
test_vld1_p64_x4(poly64_t const * a)15457 poly64x1x4_t test_vld1_p64_x4(poly64_t const *a) {
15458   return vld1_p64_x4(a);
15459 }
15460 
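// Test the vst1q_*_x2 intrinsics, which store a pair of 128-bit vectors to
// consecutive memory with a single two-register ST1.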
// CHECK-LABEL: define void @test_vst1q_u8_x2(i8* %a, [2 x <16 x i8>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.uint8x16x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint8x16x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x16x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK:   call void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i8* %a)
// CHECK:   ret void
void test_vst1q_u8_x2(uint8_t *a, uint8x16x2_t b) {
  vst1q_u8_x2(a, b);
}

// CHECK-LABEL: define void @test_vst1q_u16_x2(i16* %a, [2 x <8 x i16>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x8x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK:   [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i16*
// CHECK:   call void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i16* [[TMP9]])
// CHECK:   ret void
void test_vst1q_u16_x2(uint16_t *a, uint16x8x2_t b) {
  vst1q_u16_x2(a, b);
}

// CHECK-LABEL: define void @test_vst1q_u32_x2(i32* %a, [2 x <4 x i32>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <4 x i32>] [[B]].coerce, [2 x <4 x i32>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x4x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK:   [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i32*
// CHECK:   call void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]], i32* [[TMP9]])
// CHECK:   ret void
void test_vst1q_u32_x2(uint32_t *a, uint32x4x2_t b) {
  vst1q_u32_x2(a, b);
}

// CHECK-LABEL: define void @test_vst1q_u64_x2(i64* %a, [2 x <2 x i64>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.uint64x2x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint64x2x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x2x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x2x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
// CHECK:   [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i64*
// CHECK:   call void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i64* [[TMP9]])
// CHECK:   ret void
void test_vst1q_u64_x2(uint64_t *a, uint64x2x2_t b) {
  vst1q_u64_x2(a, b);
}

// CHECK-LABEL: define void @test_vst1q_s8_x2(i8* %a, [2 x <16 x i8>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.int8x16x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int8x16x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x16x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK:   call void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i8* %a)
// CHECK:   ret void
void test_vst1q_s8_x2(int8_t *a, int8x16x2_t b) {
  vst1q_s8_x2(a, b);
}

// CHECK-LABEL: define void @test_vst1q_s16_x2(i16* %a, [2 x <8 x i16>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.int16x8x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x8x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK:   [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i16*
// CHECK:   call void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i16* [[TMP9]])
// CHECK:   ret void
void test_vst1q_s16_x2(int16_t *a, int16x8x2_t b) {
  vst1q_s16_x2(a, b);
}

// CHECK-LABEL: define void @test_vst1q_s32_x2(i32* %a, [2 x <4 x i32>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <4 x i32>] [[B]].coerce, [2 x <4 x i32>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x4x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK:   [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i32*
// CHECK:   call void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]], i32* [[TMP9]])
// CHECK:   ret void
void test_vst1q_s32_x2(int32_t *a, int32x4x2_t b) {
  vst1q_s32_x2(a, b);
}

// CHECK-LABEL: define void @test_vst1q_s64_x2(i64* %a, [2 x <2 x i64>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.int64x2x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int64x2x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x2x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x2x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
// CHECK:   [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i64*
// CHECK:   call void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i64* [[TMP9]])
// CHECK:   ret void
void test_vst1q_s64_x2(int64_t *a, int64x2x2_t b) {
  vst1q_s64_x2(a, b);
}

// CHECK-LABEL: define void @test_vst1q_f16_x2(half* %a, [2 x <8 x half>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <8 x half>] [[B]].coerce, [2 x <8 x half>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x8x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast half* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK:   [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i16*
// CHECK:   call void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i16* [[TMP9]])
// CHECK:   ret void
void test_vst1q_f16_x2(float16_t *a, float16x8x2_t b) {
  vst1q_f16_x2(a, b);
}

// CHECK-LABEL: define void @test_vst1q_f32_x2(float* %a, [2 x <4 x float>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <4 x float>] [[B]].coerce, [2 x <4 x float>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x4x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast float* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
// CHECK:   [[TMP9:%.*]] = bitcast i8* [[TMP2]] to float*
// CHECK:   call void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float> [[TMP7]], <4 x float> [[TMP8]], float* [[TMP9]])
// CHECK:   ret void
void test_vst1q_f32_x2(float32_t *a, float32x4x2_t b) {
  vst1q_f32_x2(a, b);
}

// CHECK-LABEL: define void @test_vst1q_f64_x2(double* %a, [2 x <2 x double>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.float64x2x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.float64x2x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <2 x double>] [[B]].coerce, [2 x <2 x double>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x2x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x double>], [2 x <2 x double>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x double>], [2 x <2 x double>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
// CHECK:   [[TMP9:%.*]] = bitcast i8* [[TMP2]] to double*
// CHECK:   call void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double> [[TMP7]], <2 x double> [[TMP8]], double* [[TMP9]])
// CHECK:   ret void
void test_vst1q_f64_x2(float64_t *a, float64x2x2_t b) {
  vst1q_f64_x2(a, b);
}

// CHECK-LABEL: define void @test_vst1q_p8_x2(i8* %a, [2 x <16 x i8>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.poly8x16x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.poly8x16x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x16x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK:   call void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i8* %a)
// CHECK:   ret void
void test_vst1q_p8_x2(poly8_t *a, poly8x16x2_t b) {
  vst1q_p8_x2(a, b);
}

// CHECK-LABEL: define void @test_vst1q_p16_x2(i16* %a, [2 x <8 x i16>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x8x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK:   [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i16*
// CHECK:   call void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i16* [[TMP9]])
// CHECK:   ret void
void test_vst1q_p16_x2(poly16_t *a, poly16x8x2_t b) {
  vst1q_p16_x2(a, b);
}

// CHECK-LABEL: define void @test_vst1q_p64_x2(i64* %a, [2 x <2 x i64>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.poly64x2x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.poly64x2x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x2x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly64x2x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
// CHECK:   [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i64*
// CHECK:   call void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i64* [[TMP9]])
// CHECK:   ret void
void test_vst1q_p64_x2(poly64_t *a, poly64x2x2_t b) {
  vst1q_p64_x2(a, b);
}

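// Test the vst1_*_x2 intrinsics, the 64-bit (D-register) pair-store variants.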
15807 // CHECK-LABEL: define void @test_vst1_u8_x2(i8* %a, [2 x <8 x i8>] %b.coerce) #0 {
15808 // CHECK:   [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8
15809 // CHECK:   [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8
15810 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0
15811 // CHECK:   store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
15812 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__S1]] to i8*
15813 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x8x2_t* [[B]] to i8*
15814 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
15815 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
15816 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0
15817 // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
15818 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
15819 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i64 0, i64 1
15820 // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
15821 // CHECK:   call void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i8* %a)
15822 // CHECK:   ret void
test_vst1_u8_x2(uint8_t * a,uint8x8x2_t b)15823 void test_vst1_u8_x2(uint8_t *a, uint8x8x2_t b) {
15824   vst1_u8_x2(a, b);
15825 }
15826 
15827 // CHECK-LABEL: define void @test_vst1_u16_x2(i16* %a, [2 x <4 x i16>] %b.coerce) #0 {
15828 // CHECK:   [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8
15829 // CHECK:   [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8
15830 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[B]], i32 0, i32 0
15831 // CHECK:   store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8
15832 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__S1]] to i8*
15833 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x4x2_t* [[B]] to i8*
15834 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
15835 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
15836 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
15837 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0
15838 // CHECK:   [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
15839 // CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
15840 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
15841 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i64 0, i64 1
15842 // CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
15843 // CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
15844 // CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
15845 // CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
15846 // CHECK:   [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i16*
15847 // CHECK:   call void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i16* [[TMP9]])
15848 // CHECK:   ret void
test_vst1_u16_x2(uint16_t * a,uint16x4x2_t b)15849 void test_vst1_u16_x2(uint16_t *a, uint16x4x2_t b) {
15850   vst1_u16_x2(a, b);
15851 }
15852 
15853 // CHECK-LABEL: define void @test_vst1_u32_x2(i32* %a, [2 x <2 x i32>] %b.coerce) #0 {
15854 // CHECK:   [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8
15855 // CHECK:   [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8
15856 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[B]], i32 0, i32 0
15857 // CHECK:   store [2 x <2 x i32>] [[B]].coerce, [2 x <2 x i32>]* [[COERCE_DIVE]], align 8
15858 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__S1]] to i8*
15859 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x2x2_t* [[B]] to i8*
15860 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
15861 // CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
15862 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
15863 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i64 0, i64 0
15864 // CHECK:   [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
15865 // CHECK:   [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
15866 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
15867 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i64 0, i64 1
15868 // CHECK:   [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
15869 // CHECK:   [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
15870 // CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
15871 // CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
15872 // CHECK:   [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i32*
15873 // CHECK:   call void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32> [[TMP7]], <2 x i32> [[TMP8]], i32* [[TMP9]])
15874 // CHECK:   ret void
test_vst1_u32_x2(uint32_t * a,uint32x2x2_t b)15875 void test_vst1_u32_x2(uint32_t *a, uint32x2x2_t b) {
15876   vst1_u32_x2(a, b);
15877 }
15878 
15879 // CHECK-LABEL: define void @test_vst1_u64_x2(i64* %a, [2 x <1 x i64>] %b.coerce) #0 {
15880 // CHECK:   [[B:%.*]] = alloca %struct.uint64x1x2_t, align 8
15881 // CHECK:   [[__S1:%.*]] = alloca %struct.uint64x1x2_t, align 8
15882 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[B]], i32 0, i32 0
15883 // CHECK:   store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8
15884 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__S1]] to i8*
15885 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x1x2_t* [[B]] to i8*
15886 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
15887 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
15888 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0
15889 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0
15890 // CHECK:   [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
15891 // CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
15892 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0
15893 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i64 0, i64 1
15894 // CHECK:   [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
15895 // CHECK:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
15896 // CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
15897 // CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
15898 // CHECK:   [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i64*
15899 // CHECK:   call void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], i64* [[TMP9]])
15900 // CHECK:   ret void
test_vst1_u64_x2(uint64_t * a,uint64x1x2_t b)15901 void test_vst1_u64_x2(uint64_t *a, uint64x1x2_t b) {
15902   vst1_u64_x2(a, b);
15903 }
15904 
15905 // CHECK-LABEL: define void @test_vst1_s8_x2(i8* %a, [2 x <8 x i8>] %b.coerce) #0 {
15906 // CHECK:   [[B:%.*]] = alloca %struct.int8x8x2_t, align 8
15907 // CHECK:   [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8
15908 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0
15909 // CHECK:   store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
15910 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__S1]] to i8*
15911 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x8x2_t* [[B]] to i8*
15912 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
15913 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
15914 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0
15915 // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
15916 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
15917 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i64 0, i64 1
15918 // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
15919 // CHECK:   call void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i8* %a)
15920 // CHECK:   ret void
test_vst1_s8_x2(int8_t * a,int8x8x2_t b)15921 void test_vst1_s8_x2(int8_t *a, int8x8x2_t b) {
15922   vst1_s8_x2(a, b);
15923 }
15924 
15925 // CHECK-LABEL: define void @test_vst1_s16_x2(i16* %a, [2 x <4 x i16>] %b.coerce) #0 {
15926 // CHECK:   [[B:%.*]] = alloca %struct.int16x4x2_t, align 8
15927 // CHECK:   [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8
15928 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[B]], i32 0, i32 0
15929 // CHECK:   store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8
15930 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__S1]] to i8*
15931 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x4x2_t* [[B]] to i8*
15932 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
15933 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
15934 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
15935 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0
15936 // CHECK:   [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
15937 // CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
15938 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
15939 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i64 0, i64 1
15940 // CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
15941 // CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
15942 // CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
15943 // CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
15944 // CHECK:   [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i16*
15945 // CHECK:   call void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i16* [[TMP9]])
15946 // CHECK:   ret void
test_vst1_s16_x2(int16_t * a,int16x4x2_t b)15947 void test_vst1_s16_x2(int16_t *a, int16x4x2_t b) {
15948   vst1_s16_x2(a, b);
15949 }
15950 
15951 // CHECK-LABEL: define void @test_vst1_s32_x2(i32* %a, [2 x <2 x i32>] %b.coerce) #0 {
15952 // CHECK:   [[B:%.*]] = alloca %struct.int32x2x2_t, align 8
15953 // CHECK:   [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8
15954 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[B]], i32 0, i32 0
15955 // CHECK:   store [2 x <2 x i32>] [[B]].coerce, [2 x <2 x i32>]* [[COERCE_DIVE]], align 8
15956 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__S1]] to i8*
15957 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x2x2_t* [[B]] to i8*
15958 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
15959 // CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
15960 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
15961 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i64 0, i64 0
15962 // CHECK:   [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
15963 // CHECK:   [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
15964 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
15965 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i64 0, i64 1
15966 // CHECK:   [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
15967 // CHECK:   [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
15968 // CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
15969 // CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
15970 // CHECK:   [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i32*
15971 // CHECK:   call void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32> [[TMP7]], <2 x i32> [[TMP8]], i32* [[TMP9]])
15972 // CHECK:   ret void
test_vst1_s32_x2(int32_t * a,int32x2x2_t b)15973 void test_vst1_s32_x2(int32_t *a, int32x2x2_t b) {
15974   vst1_s32_x2(a, b);
15975 }
15976 
15977 // CHECK-LABEL: define void @test_vst1_s64_x2(i64* %a, [2 x <1 x i64>] %b.coerce) #0 {
15978 // CHECK:   [[B:%.*]] = alloca %struct.int64x1x2_t, align 8
15979 // CHECK:   [[__S1:%.*]] = alloca %struct.int64x1x2_t, align 8
15980 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[B]], i32 0, i32 0
15981 // CHECK:   store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8
15982 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__S1]] to i8*
15983 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x1x2_t* [[B]] to i8*
15984 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
15985 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
15986 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[__S1]], i32 0, i32 0
15987 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0
15988 // CHECK:   [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
15989 // CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
15990 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[__S1]], i32 0, i32 0
15991 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i64 0, i64 1
15992 // CHECK:   [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
15993 // CHECK:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
15994 // CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
15995 // CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
15996 // CHECK:   [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i64*
15997 // CHECK:   call void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], i64* [[TMP9]])
15998 // CHECK:   ret void
test_vst1_s64_x2(int64_t * a,int64x1x2_t b)15999 void test_vst1_s64_x2(int64_t *a, int64x1x2_t b) {
16000   vst1_s64_x2(a, b);
16001 }
16002 
// CHECK-LABEL: define void @test_vst1_f16_x2(half* %a, [2 x <4 x half>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <4 x half>] [[B]].coerce, [2 x <4 x half>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x4x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast half* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK:   [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i16*
// CHECK:   call void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i16* [[TMP9]])
// CHECK:   ret void
void test_vst1_f16_x2(float16_t *a, float16x4x2_t b) {
  vst1_f16_x2(a, b);
}

// CHECK-LABEL: define void @test_vst1_f32_x2(float* %a, [2 x <2 x float>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <2 x float>] [[B]].coerce, [2 x <2 x float>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x2x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast float* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
// CHECK:   [[TMP9:%.*]] = bitcast i8* [[TMP2]] to float*
// CHECK:   call void @llvm.aarch64.neon.st1x2.v2f32.p0f32(<2 x float> [[TMP7]], <2 x float> [[TMP8]], float* [[TMP9]])
// CHECK:   ret void
void test_vst1_f32_x2(float32_t *a, float32x2x2_t b) {
  vst1_f32_x2(a, b);
}

// CHECK-LABEL: define void @test_vst1_f64_x2(double* %a, [2 x <1 x double>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.float64x1x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float64x1x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <1 x double>] [[B]].coerce, [2 x <1 x double>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x1x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x double>], [2 x <1 x double>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x double>], [2 x <1 x double>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
// CHECK:   [[TMP9:%.*]] = bitcast i8* [[TMP2]] to double*
// CHECK:   call void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double> [[TMP7]], <1 x double> [[TMP8]], double* [[TMP9]])
// CHECK:   ret void
void test_vst1_f64_x2(float64_t *a, float64x1x2_t b) {
  vst1_f64_x2(a, b);
}

// CHECK-LABEL: define void @test_vst1_p8_x2(i8* %a, [2 x <8 x i8>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x8x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   call void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i8* %a)
// CHECK:   ret void
void test_vst1_p8_x2(poly8_t *a, poly8x8x2_t b) {
  vst1_p8_x2(a, b);
}

// CHECK-LABEL: define void @test_vst1_p16_x2(i16* %a, [2 x <4 x i16>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x4x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK:   [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i16*
// CHECK:   call void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i16* [[TMP9]])
// CHECK:   ret void
void test_vst1_p16_x2(poly16_t *a, poly16x4x2_t b) {
  vst1_p16_x2(a, b);
}

// CHECK-LABEL: define void @test_vst1_p64_x2(i64* %a, [2 x <1 x i64>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.poly64x1x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.poly64x1x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x1x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly64x1x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK:   [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i64*
// CHECK:   call void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], i64* [[TMP9]])
// CHECK:   ret void
void test_vst1_p64_x2(poly64_t *a, poly64x1x2_t b) {
  vst1_p64_x2(a, b);
}

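// The vst1*_x3 tests that follow cover the three-vector forms, which lower to
// the llvm.aarch64.neon.st1x3.* intrinsics. A minimal usage sketch, assuming
// a 48-byte caller-provided buffer (comment only, so the checked IR is
// unchanged):
//   uint8_t buf[48];                    // hypothetical destination
//   uint8x16x3_t v = vld1q_u8_x3(buf);  // load three <16 x i8> vectors
//   vst1q_u8_x3(buf, v);                // store all 48 bytes back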
// CHECK-LABEL: define void @test_vst1q_u8_x3(i8* %a, [3 x <16 x i8>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.uint8x16x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint8x16x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x16x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK:   call void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i8* %a)
// CHECK:   ret void
void test_vst1q_u8_x3(uint8_t *a, uint8x16x3_t b) {
  vst1q_u8_x3(a, b);
}

// CHECK-LABEL: define void @test_vst1q_u16_x3(i16* %a, [3 x <8 x i16>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x8x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK:   [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i16*
// CHECK:   call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i16* [[TMP12]])
// CHECK:   ret void
void test_vst1q_u16_x3(uint16_t *a, uint16x8x3_t b) {
  vst1q_u16_x3(a, b);
}

// CHECK-LABEL: define void @test_vst1q_u32_x3(i32* %a, [3 x <4 x i32>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <4 x i32>] [[B]].coerce, [3 x <4 x i32>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x4x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
// CHECK:   [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i32*
// CHECK:   call void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32> [[TMP9]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], i32* [[TMP12]])
// CHECK:   ret void
void test_vst1q_u32_x3(uint32_t *a, uint32x4x3_t b) {
  vst1q_u32_x3(a, b);
}

// CHECK-LABEL: define void @test_vst1q_u64_x3(i64* %a, [3 x <2 x i64>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.uint64x2x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint64x2x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x2x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x2x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
// CHECK:   [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i64*
// CHECK:   call void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i64* [[TMP12]])
// CHECK:   ret void
void test_vst1q_u64_x3(uint64_t *a, uint64x2x3_t b) {
  vst1q_u64_x3(a, b);
}

// CHECK-LABEL: define void @test_vst1q_s8_x3(i8* %a, [3 x <16 x i8>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.int8x16x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int8x16x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x16x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK:   call void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i8* %a)
// CHECK:   ret void
void test_vst1q_s8_x3(int8_t *a, int8x16x3_t b) {
  vst1q_s8_x3(a, b);
}

// CHECK-LABEL: define void @test_vst1q_s16_x3(i16* %a, [3 x <8 x i16>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x8x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK:   [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i16*
// CHECK:   call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i16* [[TMP12]])
// CHECK:   ret void
void test_vst1q_s16_x3(int16_t *a, int16x8x3_t b) {
  vst1q_s16_x3(a, b);
}

// CHECK-LABEL: define void @test_vst1q_s32_x3(i32* %a, [3 x <4 x i32>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <4 x i32>] [[B]].coerce, [3 x <4 x i32>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x4x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
// CHECK:   [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i32*
// CHECK:   call void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32> [[TMP9]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], i32* [[TMP12]])
// CHECK:   ret void
void test_vst1q_s32_x3(int32_t *a, int32x4x3_t b) {
  vst1q_s32_x3(a, b);
}

// CHECK-LABEL: define void @test_vst1q_s64_x3(i64* %a, [3 x <2 x i64>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.int64x2x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int64x2x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x2x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x2x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
// CHECK:   [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i64*
// CHECK:   call void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i64* [[TMP12]])
// CHECK:   ret void
void test_vst1q_s64_x3(int64_t *a, int64x2x3_t b) {
  vst1q_s64_x3(a, b);
}

// CHECK-LABEL: define void @test_vst1q_f16_x3(half* %a, [3 x <8 x half>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <8 x half>] [[B]].coerce, [3 x <8 x half>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x8x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast half* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK:   [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i16*
// CHECK:   call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i16* [[TMP12]])
// CHECK:   ret void
void test_vst1q_f16_x3(float16_t *a, float16x8x3_t b) {
  vst1q_f16_x3(a, b);
}

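// As the CHECK lines above show, the half-precision form has no dedicated
// floating-point store intrinsic at this point: the <8 x half> values are
// bitcast to <8 x i16> and stored through llvm.aarch64.neon.st1x3.v8i16.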
// CHECK-LABEL: define void @test_vst1q_f32_x3(float* %a, [3 x <4 x float>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.float32x4x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <4 x float>] [[B]].coerce, [3 x <4 x float>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x4x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast float* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
// CHECK:   [[TMP12:%.*]] = bitcast i8* [[TMP2]] to float*
// CHECK:   call void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float> [[TMP9]], <4 x float> [[TMP10]], <4 x float> [[TMP11]], float* [[TMP12]])
// CHECK:   ret void
void test_vst1q_f32_x3(float32_t *a, float32x4x3_t b) {
  vst1q_f32_x3(a, b);
}

// CHECK-LABEL: define void @test_vst1q_f64_x3(double* %a, [3 x <2 x double>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.float64x2x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.float64x2x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <2 x double>] [[B]].coerce, [3 x <2 x double>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x2x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double>
// CHECK:   [[TMP12:%.*]] = bitcast i8* [[TMP2]] to double*
// CHECK:   call void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double> [[TMP9]], <2 x double> [[TMP10]], <2 x double> [[TMP11]], double* [[TMP12]])
// CHECK:   ret void
void test_vst1q_f64_x3(float64_t *a, float64x2x3_t b) {
  vst1q_f64_x3(a, b);
}

// CHECK-LABEL: define void @test_vst1q_p8_x3(i8* %a, [3 x <16 x i8>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.poly8x16x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.poly8x16x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x16x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK:   call void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i8* %a)
// CHECK:   ret void
void test_vst1q_p8_x3(poly8_t *a, poly8x16x3_t b) {
  vst1q_p8_x3(a, b);
}

// CHECK-LABEL: define void @test_vst1q_p16_x3(i16* %a, [3 x <8 x i16>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x8x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK:   [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i16*
// CHECK:   call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i16* [[TMP12]])
// CHECK:   ret void
void test_vst1q_p16_x3(poly16_t *a, poly16x8x3_t b) {
  vst1q_p16_x3(a, b);
}

// CHECK-LABEL: define void @test_vst1q_p64_x3(i64* %a, [3 x <2 x i64>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.poly64x2x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.poly64x2x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x2x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly64x2x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
// CHECK:   [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i64*
// CHECK:   call void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i64* [[TMP12]])
// CHECK:   ret void
void test_vst1q_p64_x3(poly64_t *a, poly64x2x3_t b) {
  vst1q_p64_x3(a, b);
}

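// The remaining tests cover the 64-bit (D-register) _x3 forms. Their structs
// hold three 8-byte vectors, so the coerced copy is 24 bytes with 8-byte
// alignment, rather than the 48-byte, 16-byte-aligned copy used by the
// Q-register forms above.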
// CHECK-LABEL: define void @test_vst1_u8_x3(i8* %a, [3 x <8 x i8>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x8x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK:   call void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i8* %a)
// CHECK:   ret void
void test_vst1_u8_x3(uint8_t *a, uint8x8x3_t b) {
  vst1_u8_x3(a, b);
}

// CHECK-LABEL: define void @test_vst1_u16_x3(i16* %a, [3 x <4 x i16>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x4x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK:   [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i16*
// CHECK:   call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i16* [[TMP12]])
// CHECK:   ret void
void test_vst1_u16_x3(uint16_t *a, uint16x4x3_t b) {
  vst1_u16_x3(a, b);
}

// CHECK-LABEL: define void @test_vst1_u32_x3(i32* %a, [3 x <2 x i32>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <2 x i32>] [[B]].coerce, [3 x <2 x i32>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x2x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
// CHECK:   [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i32*
// CHECK:   call void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], i32* [[TMP12]])
// CHECK:   ret void
void test_vst1_u32_x3(uint32_t *a, uint32x2x3_t b) {
  vst1_u32_x3(a, b);
}

// CHECK-LABEL: define void @test_vst1_u64_x3(i64* %a, [3 x <1 x i64>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.uint64x1x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint64x1x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x1x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
// CHECK:   [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i64*
// CHECK:   call void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i64* [[TMP12]])
// CHECK:   ret void
void test_vst1_u64_x3(uint64_t *a, uint64x1x3_t b) {
  vst1_u64_x3(a, b);
}

// CHECK-LABEL: define void @test_vst1_s8_x3(i8* %a, [3 x <8 x i8>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x8x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK:   call void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i8* %a)
// CHECK:   ret void
void test_vst1_s8_x3(int8_t *a, int8x8x3_t b) {
  vst1_s8_x3(a, b);
}

// CHECK-LABEL: define void @test_vst1_s16_x3(i16* %a, [3 x <4 x i16>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x4x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK:   [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i16*
// CHECK:   call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i16* [[TMP12]])
// CHECK:   ret void
void test_vst1_s16_x3(int16_t *a, int16x4x3_t b) {
  vst1_s16_x3(a, b);
}

16733 // CHECK-LABEL: define void @test_vst1_s32_x3(i32* %a, [3 x <2 x i32>] %b.coerce) #0 {
16734 // CHECK:   [[B:%.*]] = alloca %struct.int32x2x3_t, align 8
16735 // CHECK:   [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8
16736 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[B]], i32 0, i32 0
16737 // CHECK:   store [3 x <2 x i32>] [[B]].coerce, [3 x <2 x i32>]* [[COERCE_DIVE]], align 8
16738 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__S1]] to i8*
16739 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x2x3_t* [[B]] to i8*
16740 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
16741 // CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
16742 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
16743 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i64 0, i64 0
16744 // CHECK:   [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
16745 // CHECK:   [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
16746 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
16747 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i64 0, i64 1
16748 // CHECK:   [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
16749 // CHECK:   [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
16750 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
16751 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i64 0, i64 2
16752 // CHECK:   [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
16753 // CHECK:   [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
16754 // CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
16755 // CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
16756 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
16757 // CHECK:   [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i32*
16758 // CHECK:   call void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], i32* [[TMP12]])
16759 // CHECK:   ret void
test_vst1_s32_x3(int32_t * a,int32x2x3_t b)16760 void test_vst1_s32_x3(int32_t *a, int32x2x3_t b) {
16761   vst1_s32_x3(a, b);
16762 }
16763 
16764 // CHECK-LABEL: define void @test_vst1_s64_x3(i64* %a, [3 x <1 x i64>] %b.coerce) #0 {
16765 // CHECK:   [[B:%.*]] = alloca %struct.int64x1x3_t, align 8
16766 // CHECK:   [[__S1:%.*]] = alloca %struct.int64x1x3_t, align 8
16767 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[B]], i32 0, i32 0
16768 // CHECK:   store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8
16769 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__S1]] to i8*
16770 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x1x3_t* [[B]] to i8*
16771 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
16772 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
16773 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
16774 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i64 0, i64 0
16775 // CHECK:   [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
16776 // CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
16777 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
16778 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i64 0, i64 1
16779 // CHECK:   [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
16780 // CHECK:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
16781 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
16782 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i64 0, i64 2
16783 // CHECK:   [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
16784 // CHECK:   [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
16785 // CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
16786 // CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
16787 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
16788 // CHECK:   [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i64*
16789 // CHECK:   call void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i64* [[TMP12]])
16790 // CHECK:   ret void
test_vst1_s64_x3(int64_t * a,int64x1x3_t b)16791 void test_vst1_s64_x3(int64_t *a, int64x1x3_t b) {
16792   vst1_s64_x3(a, b);
16793 }
16794 
16795 // CHECK-LABEL: define void @test_vst1_f16_x3(half* %a, [3 x <4 x half>] %b.coerce) #0 {
16796 // CHECK:   [[B:%.*]] = alloca %struct.float16x4x3_t, align 8
16797 // CHECK:   [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8
16798 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[B]], i32 0, i32 0
16799 // CHECK:   store [3 x <4 x half>] [[B]].coerce, [3 x <4 x half>]* [[COERCE_DIVE]], align 8
16800 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__S1]] to i8*
16801 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x4x3_t* [[B]] to i8*
16802 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
16803 // CHECK:   [[TMP2:%.*]] = bitcast half* %a to i8*
16804 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
16805 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL]], i64 0, i64 0
16806 // CHECK:   [[TMP3:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
16807 // CHECK:   [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
16808 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
16809 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL1]], i64 0, i64 1
16810 // CHECK:   [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
16811 // CHECK:   [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
16812 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
16813 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL3]], i64 0, i64 2
16814 // CHECK:   [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
16815 // CHECK:   [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
16816 // CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
16817 // CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
16818 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
16819 // CHECK:   [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i16*
16820 // CHECK:   call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i16* [[TMP12]])
16821 // CHECK:   ret void
test_vst1_f16_x3(float16_t * a,float16x4x3_t b)16822 void test_vst1_f16_x3(float16_t *a, float16x4x3_t b) {
16823   vst1_f16_x3(a, b);
16824 }
16825 
16826 // CHECK-LABEL: define void @test_vst1_f32_x3(float* %a, [3 x <2 x float>] %b.coerce) #0 {
16827 // CHECK:   [[B:%.*]] = alloca %struct.float32x2x3_t, align 8
16828 // CHECK:   [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8
16829 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[B]], i32 0, i32 0
16830 // CHECK:   store [3 x <2 x float>] [[B]].coerce, [3 x <2 x float>]* [[COERCE_DIVE]], align 8
16831 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__S1]] to i8*
16832 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x2x3_t* [[B]] to i8*
16833 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
16834 // CHECK:   [[TMP2:%.*]] = bitcast float* %a to i8*
16835 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
16836 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL]], i64 0, i64 0
16837 // CHECK:   [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
16838 // CHECK:   [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
16839 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
16840 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL1]], i64 0, i64 1
16841 // CHECK:   [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
16842 // CHECK:   [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
16843 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
16844 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL3]], i64 0, i64 2
16845 // CHECK:   [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8
16846 // CHECK:   [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
16847 // CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
16848 // CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
16849 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
16850 // CHECK:   [[TMP12:%.*]] = bitcast i8* [[TMP2]] to float*
16851 // CHECK:   call void @llvm.aarch64.neon.st1x3.v2f32.p0f32(<2 x float> [[TMP9]], <2 x float> [[TMP10]], <2 x float> [[TMP11]], float* [[TMP12]])
16852 // CHECK:   ret void
test_vst1_f32_x3(float32_t * a,float32x2x3_t b)16853 void test_vst1_f32_x3(float32_t *a, float32x2x3_t b) {
16854   vst1_f32_x3(a, b);
16855 }
16856 
16857 // CHECK-LABEL: define void @test_vst1_f64_x3(double* %a, [3 x <1 x double>] %b.coerce) #0 {
16858 // CHECK:   [[B:%.*]] = alloca %struct.float64x1x3_t, align 8
16859 // CHECK:   [[__S1:%.*]] = alloca %struct.float64x1x3_t, align 8
16860 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[B]], i32 0, i32 0
16861 // CHECK:   store [3 x <1 x double>] [[B]].coerce, [3 x <1 x double>]* [[COERCE_DIVE]], align 8
16862 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x3_t* [[__S1]] to i8*
16863 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x1x3_t* [[B]] to i8*
16864 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
16865 // CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
16866 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
16867 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL]], i64 0, i64 0
16868 // CHECK:   [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8
16869 // CHECK:   [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
16870 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
16871 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL1]], i64 0, i64 1
16872 // CHECK:   [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8
16873 // CHECK:   [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
16874 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
16875 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL3]], i64 0, i64 2
16876 // CHECK:   [[TMP7:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX4]], align 8
16877 // CHECK:   [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8>
16878 // CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
16879 // CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
16880 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double>
16881 // CHECK:   [[TMP12:%.*]] = bitcast i8* [[TMP2]] to double*
16882 // CHECK:   call void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double> [[TMP9]], <1 x double> [[TMP10]], <1 x double> [[TMP11]], double* [[TMP12]])
16883 // CHECK:   ret void
test_vst1_f64_x3(float64_t * a,float64x1x3_t b)16884 void test_vst1_f64_x3(float64_t *a, float64x1x3_t b) {
16885   vst1_f64_x3(a, b);
16886 }
16887 
16888 // CHECK-LABEL: define void @test_vst1_p8_x3(i8* %a, [3 x <8 x i8>] %b.coerce) #0 {
16889 // CHECK:   [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8
16890 // CHECK:   [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8
16891 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0
16892 // CHECK:   store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
16893 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__S1]] to i8*
16894 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x8x3_t* [[B]] to i8*
16895 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
16896 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
16897 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0
16898 // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
16899 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
16900 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i64 0, i64 1
16901 // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
16902 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
16903 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i64 0, i64 2
16904 // CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
16905 // CHECK:   call void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i8* %a)
16906 // CHECK:   ret void
test_vst1_p8_x3(poly8_t * a,poly8x8x3_t b)16907 void test_vst1_p8_x3(poly8_t *a, poly8x8x3_t b) {
16908   vst1_p8_x3(a, b);
16909 }
16910 
16911 // CHECK-LABEL: define void @test_vst1_p16_x3(i16* %a, [3 x <4 x i16>] %b.coerce) #0 {
16912 // CHECK:   [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8
16913 // CHECK:   [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8
16914 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[B]], i32 0, i32 0
16915 // CHECK:   store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8
16916 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__S1]] to i8*
16917 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x4x3_t* [[B]] to i8*
16918 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
16919 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
16920 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
16921 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0
16922 // CHECK:   [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
16923 // CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
16924 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
16925 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i64 0, i64 1
16926 // CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
16927 // CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
16928 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
16929 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i64 0, i64 2
16930 // CHECK:   [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
16931 // CHECK:   [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
16932 // CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
16933 // CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
16934 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
16935 // CHECK:   [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i16*
16936 // CHECK:   call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i16* [[TMP12]])
16937 // CHECK:   ret void
test_vst1_p16_x3(poly16_t * a,poly16x4x3_t b)16938 void test_vst1_p16_x3(poly16_t *a, poly16x4x3_t b) {
16939   vst1_p16_x3(a, b);
16940 }
16941 
16942 // CHECK-LABEL: define void @test_vst1_p64_x3(i64* %a, [3 x <1 x i64>] %b.coerce) #0 {
16943 // CHECK:   [[B:%.*]] = alloca %struct.poly64x1x3_t, align 8
16944 // CHECK:   [[__S1:%.*]] = alloca %struct.poly64x1x3_t, align 8
16945 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[B]], i32 0, i32 0
16946 // CHECK:   store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8
16947 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x1x3_t* [[__S1]] to i8*
16948 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly64x1x3_t* [[B]] to i8*
16949 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
16950 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
16951 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0
16952 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i64 0, i64 0
16953 // CHECK:   [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
16954 // CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
16955 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0
16956 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i64 0, i64 1
16957 // CHECK:   [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
16958 // CHECK:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
16959 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0
16960 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i64 0, i64 2
16961 // CHECK:   [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
16962 // CHECK:   [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
16963 // CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
16964 // CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
16965 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
16966 // CHECK:   [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i64*
16967 // CHECK:   call void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i64* [[TMP12]])
16968 // CHECK:   ret void
test_vst1_p64_x3(poly64_t * a,poly64x1x3_t b)16969 void test_vst1_p64_x3(poly64_t *a, poly64x1x3_t b) {
16970   vst1_p64_x3(a, b);
16971 }
16972 
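// The vst1q_*_x4 variants below store four q-register vectors (64 bytes)
// through the @llvm.aarch64.neon.st1x4.* intrinsics; the checks mirror the
// _x3 pattern above. For illustration only (hypothetical caller, not part
// of the checked output), such a store is typically paired with the
// matching x4 load:
//   uint8x16x4_t v = vld1q_u8_x4(src);
//   vst1q_u8_x4(dst, v);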
// CHECK-LABEL: define void @test_vst1q_u8_x4(i8* %a, [4 x <16 x i8>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.uint8x16x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint8x16x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x16x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
// CHECK:   call void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i8* %a)
// CHECK:   ret void
void test_vst1q_u8_x4(uint8_t *a, uint8x16x4_t b) {
  vst1q_u8_x4(a, b);
}

// CHECK-LABEL: define void @test_vst1q_u16_x4(i16* %a, [4 x <8 x i16>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x8x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
// CHECK:   [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i16*
// CHECK:   call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i16* [[TMP15]])
// CHECK:   ret void
void test_vst1q_u16_x4(uint16_t *a, uint16x8x4_t b) {
  vst1q_u16_x4(a, b);
}

// CHECK-LABEL: define void @test_vst1q_u32_x4(i32* %a, [4 x <4 x i32>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <4 x i32>] [[B]].coerce, [4 x <4 x i32>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x4x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
// CHECK:   [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i32*
// CHECK:   call void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], i32* [[TMP15]])
// CHECK:   ret void
void test_vst1q_u32_x4(uint32_t *a, uint32x4x4_t b) {
  vst1q_u32_x4(a, b);
}

// CHECK-LABEL: define void @test_vst1q_u64_x4(i64* %a, [4 x <2 x i64>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.uint64x2x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint64x2x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x2x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x2x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
// CHECK:   [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i64*
// CHECK:   call void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], i64* [[TMP15]])
// CHECK:   ret void
void test_vst1q_u64_x4(uint64_t *a, uint64x2x4_t b) {
  vst1q_u64_x4(a, b);
}

// CHECK-LABEL: define void @test_vst1q_s8_x4(i8* %a, [4 x <16 x i8>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.int8x16x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int8x16x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x16x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
// CHECK:   call void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i8* %a)
// CHECK:   ret void
void test_vst1q_s8_x4(int8_t *a, int8x16x4_t b) {
  vst1q_s8_x4(a, b);
}

// CHECK-LABEL: define void @test_vst1q_s16_x4(i16* %a, [4 x <8 x i16>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.int16x8x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x8x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
// CHECK:   [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i16*
// CHECK:   call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i16* [[TMP15]])
// CHECK:   ret void
void test_vst1q_s16_x4(int16_t *a, int16x8x4_t b) {
  vst1q_s16_x4(a, b);
}

// CHECK-LABEL: define void @test_vst1q_s32_x4(i32* %a, [4 x <4 x i32>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <4 x i32>] [[B]].coerce, [4 x <4 x i32>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x4x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
// CHECK:   [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i32*
// CHECK:   call void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], i32* [[TMP15]])
// CHECK:   ret void
void test_vst1q_s32_x4(int32_t *a, int32x4x4_t b) {
  vst1q_s32_x4(a, b);
}

// CHECK-LABEL: define void @test_vst1q_s64_x4(i64* %a, [4 x <2 x i64>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.int64x2x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int64x2x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x2x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x2x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
// CHECK:   [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i64*
// CHECK:   call void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], i64* [[TMP15]])
// CHECK:   ret void
void test_vst1q_s64_x4(int64_t *a, int64x2x4_t b) {
  vst1q_s64_x4(a, b);
}

// CHECK-LABEL: define void @test_vst1q_f16_x4(half* %a, [4 x <8 x half>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.float16x8x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <8 x half>] [[B]].coerce, [4 x <8 x half>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x8x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast half* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP9:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP10:%.*]] = bitcast <8 x half> [[TMP9]] to <16 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
// CHECK:   [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i16*
// CHECK:   call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i16* [[TMP15]])
// CHECK:   ret void
void test_vst1q_f16_x4(float16_t *a, float16x8x4_t b) {
  vst1q_f16_x4(a, b);
}

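// Note: the f16 variant above lowers through the v8i16 form of the
// intrinsic; the <8 x half> lanes are bitcast to <8 x i16> and %a to i16*
// before the call, so no half-typed st1x4 intrinsic is emitted.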
17277 // CHECK-LABEL: define void @test_vst1q_f32_x4(float* %a, [4 x <4 x float>] %b.coerce) #0 {
17278 // CHECK:   [[B:%.*]] = alloca %struct.float32x4x4_t, align 16
17279 // CHECK:   [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16
17280 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[B]], i32 0, i32 0
17281 // CHECK:   store [4 x <4 x float>] [[B]].coerce, [4 x <4 x float>]* [[COERCE_DIVE]], align 16
17282 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__S1]] to i8*
17283 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x4x4_t* [[B]] to i8*
17284 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
17285 // CHECK:   [[TMP2:%.*]] = bitcast float* %a to i8*
17286 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
17287 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]], i64 0, i64 0
17288 // CHECK:   [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
17289 // CHECK:   [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8>
17290 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
17291 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL1]], i64 0, i64 1
17292 // CHECK:   [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
17293 // CHECK:   [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
17294 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
17295 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL3]], i64 0, i64 2
17296 // CHECK:   [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
17297 // CHECK:   [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
17298 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
17299 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL5]], i64 0, i64 3
17300 // CHECK:   [[TMP9:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX6]], align 16
17301 // CHECK:   [[TMP10:%.*]] = bitcast <4 x float> [[TMP9]] to <16 x i8>
17302 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
17303 // CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
17304 // CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
17305 // CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x float>
17306 // CHECK:   [[TMP15:%.*]] = bitcast i8* [[TMP2]] to float*
17307 // CHECK:   call void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float> [[TMP11]], <4 x float> [[TMP12]], <4 x float> [[TMP13]], <4 x float> [[TMP14]], float* [[TMP15]])
17308 // CHECK:   ret void
test_vst1q_f32_x4(float32_t * a,float32x4x4_t b)17309 void test_vst1q_f32_x4(float32_t *a, float32x4x4_t b) {
17310   vst1q_f32_x4(a, b);
17311 }
17312 
17313 // CHECK-LABEL: define void @test_vst1q_f64_x4(double* %a, [4 x <2 x double>] %b.coerce) #0 {
17314 // CHECK:   [[B:%.*]] = alloca %struct.float64x2x4_t, align 16
17315 // CHECK:   [[__S1:%.*]] = alloca %struct.float64x2x4_t, align 16
17316 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[B]], i32 0, i32 0
17317 // CHECK:   store [4 x <2 x double>] [[B]].coerce, [4 x <2 x double>]* [[COERCE_DIVE]], align 16
17318 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x4_t* [[__S1]] to i8*
17319 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x2x4_t* [[B]] to i8*
17320 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
17321 // CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
17322 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
17323 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL]], i64 0, i64 0
17324 // CHECK:   [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16
17325 // CHECK:   [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
17326 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
17327 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL1]], i64 0, i64 1
17328 // CHECK:   [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16
17329 // CHECK:   [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
17330 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
17331 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL3]], i64 0, i64 2
17332 // CHECK:   [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX4]], align 16
17333 // CHECK:   [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8>
17334 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
17335 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL5]], i64 0, i64 3
17336 // CHECK:   [[TMP9:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX6]], align 16
17337 // CHECK:   [[TMP10:%.*]] = bitcast <2 x double> [[TMP9]] to <16 x i8>
17338 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
17339 // CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
17340 // CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double>
17341 // CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x double>
17342 // CHECK:   [[TMP15:%.*]] = bitcast i8* [[TMP2]] to double*
17343 // CHECK:   call void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double> [[TMP11]], <2 x double> [[TMP12]], <2 x double> [[TMP13]], <2 x double> [[TMP14]], double* [[TMP15]])
17344 // CHECK:   ret void
test_vst1q_f64_x4(float64_t * a,float64x2x4_t b)17345 void test_vst1q_f64_x4(float64_t *a, float64x2x4_t b) {
17346   vst1q_f64_x4(a, b);
17347 }
17348 
17349 // CHECK-LABEL: define void @test_vst1q_p8_x4(i8* %a, [4 x <16 x i8>] %b.coerce) #0 {
17350 // CHECK:   [[B:%.*]] = alloca %struct.poly8x16x4_t, align 16
17351 // CHECK:   [[__S1:%.*]] = alloca %struct.poly8x16x4_t, align 16
17352 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[B]], i32 0, i32 0
17353 // CHECK:   store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
17354 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__S1]] to i8*
17355 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x16x4_t* [[B]] to i8*
17356 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
17357 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
17358 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0
17359 // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
17360 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
17361 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i64 0, i64 1
17362 // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
17363 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
17364 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i64 0, i64 2
17365 // CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
17366 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
17367 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i64 0, i64 3
17368 // CHECK:   [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
17369 // CHECK:   call void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i8* %a)
17370 // CHECK:   ret void
test_vst1q_p8_x4(poly8_t * a,poly8x16x4_t b)17371 void test_vst1q_p8_x4(poly8_t *a, poly8x16x4_t b) {
17372   vst1q_p8_x4(a, b);
17373 }

// CHECK-LABEL: define void @test_vst1q_p16_x4(i16* %a, [4 x <8 x i16>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x8x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
// CHECK:   [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i16*
// CHECK:   call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i16* [[TMP15]])
// CHECK:   ret void
void test_vst1q_p16_x4(poly16_t *a, poly16x8x4_t b) {
  vst1q_p16_x4(a, b);
}
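
// NOTE: for element types wider than a byte each lane takes a detour
// through <16 x i8> (load, bitcast to raw bytes, bitcast back) before the
// st1x4 call; after mem2reg these casts remain in the IR but are no-ops at
// the register level.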

// CHECK-LABEL: define void @test_vst1q_p64_x4(i64* %a, [4 x <2 x i64>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.poly64x2x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.poly64x2x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x2x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly64x2x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
// CHECK:   [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i64*
// CHECK:   call void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], i64* [[TMP15]])
// CHECK:   ret void
void test_vst1q_p64_x4(poly64_t *a, poly64x2x4_t b) {
  vst1q_p64_x4(a, b);
}

// CHECK-LABEL: define void @test_vst1_u8_x4(i8* %a, [4 x <8 x i8>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x8x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
// CHECK:   call void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i8* %a)
// CHECK:   ret void
void test_vst1_u8_x4(uint8_t *a, uint8x8x4_t b) {
  vst1_u8_x4(a, b);
}
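
// NOTE: from here the d-register (64-bit vector) variants follow. The
// struct shrinks to 4 x 8 = 32 bytes at 8-byte alignment and the lanes to
// <8 x i8> and friends, but the spill/memcpy/load/st1x4 shape is the same
// as in the q-register tests above.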

// CHECK-LABEL: define void @test_vst1_u16_x4(i16* %a, [4 x <4 x i16>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x4x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
// CHECK:   [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
// CHECK:   [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i16*
// CHECK:   call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i16* [[TMP15]])
// CHECK:   ret void
void test_vst1_u16_x4(uint16_t *a, uint16x4x4_t b) {
  vst1_u16_x4(a, b);
}

// CHECK-LABEL: define void @test_vst1_u32_x4(i32* %a, [4 x <2 x i32>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <2 x i32>] [[B]].coerce, [4 x <2 x i32>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x2x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
// CHECK:   [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
// CHECK:   [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i32*
// CHECK:   call void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], i32* [[TMP15]])
// CHECK:   ret void
void test_vst1_u32_x4(uint32_t *a, uint32x2x4_t b) {
  vst1_u32_x4(a, b);
}

// CHECK-LABEL: define void @test_vst1_u64_x4(i64* %a, [4 x <1 x i64>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.uint64x1x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint64x1x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x1x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8
// CHECK:   [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64>
// CHECK:   [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i64*
// CHECK:   call void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], i64* [[TMP15]])
// CHECK:   ret void
void test_vst1_u64_x4(uint64_t *a, uint64x1x4_t b) {
  vst1_u64_x4(a, b);
}

// CHECK-LABEL: define void @test_vst1_s8_x4(i8* %a, [4 x <8 x i8>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x8x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
// CHECK:   call void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i8* %a)
// CHECK:   ret void
void test_vst1_s8_x4(int8_t *a, int8x8x4_t b) {
  vst1_s8_x4(a, b);
}

// CHECK-LABEL: define void @test_vst1_s16_x4(i16* %a, [4 x <4 x i16>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x4x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
// CHECK:   [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
// CHECK:   [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i16*
// CHECK:   call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i16* [[TMP15]])
// CHECK:   ret void
void test_vst1_s16_x4(int16_t *a, int16x4x4_t b) {
  vst1_s16_x4(a, b);
}

// CHECK-LABEL: define void @test_vst1_s32_x4(i32* %a, [4 x <2 x i32>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <2 x i32>] [[B]].coerce, [4 x <2 x i32>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x2x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
// CHECK:   [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
// CHECK:   [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i32*
// CHECK:   call void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], i32* [[TMP15]])
// CHECK:   ret void
void test_vst1_s32_x4(int32_t *a, int32x2x4_t b) {
  vst1_s32_x4(a, b);
}

// CHECK-LABEL: define void @test_vst1_s64_x4(i64* %a, [4 x <1 x i64>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.int64x1x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int64x1x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x1x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8
// CHECK:   [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64>
// CHECK:   [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i64*
// CHECK:   call void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], i64* [[TMP15]])
// CHECK:   ret void
void test_vst1_s64_x4(int64_t *a, int64x1x4_t b) {
  vst1_s64_x4(a, b);
}

// CHECK-LABEL: define void @test_vst1_f16_x4(half* %a, [4 x <4 x half>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <4 x half>] [[B]].coerce, [4 x <4 x half>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x4x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast half* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP9:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX6]], align 8
// CHECK:   [[TMP10:%.*]] = bitcast <4 x half> [[TMP9]] to <8 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
// CHECK:   [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i16*
// CHECK:   call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i16* [[TMP15]])
// CHECK:   ret void
void test_vst1_f16_x4(float16_t *a, float16x4x4_t b) {
  vst1_f16_x4(a, b);
}
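
// NOTE: there is no half-precision st1x4 overload in this IR, so the
// <4 x half> lanes are reinterpreted as <4 x i16> and stored through
// @llvm.aarch64.neon.st1x4.v4i16.p0i16; for a plain memory store the two
// types are bit-identical.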

// CHECK-LABEL: define void @test_vst1_f32_x4(float* %a, [4 x <2 x float>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <2 x float>] [[B]].coerce, [4 x <2 x float>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x2x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast float* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP9:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX6]], align 8
// CHECK:   [[TMP10:%.*]] = bitcast <2 x float> [[TMP9]] to <8 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x float>
// CHECK:   [[TMP15:%.*]] = bitcast i8* [[TMP2]] to float*
// CHECK:   call void @llvm.aarch64.neon.st1x4.v2f32.p0f32(<2 x float> [[TMP11]], <2 x float> [[TMP12]], <2 x float> [[TMP13]], <2 x float> [[TMP14]], float* [[TMP15]])
// CHECK:   ret void
void test_vst1_f32_x4(float32_t *a, float32x2x4_t b) {
  vst1_f32_x4(a, b);
}

// CHECK-LABEL: define void @test_vst1_f64_x4(double* %a, [4 x <1 x double>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.float64x1x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float64x1x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <1 x double>] [[B]].coerce, [4 x <1 x double>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x1x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP9:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX6]], align 8
// CHECK:   [[TMP10:%.*]] = bitcast <1 x double> [[TMP9]] to <8 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double>
// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x double>
// CHECK:   [[TMP15:%.*]] = bitcast i8* [[TMP2]] to double*
// CHECK:   call void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double> [[TMP11]], <1 x double> [[TMP12]], <1 x double> [[TMP13]], <1 x double> [[TMP14]], double* [[TMP15]])
// CHECK:   ret void
void test_vst1_f64_x4(float64_t *a, float64x1x4_t b) {
  vst1_f64_x4(a, b);
}
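
// NOTE: unlike f16 above, f32 and f64 get dedicated overloads
// (st1x4.v2f32.p0f32 and st1x4.v1f64.p0f64), so no integer
// reinterpretation of the lanes is needed.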

// CHECK-LABEL: define void @test_vst1_p8_x4(i8* %a, [4 x <8 x i8>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x8x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
// CHECK:   call void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i8* %a)
// CHECK:   ret void
void test_vst1_p8_x4(poly8_t *a, poly8x8x4_t b) {
  vst1_p8_x4(a, b);
}

// CHECK-LABEL: define void @test_vst1_p16_x4(i16* %a, [4 x <4 x i16>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x4x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
// CHECK:   [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
// CHECK:   [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i16*
// CHECK:   call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i16* [[TMP15]])
// CHECK:   ret void
void test_vst1_p16_x4(poly16_t *a, poly16x4x4_t b) {
  vst1_p16_x4(a, b);
}

// CHECK-LABEL: define void @test_vst1_p64_x4(i64* %a, [4 x <1 x i64>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.poly64x1x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.poly64x1x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x1x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly64x1x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8
// CHECK:   [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64>
// CHECK:   [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i64*
// CHECK:   call void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], i64* [[TMP15]])
// CHECK:   ret void
void test_vst1_p64_x4(poly64_t *a, poly64x1x4_t b) {
  vst1_p64_x4(a, b);
}
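
// NOTE: end of the vst1*_x4 store tests. Illustrative usage, kept inside a
// comment so it adds no IR to the checked output (vld1_u8_x4 is the
// matching four-vector load; the names here are an example, not part of
// the test):
//
//   uint8x8x4_t quad = vld1_u8_x4(src);  // one call loads four <8 x i8>
//   vst1_u8_x4(dst, quad);               // one call stores all four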

// CHECK-LABEL: define i64 @test_vceqd_s64(i64 %a, i64 %b) #0 {
// CHECK:   [[TMP0:%.*]] = icmp eq i64 %a, %b
// CHECK:   [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCEQD_I]]
int64_t test_vceqd_s64(int64_t a, int64_t b) {
  return (int64_t)vceqd_s64(a, b);
}
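
// NOTE: the scalar compare intrinsics return an all-ones or all-zero
// 64-bit mask, not a C boolean; codegen expresses this as an i1 compare
// sign-extended to i64, so "true" comes back as -1 and "false" as 0.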

// CHECK-LABEL: define i64 @test_vceqd_u64(i64 %a, i64 %b) #0 {
// CHECK:   [[TMP0:%.*]] = icmp eq i64 %a, %b
// CHECK:   [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCEQD_I]]
uint64_t test_vceqd_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vceqd_u64(a, b);
}

// CHECK-LABEL: define i64 @test_vceqzd_s64(i64 %a) #0 {
// CHECK:   [[TMP0:%.*]] = icmp eq i64 %a, 0
// CHECK:   [[VCEQZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCEQZ_I]]
int64_t test_vceqzd_s64(int64_t a) {
  return (int64_t)vceqzd_s64(a);
}

// CHECK-LABEL: define i64 @test_vceqzd_u64(i64 %a) #0 {
// CHECK:   [[TMP0:%.*]] = icmp eq i64 %a, 0
// CHECK:   [[VCEQZD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCEQZD_I]]
uint64_t test_vceqzd_u64(uint64_t a) {
  return (uint64_t)vceqzd_u64(a);
}

// CHECK-LABEL: define i64 @test_vcged_s64(i64 %a, i64 %b) #0 {
// CHECK:   [[TMP0:%.*]] = icmp sge i64 %a, %b
// CHECK:   [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCEQD_I]]
int64_t test_vcged_s64(int64_t a, int64_t b) {
  return (int64_t)vcged_s64(a, b);
}

// CHECK-LABEL: define i64 @test_vcged_u64(i64 %a, i64 %b) #0 {
// CHECK:   [[TMP0:%.*]] = icmp uge i64 %a, %b
// CHECK:   [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCEQD_I]]
uint64_t test_vcged_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vcged_u64(a, b);
}

// CHECK-LABEL: define i64 @test_vcgezd_s64(i64 %a) #0 {
// CHECK:   [[TMP0:%.*]] = icmp sge i64 %a, 0
// CHECK:   [[VCGEZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCGEZ_I]]
int64_t test_vcgezd_s64(int64_t a) {
  return (int64_t)vcgezd_s64(a);
}

// CHECK-LABEL: define i64 @test_vcgtd_s64(i64 %a, i64 %b) #0 {
// CHECK:   [[TMP0:%.*]] = icmp sgt i64 %a, %b
// CHECK:   [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCEQD_I]]
int64_t test_vcgtd_s64(int64_t a, int64_t b) {
  return (int64_t)vcgtd_s64(a, b);
}

// CHECK-LABEL: define i64 @test_vcgtd_u64(i64 %a, i64 %b) #0 {
// CHECK:   [[TMP0:%.*]] = icmp ugt i64 %a, %b
// CHECK:   [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCEQD_I]]
uint64_t test_vcgtd_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vcgtd_u64(a, b);
}

// CHECK-LABEL: define i64 @test_vcgtzd_s64(i64 %a) #0 {
// CHECK:   [[TMP0:%.*]] = icmp sgt i64 %a, 0
// CHECK:   [[VCGTZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCGTZ_I]]
int64_t test_vcgtzd_s64(int64_t a) {
  return (int64_t)vcgtzd_s64(a);
}

// CHECK-LABEL: define i64 @test_vcled_s64(i64 %a, i64 %b) #0 {
// CHECK:   [[TMP0:%.*]] = icmp sle i64 %a, %b
// CHECK:   [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCEQD_I]]
int64_t test_vcled_s64(int64_t a, int64_t b) {
  return (int64_t)vcled_s64(a, b);
}

// CHECK-LABEL: define i64 @test_vcled_u64(i64 %a, i64 %b) #0 {
// CHECK:   [[TMP0:%.*]] = icmp ule i64 %a, %b
// CHECK:   [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCEQD_I]]
uint64_t test_vcled_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vcled_u64(a, b);
}

// CHECK-LABEL: define i64 @test_vclezd_s64(i64 %a) #0 {
// CHECK:   [[TMP0:%.*]] = icmp sle i64 %a, 0
// CHECK:   [[VCLEZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCLEZ_I]]
int64_t test_vclezd_s64(int64_t a) {
  return (int64_t)vclezd_s64(a);
}

// CHECK-LABEL: define i64 @test_vcltd_s64(i64 %a, i64 %b) #0 {
// CHECK:   [[TMP0:%.*]] = icmp slt i64 %a, %b
// CHECK:   [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCEQD_I]]
int64_t test_vcltd_s64(int64_t a, int64_t b) {
  return (int64_t)vcltd_s64(a, b);
}

// CHECK-LABEL: define i64 @test_vcltd_u64(i64 %a, i64 %b) #0 {
// CHECK:   [[TMP0:%.*]] = icmp ult i64 %a, %b
// CHECK:   [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCEQD_I]]
uint64_t test_vcltd_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vcltd_u64(a, b);
}

// CHECK-LABEL: define i64 @test_vcltzd_s64(i64 %a) #0 {
// CHECK:   [[TMP0:%.*]] = icmp slt i64 %a, 0
// CHECK:   [[VCLTZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCLTZ_I]]
int64_t test_vcltzd_s64(int64_t a) {
  return (int64_t)vcltzd_s64(a);
}
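
// NOTE: the _s64 and _u64 compare forms differ only in the icmp predicate
// (sge/sgt/sle/slt versus uge/ugt/ule/ult); equality is sign-agnostic, so
// vceqd_s64 and vceqd_u64 both lower to the same "icmp eq".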

// CHECK-LABEL: define i64 @test_vtstd_s64(i64 %a, i64 %b) #0 {
// CHECK:   [[TMP0:%.*]] = and i64 %a, %b
// CHECK:   [[TMP1:%.*]] = icmp ne i64 [[TMP0]], 0
// CHECK:   [[VTSTD_I:%.*]] = sext i1 [[TMP1]] to i64
// CHECK:   ret i64 [[VTSTD_I]]
int64_t test_vtstd_s64(int64_t a, int64_t b) {
  return (int64_t)vtstd_s64(a, b);
}

// CHECK-LABEL: define i64 @test_vtstd_u64(i64 %a, i64 %b) #0 {
// CHECK:   [[TMP0:%.*]] = and i64 %a, %b
// CHECK:   [[TMP1:%.*]] = icmp ne i64 [[TMP0]], 0
// CHECK:   [[VTSTD_I:%.*]] = sext i1 [[TMP1]] to i64
// CHECK:   ret i64 [[VTSTD_I]]
uint64_t test_vtstd_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vtstd_u64(a, b);
}
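
// NOTE: vtstd is a bit test: AND the operands, compare against zero, and
// sign-extend, i.e. the scalar equivalent of (a & b) != 0 ? ~0ULL : 0ULL.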

// CHECK-LABEL: define i64 @test_vabsd_s64(i64 %a) #0 {
// CHECK:   [[VABSD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.abs.i64(i64 %a) #4
// CHECK:   ret i64 [[VABSD_S64_I]]
int64_t test_vabsd_s64(int64_t a) {
  return (int64_t)vabsd_s64(a);
}

// CHECK-LABEL: define i8 @test_vqabsb_s8(i8 %a) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK:   [[VQABSB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqabs.v8i8(<8 x i8> [[TMP0]]) #4
// CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQABSB_S8_I]], i64 0
// CHECK:   ret i8 [[TMP1]]
int8_t test_vqabsb_s8(int8_t a) {
  return (int8_t)vqabsb_s8(a);
}
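
// NOTE: there is no scalar i8/i16 form of sqabs (nor of the other
// saturating scalar ops below), so the byte and halfword variants insert
// the scalar into lane 0 of an undef vector, run the vector intrinsic, and
// extract lane 0 again; the i32/i64 variants call scalar intrinsics
// directly.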

// CHECK-LABEL: define i16 @test_vqabsh_s16(i16 %a) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[VQABSH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqabs.v4i16(<4 x i16> [[TMP0]]) #4
// CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQABSH_S16_I]], i64 0
// CHECK:   ret i16 [[TMP1]]
int16_t test_vqabsh_s16(int16_t a) {
  return (int16_t)vqabsh_s16(a);
}

// CHECK-LABEL: define i32 @test_vqabss_s32(i32 %a) #0 {
// CHECK:   [[VQABSS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a) #4
// CHECK:   ret i32 [[VQABSS_S32_I]]
int32_t test_vqabss_s32(int32_t a) {
  return (int32_t)vqabss_s32(a);
}

// CHECK-LABEL: define i64 @test_vqabsd_s64(i64 %a) #0 {
// CHECK:   [[VQABSD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqabs.i64(i64 %a) #4
// CHECK:   ret i64 [[VQABSD_S64_I]]
int64_t test_vqabsd_s64(int64_t a) {
  return (int64_t)vqabsd_s64(a);
}

// CHECK-LABEL: define i64 @test_vnegd_s64(i64 %a) #0 {
// CHECK:   [[VNEGD_I:%.*]] = sub i64 0, %a
// CHECK:   ret i64 [[VNEGD_I]]
int64_t test_vnegd_s64(int64_t a) {
  return (int64_t)vnegd_s64(a);
}
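
// NOTE: plain negation needs no intrinsic at all; vnegd_s64 is emitted as
// the ordinary "sub i64 0, %a".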

// CHECK-LABEL: define i8 @test_vqnegb_s8(i8 %a) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK:   [[VQNEGB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqneg.v8i8(<8 x i8> [[TMP0]]) #4
// CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQNEGB_S8_I]], i64 0
// CHECK:   ret i8 [[TMP1]]
int8_t test_vqnegb_s8(int8_t a) {
  return (int8_t)vqnegb_s8(a);
}

// CHECK-LABEL: define i16 @test_vqnegh_s16(i16 %a) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[VQNEGH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqneg.v4i16(<4 x i16> [[TMP0]]) #4
// CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQNEGH_S16_I]], i64 0
// CHECK:   ret i16 [[TMP1]]
int16_t test_vqnegh_s16(int16_t a) {
  return (int16_t)vqnegh_s16(a);
}

// CHECK-LABEL: define i32 @test_vqnegs_s32(i32 %a) #0 {
// CHECK:   [[VQNEGS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqneg.i32(i32 %a) #4
// CHECK:   ret i32 [[VQNEGS_S32_I]]
int32_t test_vqnegs_s32(int32_t a) {
  return (int32_t)vqnegs_s32(a);
}

// CHECK-LABEL: define i64 @test_vqnegd_s64(i64 %a) #0 {
// CHECK:   [[VQNEGD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqneg.i64(i64 %a) #4
// CHECK:   ret i64 [[VQNEGD_S64_I]]
int64_t test_vqnegd_s64(int64_t a) {
  return (int64_t)vqnegd_s64(a);
}

// CHECK-LABEL: define i8 @test_vuqaddb_s8(i8 %a, i8 %b) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK:   [[VUQADDB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4
// CHECK:   [[TMP2:%.*]] = extractelement <8 x i8> [[VUQADDB_S8_I]], i64 0
// CHECK:   ret i8 [[TMP2]]
int8_t test_vuqaddb_s8(int8_t a, int8_t b) {
  return (int8_t)vuqaddb_s8(a, b);
}

// CHECK-LABEL: define i16 @test_vuqaddh_s16(i16 %a, i16 %b) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK:   [[VUQADDH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
// CHECK:   [[TMP2:%.*]] = extractelement <4 x i16> [[VUQADDH_S16_I]], i64 0
// CHECK:   ret i16 [[TMP2]]
int16_t test_vuqaddh_s16(int16_t a, int16_t b) {
  return (int16_t)vuqaddh_s16(a, b);
}

// CHECK-LABEL: define i32 @test_vuqadds_s32(i32 %a, i32 %b) #0 {
// CHECK:   [[VUQADDS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.suqadd.i32(i32 %a, i32 %b) #4
// CHECK:   ret i32 [[VUQADDS_S32_I]]
int32_t test_vuqadds_s32(int32_t a, int32_t b) {
  return (int32_t)vuqadds_s32(a, b);
}

// CHECK-LABEL: define i64 @test_vuqaddd_s64(i64 %a, i64 %b) #0 {
// CHECK:   [[VUQADDD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.suqadd.i64(i64 %a, i64 %b) #4
// CHECK:   ret i64 [[VUQADDD_S64_I]]
int64_t test_vuqaddd_s64(int64_t a, int64_t b) {
  return (int64_t)vuqaddd_s64(a, b);
}
18178 
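// Scalar unsigned saturating accumulate of signed value: vsqadd{b,h,s,d},
// lowering to usqadd and mirroring the suqadd tests above.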
// CHECK-LABEL: define i8 @test_vsqaddb_u8(i8 %a, i8 %b) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK:   [[VSQADDB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.usqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4
// CHECK:   [[TMP2:%.*]] = extractelement <8 x i8> [[VSQADDB_U8_I]], i64 0
// CHECK:   ret i8 [[TMP2]]
uint8_t test_vsqaddb_u8(uint8_t a, uint8_t b) {
  return (uint8_t)vsqaddb_u8(a, b);
}

// CHECK-LABEL: define i16 @test_vsqaddh_u16(i16 %a, i16 %b) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK:   [[VSQADDH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.usqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
// CHECK:   [[TMP2:%.*]] = extractelement <4 x i16> [[VSQADDH_U16_I]], i64 0
// CHECK:   ret i16 [[TMP2]]
uint16_t test_vsqaddh_u16(uint16_t a, uint16_t b) {
  return (uint16_t)vsqaddh_u16(a, b);
}

// CHECK-LABEL: define i32 @test_vsqadds_u32(i32 %a, i32 %b) #0 {
// CHECK:   [[VSQADDS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.usqadd.i32(i32 %a, i32 %b) #4
// CHECK:   ret i32 [[VSQADDS_U32_I]]
uint32_t test_vsqadds_u32(uint32_t a, uint32_t b) {
  return (uint32_t)vsqadds_u32(a, b);
}

// CHECK-LABEL: define i64 @test_vsqaddd_u64(i64 %a, i64 %b) #0 {
// CHECK:   [[VSQADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.usqadd.i64(i64 %a, i64 %b) #4
// CHECK:   ret i64 [[VSQADDD_U64_I]]
uint64_t test_vsqaddd_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vsqaddd_u64(a, b);
}

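// Scalar saturating doubling multiply-accumulate long (vqdmlal) and the
// subtracting forms (vqdmlsl). The halfword variants widen through
// sqdmull.v4i32 and then saturating-add or -subtract lane 0; the word
// variants use the dedicated sqdmulls.scalar intrinsic.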
// CHECK-LABEL: define i32 @test_vqdmlalh_s16(i32 %a, i16 %b, i16 %c) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %c, i64 0
// CHECK:   [[VQDMLXL_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
// CHECK:   [[LANE0_I:%.*]] = extractelement <4 x i32> [[VQDMLXL_I]], i64 0
// CHECK:   [[VQDMLXL1_I:%.*]] = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %a, i32 [[LANE0_I]]) #4
// CHECK:   ret i32 [[VQDMLXL1_I]]
int32_t test_vqdmlalh_s16(int32_t a, int16_t b, int16_t c) {
  return (int32_t)vqdmlalh_s16(a, b, c);
}

// CHECK-LABEL: define i64 @test_vqdmlals_s32(i64 %a, i32 %b, i32 %c) #0 {
// CHECK:   [[VQDMLXL_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 %c) #4
// CHECK:   [[VQDMLXL1_I:%.*]] = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %a, i64 [[VQDMLXL_I]]) #4
// CHECK:   ret i64 [[VQDMLXL1_I]]
int64_t test_vqdmlals_s32(int64_t a, int32_t b, int32_t c) {
  return (int64_t)vqdmlals_s32(a, b, c);
}

// CHECK-LABEL: define i32 @test_vqdmlslh_s16(i32 %a, i16 %b, i16 %c) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %c, i64 0
// CHECK:   [[VQDMLXL_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
// CHECK:   [[LANE0_I:%.*]] = extractelement <4 x i32> [[VQDMLXL_I]], i64 0
// CHECK:   [[VQDMLXL1_I:%.*]] = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %a, i32 [[LANE0_I]]) #4
// CHECK:   ret i32 [[VQDMLXL1_I]]
int32_t test_vqdmlslh_s16(int32_t a, int16_t b, int16_t c) {
  return (int32_t)vqdmlslh_s16(a, b, c);
}

// CHECK-LABEL: define i64 @test_vqdmlsls_s32(i64 %a, i32 %b, i32 %c) #0 {
// CHECK:   [[VQDMLXL_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 %c) #4
// CHECK:   [[VQDMLXL1_I:%.*]] = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %a, i64 [[VQDMLXL_I]]) #4
// CHECK:   ret i64 [[VQDMLXL1_I]]
int64_t test_vqdmlsls_s32(int64_t a, int32_t b, int32_t c) {
  return (int64_t)vqdmlsls_s32(a, b, c);
}

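// Scalar saturating doubling multiply long: vqdmullh_s16 and vqdmulls_s32.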
// CHECK-LABEL: define i32 @test_vqdmullh_s16(i16 %a, i16 %b) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK:   [[VQDMULLH_S16_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
// CHECK:   [[TMP2:%.*]] = extractelement <4 x i32> [[VQDMULLH_S16_I]], i64 0
// CHECK:   ret i32 [[TMP2]]
int32_t test_vqdmullh_s16(int16_t a, int16_t b) {
  return (int32_t)vqdmullh_s16(a, b);
}

// CHECK-LABEL: define i64 @test_vqdmulls_s32(i32 %a, i32 %b) #0 {
// CHECK:   [[VQDMULLS_S32_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %a, i32 %b) #4
// CHECK:   ret i64 [[VQDMULLS_S32_I]]
int64_t test_vqdmulls_s32(int32_t a, int32_t b) {
  return (int64_t)vqdmulls_s32(a, b);
}

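// Scalar saturating narrowing moves: the signed-to-unsigned vqmovun forms,
// then the signed and unsigned vqmovn forms. The 64-bit sources use the
// dedicated scalar sqxtun/sqxtn/uqxtn intrinsics.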
// CHECK-LABEL: define i8 @test_vqmovunh_s16(i16 %a) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
// CHECK:   [[VQMOVUNH_S16_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtun.v8i8(<8 x i16> [[TMP0]]) #4
// CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQMOVUNH_S16_I]], i64 0
// CHECK:   ret i8 [[TMP1]]
int8_t test_vqmovunh_s16(int16_t a) {
  return (int8_t)vqmovunh_s16(a);
}

// CHECK-LABEL: define i16 @test_vqmovuns_s32(i32 %a) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
// CHECK:   [[VQMOVUNS_S32_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> [[TMP0]]) #4
// CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQMOVUNS_S32_I]], i64 0
// CHECK:   ret i16 [[TMP1]]
int16_t test_vqmovuns_s32(int32_t a) {
  return (int16_t)vqmovuns_s32(a);
}

// CHECK-LABEL: define i32 @test_vqmovund_s64(i64 %a) #0 {
// CHECK:   [[VQMOVUND_S64_I:%.*]] = call i32 @llvm.aarch64.neon.scalar.sqxtun.i32.i64(i64 %a) #4
// CHECK:   ret i32 [[VQMOVUND_S64_I]]
int32_t test_vqmovund_s64(int64_t a) {
  return (int32_t)vqmovund_s64(a);
}

// CHECK-LABEL: define i8 @test_vqmovnh_s16(i16 %a) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
// CHECK:   [[VQMOVNH_S16_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtn.v8i8(<8 x i16> [[TMP0]]) #4
// CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQMOVNH_S16_I]], i64 0
// CHECK:   ret i8 [[TMP1]]
int8_t test_vqmovnh_s16(int16_t a) {
  return (int8_t)vqmovnh_s16(a);
}

// CHECK-LABEL: define i16 @test_vqmovns_s32(i32 %a) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
// CHECK:   [[VQMOVNS_S32_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> [[TMP0]]) #4
// CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQMOVNS_S32_I]], i64 0
// CHECK:   ret i16 [[TMP1]]
int16_t test_vqmovns_s32(int32_t a) {
  return (int16_t)vqmovns_s32(a);
}

// CHECK-LABEL: define i32 @test_vqmovnd_s64(i64 %a) #0 {
// CHECK:   [[VQMOVND_S64_I:%.*]] = call i32 @llvm.aarch64.neon.scalar.sqxtn.i32.i64(i64 %a) #4
// CHECK:   ret i32 [[VQMOVND_S64_I]]
int32_t test_vqmovnd_s64(int64_t a) {
  return (int32_t)vqmovnd_s64(a);
}

// CHECK-LABEL: define i8 @test_vqmovnh_u16(i16 %a) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
// CHECK:   [[VQMOVNH_U16_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqxtn.v8i8(<8 x i16> [[TMP0]]) #4
// CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQMOVNH_U16_I]], i64 0
// CHECK:   ret i8 [[TMP1]]
int8_t test_vqmovnh_u16(int16_t a) {
  return (int8_t)vqmovnh_u16(a);
}

// CHECK-LABEL: define i16 @test_vqmovns_u32(i32 %a) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
// CHECK:   [[VQMOVNS_U32_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32> [[TMP0]]) #4
// CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQMOVNS_U32_I]], i64 0
// CHECK:   ret i16 [[TMP1]]
int16_t test_vqmovns_u32(int32_t a) {
  return (int16_t)vqmovns_u32(a);
}

// CHECK-LABEL: define i32 @test_vqmovnd_u64(i64 %a) #0 {
// CHECK:   [[VQMOVND_U64_I:%.*]] = call i32 @llvm.aarch64.neon.scalar.uqxtn.i32.i64(i64 %a) #4
// CHECK:   ret i32 [[VQMOVND_U64_I]]
int32_t test_vqmovnd_u64(int64_t a) {
  return (int32_t)vqmovnd_u64(a);
}

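// Scalar floating-point compares returning an all-ones/all-zeros mask:
// vceq/vcge/vcgt/vcle/vclt and their compare-against-zero variants, for both
// f32 and f64. Each lowers to an fcmp followed by a sign extension of the i1
// result to the element width.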
// CHECK-LABEL: define i32 @test_vceqs_f32(float %a, float %b) #0 {
// CHECK:   [[TMP0:%.*]] = fcmp oeq float %a, %b
// CHECK:   [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK:   ret i32 [[VCMPD_I]]
uint32_t test_vceqs_f32(float32_t a, float32_t b) {
  return (uint32_t)vceqs_f32(a, b);
}

// CHECK-LABEL: define i64 @test_vceqd_f64(double %a, double %b) #0 {
// CHECK:   [[TMP0:%.*]] = fcmp oeq double %a, %b
// CHECK:   [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCMPD_I]]
uint64_t test_vceqd_f64(float64_t a, float64_t b) {
  return (uint64_t)vceqd_f64(a, b);
}

// CHECK-LABEL: define i32 @test_vceqzs_f32(float %a) #0 {
// CHECK:   [[TMP0:%.*]] = fcmp oeq float %a, 0.000000e+00
// CHECK:   [[VCEQZ_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK:   ret i32 [[VCEQZ_I]]
uint32_t test_vceqzs_f32(float32_t a) {
  return (uint32_t)vceqzs_f32(a);
}

// CHECK-LABEL: define i64 @test_vceqzd_f64(double %a) #0 {
// CHECK:   [[TMP0:%.*]] = fcmp oeq double %a, 0.000000e+00
// CHECK:   [[VCEQZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCEQZ_I]]
uint64_t test_vceqzd_f64(float64_t a) {
  return (uint64_t)vceqzd_f64(a);
}

// CHECK-LABEL: define i32 @test_vcges_f32(float %a, float %b) #0 {
// CHECK:   [[TMP0:%.*]] = fcmp oge float %a, %b
// CHECK:   [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK:   ret i32 [[VCMPD_I]]
uint32_t test_vcges_f32(float32_t a, float32_t b) {
  return (uint32_t)vcges_f32(a, b);
}

// CHECK-LABEL: define i64 @test_vcged_f64(double %a, double %b) #0 {
// CHECK:   [[TMP0:%.*]] = fcmp oge double %a, %b
// CHECK:   [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCMPD_I]]
uint64_t test_vcged_f64(float64_t a, float64_t b) {
  return (uint64_t)vcged_f64(a, b);
}

// CHECK-LABEL: define i32 @test_vcgezs_f32(float %a) #0 {
// CHECK:   [[TMP0:%.*]] = fcmp oge float %a, 0.000000e+00
// CHECK:   [[VCGEZ_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK:   ret i32 [[VCGEZ_I]]
uint32_t test_vcgezs_f32(float32_t a) {
  return (uint32_t)vcgezs_f32(a);
}

// CHECK-LABEL: define i64 @test_vcgezd_f64(double %a) #0 {
// CHECK:   [[TMP0:%.*]] = fcmp oge double %a, 0.000000e+00
// CHECK:   [[VCGEZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCGEZ_I]]
uint64_t test_vcgezd_f64(float64_t a) {
  return (uint64_t)vcgezd_f64(a);
}

// CHECK-LABEL: define i32 @test_vcgts_f32(float %a, float %b) #0 {
// CHECK:   [[TMP0:%.*]] = fcmp ogt float %a, %b
// CHECK:   [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK:   ret i32 [[VCMPD_I]]
uint32_t test_vcgts_f32(float32_t a, float32_t b) {
  return (uint32_t)vcgts_f32(a, b);
}

// CHECK-LABEL: define i64 @test_vcgtd_f64(double %a, double %b) #0 {
// CHECK:   [[TMP0:%.*]] = fcmp ogt double %a, %b
// CHECK:   [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCMPD_I]]
uint64_t test_vcgtd_f64(float64_t a, float64_t b) {
  return (uint64_t)vcgtd_f64(a, b);
}

// CHECK-LABEL: define i32 @test_vcgtzs_f32(float %a) #0 {
// CHECK:   [[TMP0:%.*]] = fcmp ogt float %a, 0.000000e+00
// CHECK:   [[VCGTZ_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK:   ret i32 [[VCGTZ_I]]
uint32_t test_vcgtzs_f32(float32_t a) {
  return (uint32_t)vcgtzs_f32(a);
}

// CHECK-LABEL: define i64 @test_vcgtzd_f64(double %a) #0 {
// CHECK:   [[TMP0:%.*]] = fcmp ogt double %a, 0.000000e+00
// CHECK:   [[VCGTZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCGTZ_I]]
uint64_t test_vcgtzd_f64(float64_t a) {
  return (uint64_t)vcgtzd_f64(a);
}

// CHECK-LABEL: define i32 @test_vcles_f32(float %a, float %b) #0 {
// CHECK:   [[TMP0:%.*]] = fcmp ole float %a, %b
// CHECK:   [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK:   ret i32 [[VCMPD_I]]
uint32_t test_vcles_f32(float32_t a, float32_t b) {
  return (uint32_t)vcles_f32(a, b);
}

// CHECK-LABEL: define i64 @test_vcled_f64(double %a, double %b) #0 {
// CHECK:   [[TMP0:%.*]] = fcmp ole double %a, %b
// CHECK:   [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCMPD_I]]
uint64_t test_vcled_f64(float64_t a, float64_t b) {
  return (uint64_t)vcled_f64(a, b);
}

// CHECK-LABEL: define i32 @test_vclezs_f32(float %a) #0 {
// CHECK:   [[TMP0:%.*]] = fcmp ole float %a, 0.000000e+00
// CHECK:   [[VCLEZ_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK:   ret i32 [[VCLEZ_I]]
uint32_t test_vclezs_f32(float32_t a) {
  return (uint32_t)vclezs_f32(a);
}

// CHECK-LABEL: define i64 @test_vclezd_f64(double %a) #0 {
// CHECK:   [[TMP0:%.*]] = fcmp ole double %a, 0.000000e+00
// CHECK:   [[VCLEZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCLEZ_I]]
uint64_t test_vclezd_f64(float64_t a) {
  return (uint64_t)vclezd_f64(a);
}

// CHECK-LABEL: define i32 @test_vclts_f32(float %a, float %b) #0 {
// CHECK:   [[TMP0:%.*]] = fcmp olt float %a, %b
// CHECK:   [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK:   ret i32 [[VCMPD_I]]
uint32_t test_vclts_f32(float32_t a, float32_t b) {
  return (uint32_t)vclts_f32(a, b);
}

// CHECK-LABEL: define i64 @test_vcltd_f64(double %a, double %b) #0 {
// CHECK:   [[TMP0:%.*]] = fcmp olt double %a, %b
// CHECK:   [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCMPD_I]]
uint64_t test_vcltd_f64(float64_t a, float64_t b) {
  return (uint64_t)vcltd_f64(a, b);
}

// CHECK-LABEL: define i32 @test_vcltzs_f32(float %a) #0 {
// CHECK:   [[TMP0:%.*]] = fcmp olt float %a, 0.000000e+00
// CHECK:   [[VCLTZ_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK:   ret i32 [[VCLTZ_I]]
uint32_t test_vcltzs_f32(float32_t a) {
  return (uint32_t)vcltzs_f32(a);
}

// CHECK-LABEL: define i64 @test_vcltzd_f64(double %a) #0 {
// CHECK:   [[TMP0:%.*]] = fcmp olt double %a, 0.000000e+00
// CHECK:   [[VCLTZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCLTZ_I]]
uint64_t test_vcltzd_f64(float64_t a) {
  return (uint64_t)vcltzd_f64(a);
}

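// Scalar floating-point absolute compares: vcage/vcagt lower to facge/facgt
// directly, and the vcale/vcalt forms reuse the same intrinsics with the
// operands swapped.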
// CHECK-LABEL: define i32 @test_vcages_f32(float %a, float %b) #0 {
// CHECK:   [[VCAGES_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facge.i32.f32(float %a, float %b) #4
// CHECK:   ret i32 [[VCAGES_F32_I]]
uint32_t test_vcages_f32(float32_t a, float32_t b) {
  return (uint32_t)vcages_f32(a, b);
}

// CHECK-LABEL: define i64 @test_vcaged_f64(double %a, double %b) #0 {
// CHECK:   [[VCAGED_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facge.i64.f64(double %a, double %b) #4
// CHECK:   ret i64 [[VCAGED_F64_I]]
uint64_t test_vcaged_f64(float64_t a, float64_t b) {
  return (uint64_t)vcaged_f64(a, b);
}

// CHECK-LABEL: define i32 @test_vcagts_f32(float %a, float %b) #0 {
// CHECK:   [[VCAGTS_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facgt.i32.f32(float %a, float %b) #4
// CHECK:   ret i32 [[VCAGTS_F32_I]]
uint32_t test_vcagts_f32(float32_t a, float32_t b) {
  return (uint32_t)vcagts_f32(a, b);
}

// CHECK-LABEL: define i64 @test_vcagtd_f64(double %a, double %b) #0 {
// CHECK:   [[VCAGTD_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facgt.i64.f64(double %a, double %b) #4
// CHECK:   ret i64 [[VCAGTD_F64_I]]
uint64_t test_vcagtd_f64(float64_t a, float64_t b) {
  return (uint64_t)vcagtd_f64(a, b);
}

// CHECK-LABEL: define i32 @test_vcales_f32(float %a, float %b) #0 {
// CHECK:   [[VCALES_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facge.i32.f32(float %b, float %a) #4
// CHECK:   ret i32 [[VCALES_F32_I]]
uint32_t test_vcales_f32(float32_t a, float32_t b) {
  return (uint32_t)vcales_f32(a, b);
}

// CHECK-LABEL: define i64 @test_vcaled_f64(double %a, double %b) #0 {
// CHECK:   [[VCALED_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facge.i64.f64(double %b, double %a) #4
// CHECK:   ret i64 [[VCALED_F64_I]]
uint64_t test_vcaled_f64(float64_t a, float64_t b) {
  return (uint64_t)vcaled_f64(a, b);
}

// CHECK-LABEL: define i32 @test_vcalts_f32(float %a, float %b) #0 {
// CHECK:   [[VCALTS_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facgt.i32.f32(float %b, float %a) #4
// CHECK:   ret i32 [[VCALTS_F32_I]]
uint32_t test_vcalts_f32(float32_t a, float32_t b) {
  return (uint32_t)vcalts_f32(a, b);
}

// CHECK-LABEL: define i64 @test_vcaltd_f64(double %a, double %b) #0 {
// CHECK:   [[VCALTD_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facgt.i64.f64(double %b, double %a) #4
// CHECK:   ret i64 [[VCALTD_F64_I]]
uint64_t test_vcaltd_f64(float64_t a, float64_t b) {
  return (uint64_t)vcaltd_f64(a, b);
}

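// Scalar and single-lane shift right by immediate: vshrd_n/vshr_n. For the
// unsigned d-form, a shift by the full element width (64) is defined to
// produce 0, which folds to a constant return.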
// CHECK-LABEL: define i64 @test_vshrd_n_s64(i64 %a) #0 {
// CHECK:   [[SHRD_N:%.*]] = ashr i64 %a, 1
// CHECK:   ret i64 [[SHRD_N]]
int64_t test_vshrd_n_s64(int64_t a) {
  return (int64_t)vshrd_n_s64(a, 1);
}

// CHECK-LABEL: define <1 x i64> @test_vshr_n_s64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VSHR_N:%.*]] = ashr <1 x i64> [[TMP1]], <i64 1>
// CHECK:   ret <1 x i64> [[VSHR_N]]
int64x1_t test_vshr_n_s64(int64x1_t a) {
  return vshr_n_s64(a, 1);
}

// CHECK-LABEL: define i64 @test_vshrd_n_u64(i64 %a) #0 {
// CHECK:   ret i64 0
uint64_t test_vshrd_n_u64(uint64_t a) {
  return (uint64_t)vshrd_n_u64(a, 64);
}

// CHECK-LABEL: define i64 @test_vshrd_n_u64_2() #0 {
// CHECK:   ret i64 0
uint64_t test_vshrd_n_u64_2() {
  uint64_t a = UINT64_C(0xf000000000000000);
  return vshrd_n_u64(a, 64);
}

// CHECK-LABEL: define <1 x i64> @test_vshr_n_u64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VSHR_N:%.*]] = lshr <1 x i64> [[TMP1]], <i64 1>
// CHECK:   ret <1 x i64> [[VSHR_N]]
uint64x1_t test_vshr_n_u64(uint64x1_t a) {
  return vshr_n_u64(a, 1);
}

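// Scalar and single-lane rounding shift right: vrshrd_n/vrshr_n, implemented
// as srshl/urshl with a negated shift amount.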
// CHECK-LABEL: define i64 @test_vrshrd_n_s64(i64 %a) #0 {
// CHECK:   [[VRSHR_N:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 %a, i64 -63)
// CHECK:   ret i64 [[VRSHR_N]]
int64_t test_vrshrd_n_s64(int64_t a) {
  return (int64_t)vrshrd_n_s64(a, 63);
}

// CHECK-LABEL: define <1 x i64> @test_vrshr_n_s64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>)
// CHECK:   ret <1 x i64> [[VRSHR_N1]]
int64x1_t test_vrshr_n_s64(int64x1_t a) {
  return vrshr_n_s64(a, 1);
}

// CHECK-LABEL: define i64 @test_vrshrd_n_u64(i64 %a) #0 {
// CHECK:   [[VRSHR_N:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 %a, i64 -63)
// CHECK:   ret i64 [[VRSHR_N]]
uint64_t test_vrshrd_n_u64(uint64_t a) {
  return (uint64_t)vrshrd_n_u64(a, 63);
}

// CHECK-LABEL: define <1 x i64> @test_vrshr_n_u64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>)
// CHECK:   ret <1 x i64> [[VRSHR_N1]]
uint64x1_t test_vrshr_n_u64(uint64x1_t a) {
  return vrshr_n_u64(a, 1);
}

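// Shift right and accumulate: vsrad_n/vsra_n. For the unsigned d-form, a
// shift count of 64 contributes nothing, so the accumulator is returned
// unchanged.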
// CHECK-LABEL: define i64 @test_vsrad_n_s64(i64 %a, i64 %b) #0 {
// CHECK:   [[SHRD_N:%.*]] = ashr i64 %b, 63
// CHECK:   [[TMP0:%.*]] = add i64 %a, [[SHRD_N]]
// CHECK:   ret i64 [[TMP0]]
int64_t test_vsrad_n_s64(int64_t a, int64_t b) {
  return (int64_t)vsrad_n_s64(a, b, 63);
}

// CHECK-LABEL: define <1 x i64> @test_vsra_n_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VSRA_N:%.*]] = ashr <1 x i64> [[TMP3]], <i64 1>
// CHECK:   [[TMP4:%.*]] = add <1 x i64> [[TMP2]], [[VSRA_N]]
// CHECK:   ret <1 x i64> [[TMP4]]
int64x1_t test_vsra_n_s64(int64x1_t a, int64x1_t b) {
  return vsra_n_s64(a, b, 1);
}

// CHECK-LABEL: define i64 @test_vsrad_n_u64(i64 %a, i64 %b) #0 {
// CHECK:   [[SHRD_N:%.*]] = lshr i64 %b, 63
// CHECK:   [[TMP0:%.*]] = add i64 %a, [[SHRD_N]]
// CHECK:   ret i64 [[TMP0]]
uint64_t test_vsrad_n_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vsrad_n_u64(a, b, 63);
}

// CHECK-LABEL: define i64 @test_vsrad_n_u64_2(i64 %a, i64 %b) #0 {
// CHECK:   ret i64 %a
uint64_t test_vsrad_n_u64_2(uint64_t a, uint64_t b) {
  return (uint64_t)vsrad_n_u64(a, b, 64);
}

// CHECK-LABEL: define <1 x i64> @test_vsra_n_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VSRA_N:%.*]] = lshr <1 x i64> [[TMP3]], <i64 1>
// CHECK:   [[TMP4:%.*]] = add <1 x i64> [[TMP2]], [[VSRA_N]]
// CHECK:   ret <1 x i64> [[TMP4]]
uint64x1_t test_vsra_n_u64(uint64x1_t a, uint64x1_t b) {
  return vsra_n_u64(a, b, 1);
}

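// Rounding shift right and accumulate: vrsrad_n/vrsra_n, via srshl/urshl
// with a negated shift amount followed by an add.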
// CHECK-LABEL: define i64 @test_vrsrad_n_s64(i64 %a, i64 %b) #0 {
// CHECK:   [[TMP0:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 %b, i64 -63)
// CHECK:   [[TMP1:%.*]] = add i64 %a, [[TMP0]]
// CHECK:   ret i64 [[TMP1]]
int64_t test_vrsrad_n_s64(int64_t a, int64_t b) {
  return (int64_t)vrsrad_n_s64(a, b, 63);
}

// CHECK-LABEL: define <1 x i64> @test_vrsra_n_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>)
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[TMP3:%.*]] = add <1 x i64> [[TMP2]], [[VRSHR_N1]]
// CHECK:   ret <1 x i64> [[TMP3]]
int64x1_t test_vrsra_n_s64(int64x1_t a, int64x1_t b) {
  return vrsra_n_s64(a, b, 1);
}

// CHECK-LABEL: define i64 @test_vrsrad_n_u64(i64 %a, i64 %b) #0 {
// CHECK:   [[TMP0:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 %b, i64 -63)
// CHECK:   [[TMP1:%.*]] = add i64 %a, [[TMP0]]
// CHECK:   ret i64 [[TMP1]]
uint64_t test_vrsrad_n_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vrsrad_n_u64(a, b, 63);
}

// CHECK-LABEL: define <1 x i64> @test_vrsra_n_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>)
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[TMP3:%.*]] = add <1 x i64> [[TMP2]], [[VRSHR_N1]]
// CHECK:   ret <1 x i64> [[TMP3]]
uint64x1_t test_vrsra_n_u64(uint64x1_t a, uint64x1_t b) {
  return vrsra_n_u64(a, b, 1);
}

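// Scalar and single-lane shift left by immediate: vshld_n/vshl_n.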
// CHECK-LABEL: define i64 @test_vshld_n_s64(i64 %a) #0 {
// CHECK:   [[SHLD_N:%.*]] = shl i64 %a, 1
// CHECK:   ret i64 [[SHLD_N]]
int64_t test_vshld_n_s64(int64_t a) {
  return (int64_t)vshld_n_s64(a, 1);
}

// CHECK-LABEL: define <1 x i64> @test_vshl_n_s64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], <i64 1>
// CHECK:   ret <1 x i64> [[VSHL_N]]
int64x1_t test_vshl_n_s64(int64x1_t a) {
  return vshl_n_s64(a, 1);
}

// CHECK-LABEL: define i64 @test_vshld_n_u64(i64 %a) #0 {
// CHECK:   [[SHLD_N:%.*]] = shl i64 %a, 63
// CHECK:   ret i64 [[SHLD_N]]
uint64_t test_vshld_n_u64(uint64_t a) {
  return (uint64_t)vshld_n_u64(a, 63);
}

// CHECK-LABEL: define <1 x i64> @test_vshl_n_u64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], <i64 1>
// CHECK:   ret <1 x i64> [[VSHL_N]]
uint64x1_t test_vshl_n_u64(uint64x1_t a) {
  return vshl_n_u64(a, 1);
}

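// Saturating shift left by immediate: the scalar vqshl{b,h,s,d}_n forms, the
// full-vector vqshl_n/vqshlq_n forms (signed and unsigned), and the
// signed-to-unsigned vqshlu variants. Sub-word scalar forms pass the shift
// amount as lane 0 of an otherwise-undef vector.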
// CHECK-LABEL: define i8 @test_vqshlb_n_s8(i8 %a) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK:   [[VQSHLB_N_S8:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> <i8 7, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
// CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHLB_N_S8]], i64 0
// CHECK:   ret i8 [[TMP1]]
int8_t test_vqshlb_n_s8(int8_t a) {
  return (int8_t)vqshlb_n_s8(a, 7);
}

// CHECK-LABEL: define i16 @test_vqshlh_n_s16(i16 %a) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[VQSHLH_N_S16:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> <i16 15, i16 undef, i16 undef, i16 undef>)
// CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHLH_N_S16]], i64 0
// CHECK:   ret i16 [[TMP1]]
int16_t test_vqshlh_n_s16(int16_t a) {
  return (int16_t)vqshlh_n_s16(a, 15);
}

// CHECK-LABEL: define i32 @test_vqshls_n_s32(i32 %a) #0 {
// CHECK:   [[VQSHLS_N_S32:%.*]] = call i32 @llvm.aarch64.neon.sqshl.i32(i32 %a, i32 31)
// CHECK:   ret i32 [[VQSHLS_N_S32]]
int32_t test_vqshls_n_s32(int32_t a) {
  return (int32_t)vqshls_n_s32(a, 31);
}

// CHECK-LABEL: define i64 @test_vqshld_n_s64(i64 %a) #0 {
// CHECK:   [[VQSHL_N:%.*]] = call i64 @llvm.aarch64.neon.sqshl.i64(i64 %a, i64 63)
// CHECK:   ret i64 [[VQSHL_N]]
int64_t test_vqshld_n_s64(int64_t a) {
  return (int64_t)vqshld_n_s64(a, 63);
}

// CHECK-LABEL: define <8 x i8> @test_vqshl_n_s8(<8 x i8> %a) #0 {
// CHECK:   [[VQSHL_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %a, <8 x i8> zeroinitializer)
// CHECK:   ret <8 x i8> [[VQSHL_N]]
int8x8_t test_vqshl_n_s8(int8x8_t a) {
  return vqshl_n_s8(a, 0);
}

// CHECK-LABEL: define <16 x i8> @test_vqshlq_n_s8(<16 x i8> %a) #0 {
// CHECK:   [[VQSHL_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %a, <16 x i8> zeroinitializer)
// CHECK:   ret <16 x i8> [[VQSHL_N]]
int8x16_t test_vqshlq_n_s8(int8x16_t a) {
  return vqshlq_n_s8(a, 0);
}

// CHECK-LABEL: define <4 x i16> @test_vqshl_n_s16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> zeroinitializer)
// CHECK:   ret <4 x i16> [[VQSHL_N1]]
int16x4_t test_vqshl_n_s16(int16x4_t a) {
  return vqshl_n_s16(a, 0);
}

// CHECK-LABEL: define <8 x i16> @test_vqshlq_n_s16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> zeroinitializer)
// CHECK:   ret <8 x i16> [[VQSHL_N1]]
int16x8_t test_vqshlq_n_s16(int16x8_t a) {
  return vqshlq_n_s16(a, 0);
}

// CHECK-LABEL: define <2 x i32> @test_vqshl_n_s32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> zeroinitializer)
// CHECK:   ret <2 x i32> [[VQSHL_N1]]
int32x2_t test_vqshl_n_s32(int32x2_t a) {
  return vqshl_n_s32(a, 0);
}

// CHECK-LABEL: define <4 x i32> @test_vqshlq_n_s32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> zeroinitializer)
// CHECK:   ret <4 x i32> [[VQSHL_N1]]
int32x4_t test_vqshlq_n_s32(int32x4_t a) {
  return vqshlq_n_s32(a, 0);
}

// CHECK-LABEL: define <2 x i64> @test_vqshlq_n_s64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> zeroinitializer)
// CHECK:   ret <2 x i64> [[VQSHL_N1]]
int64x2_t test_vqshlq_n_s64(int64x2_t a) {
  return vqshlq_n_s64(a, 0);
}

// CHECK-LABEL: define <8 x i8> @test_vqshl_n_u8(<8 x i8> %a) #0 {
// CHECK:   [[VQSHL_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %a, <8 x i8> zeroinitializer)
// CHECK:   ret <8 x i8> [[VQSHL_N]]
uint8x8_t test_vqshl_n_u8(uint8x8_t a) {
  return vqshl_n_u8(a, 0);
}

// CHECK-LABEL: define <16 x i8> @test_vqshlq_n_u8(<16 x i8> %a) #0 {
// CHECK:   [[VQSHL_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %a, <16 x i8> zeroinitializer)
// CHECK:   ret <16 x i8> [[VQSHL_N]]
uint8x16_t test_vqshlq_n_u8(uint8x16_t a) {
  return vqshlq_n_u8(a, 0);
}

// CHECK-LABEL: define <4 x i16> @test_vqshl_n_u16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> zeroinitializer)
// CHECK:   ret <4 x i16> [[VQSHL_N1]]
uint16x4_t test_vqshl_n_u16(uint16x4_t a) {
  return vqshl_n_u16(a, 0);
}

// CHECK-LABEL: define <8 x i16> @test_vqshlq_n_u16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> zeroinitializer)
// CHECK:   ret <8 x i16> [[VQSHL_N1]]
uint16x8_t test_vqshlq_n_u16(uint16x8_t a) {
  return vqshlq_n_u16(a, 0);
}

// CHECK-LABEL: define <2 x i32> @test_vqshl_n_u32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> zeroinitializer)
// CHECK:   ret <2 x i32> [[VQSHL_N1]]
uint32x2_t test_vqshl_n_u32(uint32x2_t a) {
  return vqshl_n_u32(a, 0);
}

// CHECK-LABEL: define <4 x i32> @test_vqshlq_n_u32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> zeroinitializer)
// CHECK:   ret <4 x i32> [[VQSHL_N1]]
uint32x4_t test_vqshlq_n_u32(uint32x4_t a) {
  return vqshlq_n_u32(a, 0);
}

// CHECK-LABEL: define <2 x i64> @test_vqshlq_n_u64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> zeroinitializer)
// CHECK:   ret <2 x i64> [[VQSHL_N1]]
uint64x2_t test_vqshlq_n_u64(uint64x2_t a) {
  return vqshlq_n_u64(a, 0);
}

// CHECK-LABEL: define <1 x i64> @test_vqshl_n_s64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> <i64 1>)
// CHECK:   ret <1 x i64> [[VQSHL_N1]]
int64x1_t test_vqshl_n_s64(int64x1_t a) {
  return vqshl_n_s64(a, 1);
}

// CHECK-LABEL: define i8 @test_vqshlb_n_u8(i8 %a) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK:   [[VQSHLB_N_U8:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> <i8 7, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
// CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHLB_N_U8]], i64 0
// CHECK:   ret i8 [[TMP1]]
uint8_t test_vqshlb_n_u8(uint8_t a) {
  return (uint8_t)vqshlb_n_u8(a, 7);
}

// CHECK-LABEL: define i16 @test_vqshlh_n_u16(i16 %a) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[VQSHLH_N_U16:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> <i16 15, i16 undef, i16 undef, i16 undef>)
// CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHLH_N_U16]], i64 0
// CHECK:   ret i16 [[TMP1]]
uint16_t test_vqshlh_n_u16(uint16_t a) {
  return (uint16_t)vqshlh_n_u16(a, 15);
}

// CHECK-LABEL: define i32 @test_vqshls_n_u32(i32 %a) #0 {
// CHECK:   [[VQSHLS_N_U32:%.*]] = call i32 @llvm.aarch64.neon.uqshl.i32(i32 %a, i32 31)
// CHECK:   ret i32 [[VQSHLS_N_U32]]
uint32_t test_vqshls_n_u32(uint32_t a) {
  return (uint32_t)vqshls_n_u32(a, 31);
}

// CHECK-LABEL: define i64 @test_vqshld_n_u64(i64 %a) #0 {
// CHECK:   [[VQSHL_N:%.*]] = call i64 @llvm.aarch64.neon.uqshl.i64(i64 %a, i64 63)
// CHECK:   ret i64 [[VQSHL_N]]
uint64_t test_vqshld_n_u64(uint64_t a) {
  return (uint64_t)vqshld_n_u64(a, 63);
}

// CHECK-LABEL: define <1 x i64> @test_vqshl_n_u64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> <i64 1>)
// CHECK:   ret <1 x i64> [[VQSHL_N1]]
uint64x1_t test_vqshl_n_u64(uint64x1_t a) {
  return vqshl_n_u64(a, 1);
}

// CHECK-LABEL: define i8 @test_vqshlub_n_s8(i8 %a) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK:   [[VQSHLUB_N_S8:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> [[TMP0]], <8 x i8> <i8 7, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
// CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHLUB_N_S8]], i64 0
// CHECK:   ret i8 [[TMP1]]
int8_t test_vqshlub_n_s8(int8_t a) {
  return (int8_t)vqshlub_n_s8(a, 7);
}

// CHECK-LABEL: define i16 @test_vqshluh_n_s16(i16 %a) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[VQSHLUH_N_S16:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[TMP0]], <4 x i16> <i16 15, i16 undef, i16 undef, i16 undef>)
// CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHLUH_N_S16]], i64 0
// CHECK:   ret i16 [[TMP1]]
int16_t test_vqshluh_n_s16(int16_t a) {
  return (int16_t)vqshluh_n_s16(a, 15);
}

// CHECK-LABEL: define i32 @test_vqshlus_n_s32(i32 %a) #0 {
// CHECK:   [[VQSHLUS_N_S32:%.*]] = call i32 @llvm.aarch64.neon.sqshlu.i32(i32 %a, i32 31)
// CHECK:   ret i32 [[VQSHLUS_N_S32]]
int32_t test_vqshlus_n_s32(int32_t a) {
  return (int32_t)vqshlus_n_s32(a, 31);
}

// CHECK-LABEL: define i64 @test_vqshlud_n_s64(i64 %a) #0 {
// CHECK:   [[VQSHLU_N:%.*]] = call i64 @llvm.aarch64.neon.sqshlu.i64(i64 %a, i64 63)
// CHECK:   ret i64 [[VQSHLU_N]]
int64_t test_vqshlud_n_s64(int64_t a) {
  return (int64_t)vqshlud_n_s64(a, 63);
}

// CHECK-LABEL: define <1 x i64> @test_vqshlu_n_s64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VQSHLU_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64> [[VQSHLU_N]], <1 x i64> <i64 1>)
// CHECK:   ret <1 x i64> [[VQSHLU_N1]]
uint64x1_t test_vqshlu_n_s64(int64x1_t a) {
  return vqshlu_n_s64(a, 1);
}

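// Shift right/left and insert: vsrid_n/vsri_n and vslid_n/vsli_n. The scalar
// d-forms bitcast their i64 operands to <1 x i64> around the vsri/vsli
// intrinsic and bitcast the result back.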
// CHECK-LABEL: define i64 @test_vsrid_n_s64(i64 %a, i64 %b) #0 {
// CHECK:   [[VSRID_N_S64:%.*]] = bitcast i64 %a to <1 x i64>
// CHECK:   [[VSRID_N_S641:%.*]] = bitcast i64 %b to <1 x i64>
// CHECK:   [[VSRID_N_S642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRID_N_S64]], <1 x i64> [[VSRID_N_S641]], i32 63)
// CHECK:   [[VSRID_N_S643:%.*]] = bitcast <1 x i64> [[VSRID_N_S642]] to i64
// CHECK:   ret i64 [[VSRID_N_S643]]
int64_t test_vsrid_n_s64(int64_t a, int64_t b) {
  return (int64_t)vsrid_n_s64(a, b, 63);
}

// CHECK-LABEL: define <1 x i64> @test_vsri_n_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VSRI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRI_N]], <1 x i64> [[VSRI_N1]], i32 1)
// CHECK:   ret <1 x i64> [[VSRI_N2]]
int64x1_t test_vsri_n_s64(int64x1_t a, int64x1_t b) {
  return vsri_n_s64(a, b, 1);
}

// CHECK-LABEL: define i64 @test_vsrid_n_u64(i64 %a, i64 %b) #0 {
// CHECK:   [[VSRID_N_U64:%.*]] = bitcast i64 %a to <1 x i64>
// CHECK:   [[VSRID_N_U641:%.*]] = bitcast i64 %b to <1 x i64>
// CHECK:   [[VSRID_N_U642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRID_N_U64]], <1 x i64> [[VSRID_N_U641]], i32 63)
// CHECK:   [[VSRID_N_U643:%.*]] = bitcast <1 x i64> [[VSRID_N_U642]] to i64
// CHECK:   ret i64 [[VSRID_N_U643]]
uint64_t test_vsrid_n_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vsrid_n_u64(a, b, 63);
}

// CHECK-LABEL: define <1 x i64> @test_vsri_n_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VSRI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRI_N]], <1 x i64> [[VSRI_N1]], i32 1)
// CHECK:   ret <1 x i64> [[VSRI_N2]]
uint64x1_t test_vsri_n_u64(uint64x1_t a, uint64x1_t b) {
  return vsri_n_u64(a, b, 1);
}

// CHECK-LABEL: define i64 @test_vslid_n_s64(i64 %a, i64 %b) #0 {
// CHECK:   [[VSLID_N_S64:%.*]] = bitcast i64 %a to <1 x i64>
// CHECK:   [[VSLID_N_S641:%.*]] = bitcast i64 %b to <1 x i64>
// CHECK:   [[VSLID_N_S642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLID_N_S64]], <1 x i64> [[VSLID_N_S641]], i32 63)
// CHECK:   [[VSLID_N_S643:%.*]] = bitcast <1 x i64> [[VSLID_N_S642]] to i64
// CHECK:   ret i64 [[VSLID_N_S643]]
int64_t test_vslid_n_s64(int64_t a, int64_t b) {
  return (int64_t)vslid_n_s64(a, b, 63);
}

// CHECK-LABEL: define <1 x i64> @test_vsli_n_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VSLI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], i32 1)
// CHECK:   ret <1 x i64> [[VSLI_N2]]
int64x1_t test_vsli_n_s64(int64x1_t a, int64x1_t b) {
  return vsli_n_s64(a, b, 1);
}

// CHECK-LABEL: define i64 @test_vslid_n_u64(i64 %a, i64 %b) #0 {
// CHECK:   [[VSLID_N_U64:%.*]] = bitcast i64 %a to <1 x i64>
// CHECK:   [[VSLID_N_U641:%.*]] = bitcast i64 %b to <1 x i64>
// CHECK:   [[VSLID_N_U642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLID_N_U64]], <1 x i64> [[VSLID_N_U641]], i32 63)
// CHECK:   [[VSLID_N_U643:%.*]] = bitcast <1 x i64> [[VSLID_N_U642]] to i64
// CHECK:   ret i64 [[VSLID_N_U643]]
uint64_t test_vslid_n_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vslid_n_u64(a, b, 63);
}

// CHECK-LABEL: define <1 x i64> @test_vsli_n_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VSLI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], i32 1)
// CHECK:   ret <1 x i64> [[VSLI_N2]]
uint64x1_t test_vsli_n_u64(uint64x1_t a, uint64x1_t b) {
  return vsli_n_u64(a, b, 1);
}

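// Saturating shift right narrow by immediate: vqshrn, the rounding vqrshrn,
// and the signed-to-unsigned vqshrun/vqrshrun forms, each exercised at the
// maximum shift for its element width.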
19076 // CHECK-LABEL: define i8 @test_vqshrnh_n_s16(i16 %a) #0 {
19077 // CHECK:   [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
19078 // CHECK:   [[VQSHRNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[TMP0]], i32 8)
19079 // CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHRNH_N_S16]], i64 0
19080 // CHECK:   ret i8 [[TMP1]]
test_vqshrnh_n_s16(int16_t a)19081 int8_t test_vqshrnh_n_s16(int16_t a) {
19082   return (int8_t)vqshrnh_n_s16(a, 8);
19083 }
19084 
19085 // CHECK-LABEL: define i16 @test_vqshrns_n_s32(i32 %a) #0 {
19086 // CHECK:   [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
19087 // CHECK:   [[VQSHRNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[TMP0]], i32 16)
19088 // CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHRNS_N_S32]], i64 0
19089 // CHECK:   ret i16 [[TMP1]]
test_vqshrns_n_s32(int32_t a)19090 int16_t test_vqshrns_n_s32(int32_t a) {
19091   return (int16_t)vqshrns_n_s32(a, 16);
19092 }
19093 
19094 // CHECK-LABEL: define i32 @test_vqshrnd_n_s64(i64 %a) #0 {
19095 // CHECK:   [[VQSHRND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqshrn.i32(i64 %a, i32 32)
19096 // CHECK:   ret i32 [[VQSHRND_N_S64]]
test_vqshrnd_n_s64(int64_t a)19097 int32_t test_vqshrnd_n_s64(int64_t a) {
19098   return (int32_t)vqshrnd_n_s64(a, 32);
19099 }
19100 
19101 // CHECK-LABEL: define i8 @test_vqshrnh_n_u16(i16 %a) #0 {
19102 // CHECK:   [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
19103 // CHECK:   [[VQSHRNH_N_U16:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[TMP0]], i32 8)
19104 // CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHRNH_N_U16]], i64 0
19105 // CHECK:   ret i8 [[TMP1]]
test_vqshrnh_n_u16(uint16_t a)19106 uint8_t test_vqshrnh_n_u16(uint16_t a) {
19107   return (uint8_t)vqshrnh_n_u16(a, 8);
19108 }
19109 
19110 // CHECK-LABEL: define i16 @test_vqshrns_n_u32(i32 %a) #0 {
19111 // CHECK:   [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
19112 // CHECK:   [[VQSHRNS_N_U32:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[TMP0]], i32 16)
19113 // CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHRNS_N_U32]], i64 0
19114 // CHECK:   ret i16 [[TMP1]]
test_vqshrns_n_u32(uint32_t a)19115 uint16_t test_vqshrns_n_u32(uint32_t a) {
19116   return (uint16_t)vqshrns_n_u32(a, 16);
19117 }
19118 
19119 // CHECK-LABEL: define i32 @test_vqshrnd_n_u64(i64 %a) #0 {
19120 // CHECK:   [[VQSHRND_N_U64:%.*]] = call i32 @llvm.aarch64.neon.uqshrn.i32(i64 %a, i32 32)
19121 // CHECK:   ret i32 [[VQSHRND_N_U64]]
test_vqshrnd_n_u64(uint64_t a)19122 uint32_t test_vqshrnd_n_u64(uint64_t a) {
19123   return (uint32_t)vqshrnd_n_u64(a, 32);
19124 }
19125 
19126 // CHECK-LABEL: define i8 @test_vqrshrnh_n_s16(i16 %a) #0 {
19127 // CHECK:   [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
19128 // CHECK:   [[VQRSHRNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[TMP0]], i32 8)
19129 // CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQRSHRNH_N_S16]], i64 0
19130 // CHECK:   ret i8 [[TMP1]]
test_vqrshrnh_n_s16(int16_t a)19131 int8_t test_vqrshrnh_n_s16(int16_t a) {
19132   return (int8_t)vqrshrnh_n_s16(a, 8);
19133 }
19134 
19135 // CHECK-LABEL: define i16 @test_vqrshrns_n_s32(i32 %a) #0 {
19136 // CHECK:   [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
19137 // CHECK:   [[VQRSHRNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[TMP0]], i32 16)
19138 // CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQRSHRNS_N_S32]], i64 0
19139 // CHECK:   ret i16 [[TMP1]]
test_vqrshrns_n_s32(int32_t a)19140 int16_t test_vqrshrns_n_s32(int32_t a) {
19141   return (int16_t)vqrshrns_n_s32(a, 16);
19142 }
19143 
19144 // CHECK-LABEL: define i32 @test_vqrshrnd_n_s64(i64 %a) #0 {
19145 // CHECK:   [[VQRSHRND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqrshrn.i32(i64 %a, i32 32)
19146 // CHECK:   ret i32 [[VQRSHRND_N_S64]]
test_vqrshrnd_n_s64(int64_t a)19147 int32_t test_vqrshrnd_n_s64(int64_t a) {
19148   return (int32_t)vqrshrnd_n_s64(a, 32);
19149 }
19150 
19151 // CHECK-LABEL: define i8 @test_vqrshrnh_n_u16(i16 %a) #0 {
19152 // CHECK:   [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
19153 // CHECK:   [[VQRSHRNH_N_U16:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[TMP0]], i32 8)
19154 // CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQRSHRNH_N_U16]], i64 0
19155 // CHECK:   ret i8 [[TMP1]]
test_vqrshrnh_n_u16(uint16_t a)19156 uint8_t test_vqrshrnh_n_u16(uint16_t a) {
19157   return (uint8_t)vqrshrnh_n_u16(a, 8);
19158 }
19159 
19160 // CHECK-LABEL: define i16 @test_vqrshrns_n_u32(i32 %a) #0 {
19161 // CHECK:   [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
19162 // CHECK:   [[VQRSHRNS_N_U32:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[TMP0]], i32 16)
19163 // CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQRSHRNS_N_U32]], i64 0
19164 // CHECK:   ret i16 [[TMP1]]
test_vqrshrns_n_u32(uint32_t a)19165 uint16_t test_vqrshrns_n_u32(uint32_t a) {
19166   return (uint16_t)vqrshrns_n_u32(a, 16);
19167 }
19168 
19169 // CHECK-LABEL: define i32 @test_vqrshrnd_n_u64(i64 %a) #0 {
19170 // CHECK:   [[VQRSHRND_N_U64:%.*]] = call i32 @llvm.aarch64.neon.uqrshrn.i32(i64 %a, i32 32)
19171 // CHECK:   ret i32 [[VQRSHRND_N_U64]]
test_vqrshrnd_n_u64(uint64_t a)19172 uint32_t test_vqrshrnd_n_u64(uint64_t a) {
19173   return (uint32_t)vqrshrnd_n_u64(a, 32);
19174 }
19175 
// CHECK-LABEL: define i8 @test_vqshrunh_n_s16(i16 %a) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
// CHECK:   [[VQSHRUNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[TMP0]], i32 8)
// CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHRUNH_N_S16]], i64 0
// CHECK:   ret i8 [[TMP1]]
int8_t test_vqshrunh_n_s16(int16_t a) {
  return (int8_t)vqshrunh_n_s16(a, 8);
}

// CHECK-LABEL: define i16 @test_vqshruns_n_s32(i32 %a) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
// CHECK:   [[VQSHRUNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[TMP0]], i32 16)
// CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHRUNS_N_S32]], i64 0
// CHECK:   ret i16 [[TMP1]]
int16_t test_vqshruns_n_s32(int32_t a) {
  return (int16_t)vqshruns_n_s32(a, 16);
}

// CHECK-LABEL: define i32 @test_vqshrund_n_s64(i64 %a) #0 {
// CHECK:   [[VQSHRUND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqshrun.i32(i64 %a, i32 32)
// CHECK:   ret i32 [[VQSHRUND_N_S64]]
int32_t test_vqshrund_n_s64(int64_t a) {
  return (int32_t)vqshrund_n_s64(a, 32);
}

// CHECK-LABEL: define i8 @test_vqrshrunh_n_s16(i16 %a) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
// CHECK:   [[VQRSHRUNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[TMP0]], i32 8)
// CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQRSHRUNH_N_S16]], i64 0
// CHECK:   ret i8 [[TMP1]]
int8_t test_vqrshrunh_n_s16(int16_t a) {
  return (int8_t)vqrshrunh_n_s16(a, 8);
}

// CHECK-LABEL: define i16 @test_vqrshruns_n_s32(i32 %a) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
// CHECK:   [[VQRSHRUNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[TMP0]], i32 16)
// CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQRSHRUNS_N_S32]], i64 0
// CHECK:   ret i16 [[TMP1]]
int16_t test_vqrshruns_n_s32(int32_t a) {
  return (int16_t)vqrshruns_n_s32(a, 16);
}

// CHECK-LABEL: define i32 @test_vqrshrund_n_s64(i64 %a) #0 {
// CHECK:   [[VQRSHRUND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqrshrun.i32(i64 %a, i32 32)
// CHECK:   ret i32 [[VQRSHRUND_N_S64]]
int32_t test_vqrshrund_n_s64(int64_t a) {
  return (int32_t)vqrshrund_n_s64(a, 32);
}

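// Scalar fixed-point <-> floating-point conversions. The immediate is the
// number of fractional bits; these tests exercise the boundary values
// (1 and 32 for 32-bit operands, 1 and 64 for 64-bit operands).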
// CHECK-LABEL: define float @test_vcvts_n_f32_s32(i32 %a) #0 {
// CHECK:   [[VCVTS_N_F32_S32:%.*]] = call float @llvm.aarch64.neon.vcvtfxs2fp.f32.i32(i32 %a, i32 1)
// CHECK:   ret float [[VCVTS_N_F32_S32]]
float32_t test_vcvts_n_f32_s32(int32_t a) {
  return vcvts_n_f32_s32(a, 1);
}

// CHECK-LABEL: define double @test_vcvtd_n_f64_s64(i64 %a) #0 {
// CHECK:   [[VCVTD_N_F64_S64:%.*]] = call double @llvm.aarch64.neon.vcvtfxs2fp.f64.i64(i64 %a, i32 1)
// CHECK:   ret double [[VCVTD_N_F64_S64]]
float64_t test_vcvtd_n_f64_s64(int64_t a) {
  return vcvtd_n_f64_s64(a, 1);
}

// CHECK-LABEL: define float @test_vcvts_n_f32_u32(i32 %a) #0 {
// CHECK:   [[VCVTS_N_F32_U32:%.*]] = call float @llvm.aarch64.neon.vcvtfxu2fp.f32.i32(i32 %a, i32 32)
// CHECK:   ret float [[VCVTS_N_F32_U32]]
float32_t test_vcvts_n_f32_u32(uint32_t a) {
  return vcvts_n_f32_u32(a, 32);
}

// CHECK-LABEL: define double @test_vcvtd_n_f64_u64(i64 %a) #0 {
// CHECK:   [[VCVTD_N_F64_U64:%.*]] = call double @llvm.aarch64.neon.vcvtfxu2fp.f64.i64(i64 %a, i32 64)
// CHECK:   ret double [[VCVTD_N_F64_U64]]
float64_t test_vcvtd_n_f64_u64(uint64_t a) {
  return vcvtd_n_f64_u64(a, 64);
}

// CHECK-LABEL: define i32 @test_vcvts_n_s32_f32(float %a) #0 {
// CHECK:   [[VCVTS_N_S32_F32:%.*]] = call i32 @llvm.aarch64.neon.vcvtfp2fxs.i32.f32(float %a, i32 1)
// CHECK:   ret i32 [[VCVTS_N_S32_F32]]
int32_t test_vcvts_n_s32_f32(float32_t a) {
  return (int32_t)vcvts_n_s32_f32(a, 1);
}

// CHECK-LABEL: define i64 @test_vcvtd_n_s64_f64(double %a) #0 {
// CHECK:   [[VCVTD_N_S64_F64:%.*]] = call i64 @llvm.aarch64.neon.vcvtfp2fxs.i64.f64(double %a, i32 1)
// CHECK:   ret i64 [[VCVTD_N_S64_F64]]
int64_t test_vcvtd_n_s64_f64(float64_t a) {
  return (int64_t)vcvtd_n_s64_f64(a, 1);
}

// CHECK-LABEL: define i32 @test_vcvts_n_u32_f32(float %a) #0 {
// CHECK:   [[VCVTS_N_U32_F32:%.*]] = call i32 @llvm.aarch64.neon.vcvtfp2fxu.i32.f32(float %a, i32 32)
// CHECK:   ret i32 [[VCVTS_N_U32_F32]]
uint32_t test_vcvts_n_u32_f32(float32_t a) {
  return (uint32_t)vcvts_n_u32_f32(a, 32);
}

// CHECK-LABEL: define i64 @test_vcvtd_n_u64_f64(double %a) #0 {
// CHECK:   [[VCVTD_N_U64_F64:%.*]] = call i64 @llvm.aarch64.neon.vcvtfp2fxu.i64.f64(double %a, i32 64)
// CHECK:   ret i64 [[VCVTD_N_U64_F64]]
uint64_t test_vcvtd_n_u64_f64(float64_t a) {
  return (uint64_t)vcvtd_n_u64_f64(a, 64);
}

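// The vreinterpret tests below only reinterpret bits: each call lowers to a
// single bitcast, or to no IR at all when source and destination map to the
// same vector type (e.g. int8x8_t and uint8x8_t are both <8 x i8>).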
// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_s16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_s16(int16x4_t a) {
  return vreinterpret_s8_s16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_s32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_s32(int32x2_t a) {
  return vreinterpret_s8_s32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_s64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_s64(int64x1_t a) {
  return vreinterpret_s8_s64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_u8(<8 x i8> %a) #0 {
// CHECK:   ret <8 x i8> %a
int8x8_t test_vreinterpret_s8_u8(uint8x8_t a) {
  return vreinterpret_s8_u8(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_u16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_u16(uint16x4_t a) {
  return vreinterpret_s8_u16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_u32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_u32(uint32x2_t a) {
  return vreinterpret_s8_u32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_u64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_u64(uint64x1_t a) {
  return vreinterpret_s8_u64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_f16(<4 x half> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_f16(float16x4_t a) {
  return vreinterpret_s8_f16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_f32(<2 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_f32(float32x2_t a) {
  return vreinterpret_s8_f32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_f64(<1 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_f64(float64x1_t a) {
  return vreinterpret_s8_f64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_p8(<8 x i8> %a) #0 {
// CHECK:   ret <8 x i8> %a
int8x8_t test_vreinterpret_s8_p8(poly8x8_t a) {
  return vreinterpret_s8_p8(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_p16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_p16(poly16x4_t a) {
  return vreinterpret_s8_p16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_p64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_p64(poly64x1_t a) {
  return vreinterpret_s8_p64(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_s8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_s8(int8x8_t a) {
  return vreinterpret_s16_s8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_s32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_s32(int32x2_t a) {
  return vreinterpret_s16_s32(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_s64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_s64(int64x1_t a) {
  return vreinterpret_s16_s64(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_u8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_u8(uint8x8_t a) {
  return vreinterpret_s16_u8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_u16(<4 x i16> %a) #0 {
// CHECK:   ret <4 x i16> %a
int16x4_t test_vreinterpret_s16_u16(uint16x4_t a) {
  return vreinterpret_s16_u16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_u32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_u32(uint32x2_t a) {
  return vreinterpret_s16_u32(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_u64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_u64(uint64x1_t a) {
  return vreinterpret_s16_u64(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_f16(<4 x half> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_f16(float16x4_t a) {
  return vreinterpret_s16_f16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_f32(<2 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_f32(float32x2_t a) {
  return vreinterpret_s16_f32(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_f64(<1 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_f64(float64x1_t a) {
  return vreinterpret_s16_f64(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_p8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_p8(poly8x8_t a) {
  return vreinterpret_s16_p8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_p16(<4 x i16> %a) #0 {
// CHECK:   ret <4 x i16> %a
int16x4_t test_vreinterpret_s16_p16(poly16x4_t a) {
  return vreinterpret_s16_p16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_p64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_p64(poly64x1_t a) {
  return vreinterpret_s16_p64(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_s8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_s8(int8x8_t a) {
  return vreinterpret_s32_s8(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_s16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_s16(int16x4_t a) {
  return vreinterpret_s32_s16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_s64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_s64(int64x1_t a) {
  return vreinterpret_s32_s64(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_u8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_u8(uint8x8_t a) {
  return vreinterpret_s32_u8(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_u16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_u16(uint16x4_t a) {
  return vreinterpret_s32_u16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_u32(<2 x i32> %a) #0 {
// CHECK:   ret <2 x i32> %a
int32x2_t test_vreinterpret_s32_u32(uint32x2_t a) {
  return vreinterpret_s32_u32(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_u64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_u64(uint64x1_t a) {
  return vreinterpret_s32_u64(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_f16(<4 x half> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_f16(float16x4_t a) {
  return vreinterpret_s32_f16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_f32(<2 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_f32(float32x2_t a) {
  return vreinterpret_s32_f32(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_f64(<1 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_f64(float64x1_t a) {
  return vreinterpret_s32_f64(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_p8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_p8(poly8x8_t a) {
  return vreinterpret_s32_p8(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_p16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_p16(poly16x4_t a) {
  return vreinterpret_s32_p16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_p64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_p64(poly64x1_t a) {
  return vreinterpret_s32_p64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_s8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_s8(int8x8_t a) {
  return vreinterpret_s64_s8(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_s16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_s16(int16x4_t a) {
  return vreinterpret_s64_s16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_s32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_s32(int32x2_t a) {
  return vreinterpret_s64_s32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_u8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_u8(uint8x8_t a) {
  return vreinterpret_s64_u8(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_u16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_u16(uint16x4_t a) {
  return vreinterpret_s64_u16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_u32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_u32(uint32x2_t a) {
  return vreinterpret_s64_u32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_u64(<1 x i64> %a) #0 {
// CHECK:   ret <1 x i64> %a
int64x1_t test_vreinterpret_s64_u64(uint64x1_t a) {
  return vreinterpret_s64_u64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_f16(<4 x half> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_f16(float16x4_t a) {
  return vreinterpret_s64_f16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_f32(<2 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_f32(float32x2_t a) {
  return vreinterpret_s64_f32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_f64(<1 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_f64(float64x1_t a) {
  return vreinterpret_s64_f64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_p8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_p8(poly8x8_t a) {
  return vreinterpret_s64_p8(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_p16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_p16(poly16x4_t a) {
  return vreinterpret_s64_p16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_p64(<1 x i64> %a) #0 {
// CHECK:   ret <1 x i64> %a
int64x1_t test_vreinterpret_s64_p64(poly64x1_t a) {
  return vreinterpret_s64_p64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_s8(<8 x i8> %a) #0 {
// CHECK:   ret <8 x i8> %a
uint8x8_t test_vreinterpret_u8_s8(int8x8_t a) {
  return vreinterpret_u8_s8(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_s16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_s16(int16x4_t a) {
  return vreinterpret_u8_s16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_s32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_s32(int32x2_t a) {
  return vreinterpret_u8_s32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_s64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_s64(int64x1_t a) {
  return vreinterpret_u8_s64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_u16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_u16(uint16x4_t a) {
  return vreinterpret_u8_u16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_u32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_u32(uint32x2_t a) {
  return vreinterpret_u8_u32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_u64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_u64(uint64x1_t a) {
  return vreinterpret_u8_u64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_f16(<4 x half> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_f16(float16x4_t a) {
  return vreinterpret_u8_f16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_f32(<2 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_f32(float32x2_t a) {
  return vreinterpret_u8_f32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_f64(<1 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_f64(float64x1_t a) {
  return vreinterpret_u8_f64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_p8(<8 x i8> %a) #0 {
// CHECK:   ret <8 x i8> %a
uint8x8_t test_vreinterpret_u8_p8(poly8x8_t a) {
  return vreinterpret_u8_p8(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_p16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_p16(poly16x4_t a) {
  return vreinterpret_u8_p16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_p64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_p64(poly64x1_t a) {
  return vreinterpret_u8_p64(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_s8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_s8(int8x8_t a) {
  return vreinterpret_u16_s8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_s16(<4 x i16> %a) #0 {
// CHECK:   ret <4 x i16> %a
uint16x4_t test_vreinterpret_u16_s16(int16x4_t a) {
  return vreinterpret_u16_s16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_s32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_s32(int32x2_t a) {
  return vreinterpret_u16_s32(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_s64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_s64(int64x1_t a) {
  return vreinterpret_u16_s64(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_u8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_u8(uint8x8_t a) {
  return vreinterpret_u16_u8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_u32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_u32(uint32x2_t a) {
  return vreinterpret_u16_u32(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_u64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_u64(uint64x1_t a) {
  return vreinterpret_u16_u64(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_f16(<4 x half> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_f16(float16x4_t a) {
  return vreinterpret_u16_f16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_f32(<2 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_f32(float32x2_t a) {
  return vreinterpret_u16_f32(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_f64(<1 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_f64(float64x1_t a) {
  return vreinterpret_u16_f64(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_p8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_p8(poly8x8_t a) {
  return vreinterpret_u16_p8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_p16(<4 x i16> %a) #0 {
// CHECK:   ret <4 x i16> %a
uint16x4_t test_vreinterpret_u16_p16(poly16x4_t a) {
  return vreinterpret_u16_p16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_p64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_p64(poly64x1_t a) {
  return vreinterpret_u16_p64(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_s8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_s8(int8x8_t a) {
  return vreinterpret_u32_s8(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_s16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_s16(int16x4_t a) {
  return vreinterpret_u32_s16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_s32(<2 x i32> %a) #0 {
// CHECK:   ret <2 x i32> %a
uint32x2_t test_vreinterpret_u32_s32(int32x2_t a) {
  return vreinterpret_u32_s32(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_s64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_s64(int64x1_t a) {
  return vreinterpret_u32_s64(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_u8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_u8(uint8x8_t a) {
  return vreinterpret_u32_u8(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_u16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_u16(uint16x4_t a) {
  return vreinterpret_u32_u16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_u64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_u64(uint64x1_t a) {
  return vreinterpret_u32_u64(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_f16(<4 x half> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_f16(float16x4_t a) {
  return vreinterpret_u32_f16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_f32(<2 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_f32(float32x2_t a) {
  return vreinterpret_u32_f32(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_f64(<1 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_f64(float64x1_t a) {
  return vreinterpret_u32_f64(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_p8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_p8(poly8x8_t a) {
  return vreinterpret_u32_p8(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_p16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_p16(poly16x4_t a) {
  return vreinterpret_u32_p16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_p64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_p64(poly64x1_t a) {
  return vreinterpret_u32_p64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_s8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_s8(int8x8_t a) {
  return vreinterpret_u64_s8(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_s16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_s16(int16x4_t a) {
  return vreinterpret_u64_s16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_s32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_s32(int32x2_t a) {
  return vreinterpret_u64_s32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_s64(<1 x i64> %a) #0 {
// CHECK:   ret <1 x i64> %a
uint64x1_t test_vreinterpret_u64_s64(int64x1_t a) {
  return vreinterpret_u64_s64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_u8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_u8(uint8x8_t a) {
  return vreinterpret_u64_u8(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_u16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_u16(uint16x4_t a) {
  return vreinterpret_u64_u16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_u32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_u32(uint32x2_t a) {
  return vreinterpret_u64_u32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_f16(<4 x half> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_f16(float16x4_t a) {
  return vreinterpret_u64_f16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_f32(<2 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_f32(float32x2_t a) {
  return vreinterpret_u64_f32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_f64(<1 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_f64(float64x1_t a) {
  return vreinterpret_u64_f64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_p8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_p8(poly8x8_t a) {
  return vreinterpret_u64_p8(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_p16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_p16(poly16x4_t a) {
  return vreinterpret_u64_p16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_p64(<1 x i64> %a) #0 {
// CHECK:   ret <1 x i64> %a
uint64x1_t test_vreinterpret_u64_p64(poly64x1_t a) {
  return vreinterpret_u64_p64(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_s8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s8(int8x8_t a) {
  return vreinterpret_f16_s8(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_s16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s16(int16x4_t a) {
  return vreinterpret_f16_s16(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_s32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s32(int32x2_t a) {
  return vreinterpret_f16_s32(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_s64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s64(int64x1_t a) {
  return vreinterpret_f16_s64(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_u8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u8(uint8x8_t a) {
  return vreinterpret_f16_u8(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_u16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u16(uint16x4_t a) {
  return vreinterpret_f16_u16(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_u32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u32(uint32x2_t a) {
  return vreinterpret_f16_u32(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_u64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u64(uint64x1_t a) {
  return vreinterpret_f16_u64(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_f32(<2 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_f32(float32x2_t a) {
  return vreinterpret_f16_f32(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_f64(<1 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_f64(float64x1_t a) {
  return vreinterpret_f16_f64(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_p8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_p8(poly8x8_t a) {
  return vreinterpret_f16_p8(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_p16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_p16(poly16x4_t a) {
  return vreinterpret_f16_p16(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_p64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_p64(poly64x1_t a) {
  return vreinterpret_f16_p64(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_s8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s8(int8x8_t a) {
  return vreinterpret_f32_s8(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_s16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s16(int16x4_t a) {
  return vreinterpret_f32_s16(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_s32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s32(int32x2_t a) {
  return vreinterpret_f32_s32(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_s64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s64(int64x1_t a) {
  return vreinterpret_f32_s64(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_u8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u8(uint8x8_t a) {
  return vreinterpret_f32_u8(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_u16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u16(uint16x4_t a) {
  return vreinterpret_f32_u16(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_u32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u32(uint32x2_t a) {
  return vreinterpret_f32_u32(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_u64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u64(uint64x1_t a) {
  return vreinterpret_f32_u64(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_f16(<4 x half> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_f16(float16x4_t a) {
  return vreinterpret_f32_f16(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_f64(<1 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_f64(float64x1_t a) {
  return vreinterpret_f32_f64(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_p8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_p8(poly8x8_t a) {
  return vreinterpret_f32_p8(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_p16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_p16(poly16x4_t a) {
  return vreinterpret_f32_p16(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_p64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_p64(poly64x1_t a) {
  return vreinterpret_f32_p64(a);
}

// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_s8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x double>
// CHECK:   ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_s8(int8x8_t a) {
  return vreinterpret_f64_s8(a);
}

// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_s16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x double>
// CHECK:   ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_s16(int16x4_t a) {
  return vreinterpret_f64_s16(a);
}

// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_s32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x double>
// CHECK:   ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_s32(int32x2_t a) {
  return vreinterpret_f64_s32(a);
}

// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_s64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <1 x double>
// CHECK:   ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_s64(int64x1_t a) {
  return vreinterpret_f64_s64(a);
}

// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_u8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x double>
// CHECK:   ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_u8(uint8x8_t a) {
  return vreinterpret_f64_u8(a);
}

// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_u16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x double>
// CHECK:   ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_u16(uint16x4_t a) {
  return vreinterpret_f64_u16(a);
}

// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_u32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x double>
// CHECK:   ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_u32(uint32x2_t a) {
  return vreinterpret_f64_u32(a);
}

// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_u64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <1 x double>
// CHECK:   ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_u64(uint64x1_t a) {
  return vreinterpret_f64_u64(a);
}

// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_f16(<4 x half> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x double>
// CHECK:   ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_f16(float16x4_t a) {
  return vreinterpret_f64_f16(a);
}

// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_f32(<2 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x double>
// CHECK:   ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_f32(float32x2_t a) {
  return vreinterpret_f64_f32(a);
}

// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_p8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x double>
// CHECK:   ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_p8(poly8x8_t a) {
  return vreinterpret_f64_p8(a);
}

// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_p16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x double>
// CHECK:   ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_p16(poly16x4_t a) {
  return vreinterpret_f64_p16(a);
}

// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_p64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <1 x double>
// CHECK:   ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_p64(poly64x1_t a) {
  return vreinterpret_f64_p64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_s8(<8 x i8> %a) #0 {
// CHECK:   ret <8 x i8> %a
poly8x8_t test_vreinterpret_p8_s8(int8x8_t a) {
  return vreinterpret_p8_s8(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_s16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_s16(int16x4_t a) {
  return vreinterpret_p8_s16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_s32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_s32(int32x2_t a) {
  return vreinterpret_p8_s32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_s64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_s64(int64x1_t a) {
  return vreinterpret_p8_s64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_u8(<8 x i8> %a) #0 {
// CHECK:   ret <8 x i8> %a
poly8x8_t test_vreinterpret_p8_u8(uint8x8_t a) {
  return vreinterpret_p8_u8(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_u16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_u16(uint16x4_t a) {
  return vreinterpret_p8_u16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_u32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_u32(uint32x2_t a) {
  return vreinterpret_p8_u32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_u64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_u64(uint64x1_t a) {
  return vreinterpret_p8_u64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_f16(<4 x half> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_f16(float16x4_t a) {
  return vreinterpret_p8_f16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_f32(<2 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_f32(float32x2_t a) {
  return vreinterpret_p8_f32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_f64(<1 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_f64(float64x1_t a) {
  return vreinterpret_p8_f64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_p16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_p16(poly16x4_t a) {
  return vreinterpret_p8_p16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_p64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_p64(poly64x1_t a) {
  return vreinterpret_p8_p64(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_s8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_s8(int8x8_t a) {
  return vreinterpret_p16_s8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_s16(<4 x i16> %a) #0 {
// CHECK:   ret <4 x i16> %a
poly16x4_t test_vreinterpret_p16_s16(int16x4_t a) {
  return vreinterpret_p16_s16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_s32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_s32(int32x2_t a) {
  return vreinterpret_p16_s32(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_s64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_s64(int64x1_t a) {
  return vreinterpret_p16_s64(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_u8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_u8(uint8x8_t a) {
  return vreinterpret_p16_u8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_u16(<4 x i16> %a) #0 {
// CHECK:   ret <4 x i16> %a
poly16x4_t test_vreinterpret_p16_u16(uint16x4_t a) {
  return vreinterpret_p16_u16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_u32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_u32(uint32x2_t a) {
  return vreinterpret_p16_u32(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_u64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_u64(uint64x1_t a) {
  return vreinterpret_p16_u64(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_f16(<4 x half> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_f16(float16x4_t a) {
  return vreinterpret_p16_f16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_f32(<2 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_f32(float32x2_t a) {
  return vreinterpret_p16_f32(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_f64(<1 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_f64(float64x1_t a) {
  return vreinterpret_p16_f64(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_p8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_p8(poly8x8_t a) {
  return vreinterpret_p16_p8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_p64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_p64(poly64x1_t a) {
  return vreinterpret_p16_p64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_s8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_s8(int8x8_t a) {
  return vreinterpret_p64_s8(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_s16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_s16(int16x4_t a) {
  return vreinterpret_p64_s16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_s32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_s32(int32x2_t a) {
  return vreinterpret_p64_s32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_s64(<1 x i64> %a) #0 {
// CHECK:   ret <1 x i64> %a
poly64x1_t test_vreinterpret_p64_s64(int64x1_t a) {
  return vreinterpret_p64_s64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_u8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_u8(uint8x8_t a) {
  return vreinterpret_p64_u8(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_u16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_u16(uint16x4_t a) {
  return vreinterpret_p64_u16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_u32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_u32(uint32x2_t a) {
  return vreinterpret_p64_u32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_u64(<1 x i64> %a) #0 {
// CHECK:   ret <1 x i64> %a
poly64x1_t test_vreinterpret_p64_u64(uint64x1_t a) {
  return vreinterpret_p64_u64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_f16(<4 x half> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_f16(float16x4_t a) {
  return vreinterpret_p64_f16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_f32(<2 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_f32(float32x2_t a) {
  return vreinterpret_p64_f32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_f64(<1 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_f64(float64x1_t a) {
  return vreinterpret_p64_f64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_p8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_p8(poly8x8_t a) {
  return vreinterpret_p64_p8(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_p16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_p16(poly16x4_t a) {
  return vreinterpret_p64_p16(a);
}

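// The vreinterpretq_X_Y forms below are the 128-bit (q-register) counterparts
// of the intrinsics above and follow the same single-bitcast pattern. A
// minimal sketch (hypothetical helper, not covered by any CHECK line and,
// being static inline and unused, emitting no IR):
static inline uint8x16_t example_float_bytes(float32x4_t v) {
  // View four floats as their sixteen constituent bytes.
  return vreinterpretq_u8_f32(v);
}
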
// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_s16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_s16(int16x8_t a) {
  return vreinterpretq_s8_s16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_s32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_s32(int32x4_t a) {
  return vreinterpretq_s8_s32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_s64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_s64(int64x2_t a) {
  return vreinterpretq_s8_s64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_u8(<16 x i8> %a) #0 {
// CHECK:   ret <16 x i8> %a
int8x16_t test_vreinterpretq_s8_u8(uint8x16_t a) {
  return vreinterpretq_s8_u8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_u16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_u16(uint16x8_t a) {
  return vreinterpretq_s8_u16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_u32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_u32(uint32x4_t a) {
  return vreinterpretq_s8_u32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_u64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_u64(uint64x2_t a) {
  return vreinterpretq_s8_u64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_f16(<8 x half> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_f16(float16x8_t a) {
  return vreinterpretq_s8_f16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_f32(<4 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_f32(float32x4_t a) {
  return vreinterpretq_s8_f32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_f64(<2 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_f64(float64x2_t a) {
  return vreinterpretq_s8_f64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_p8(<16 x i8> %a) #0 {
// CHECK:   ret <16 x i8> %a
int8x16_t test_vreinterpretq_s8_p8(poly8x16_t a) {
  return vreinterpretq_s8_p8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_p16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_p16(poly16x8_t a) {
  return vreinterpretq_s8_p16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_p64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_p64(poly64x2_t a) {
  return vreinterpretq_s8_p64(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_s8(<16 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_s8(int8x16_t a) {
  return vreinterpretq_s16_s8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_s32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_s32(int32x4_t a) {
  return vreinterpretq_s16_s32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_s64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_s64(int64x2_t a) {
  return vreinterpretq_s16_s64(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_u8(<16 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_u8(uint8x16_t a) {
  return vreinterpretq_s16_u8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_u16(<8 x i16> %a) #0 {
// CHECK:   ret <8 x i16> %a
int16x8_t test_vreinterpretq_s16_u16(uint16x8_t a) {
  return vreinterpretq_s16_u16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_u32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_u32(uint32x4_t a) {
  return vreinterpretq_s16_u32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_u64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_u64(uint64x2_t a) {
  return vreinterpretq_s16_u64(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_f16(<8 x half> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_f16(float16x8_t a) {
  return vreinterpretq_s16_f16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_f32(<4 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_f32(float32x4_t a) {
  return vreinterpretq_s16_f32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_f64(<2 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_f64(float64x2_t a) {
  return vreinterpretq_s16_f64(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_p8(<16 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_p8(poly8x16_t a) {
  return vreinterpretq_s16_p8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_p16(<8 x i16> %a) #0 {
// CHECK:   ret <8 x i16> %a
int16x8_t test_vreinterpretq_s16_p16(poly16x8_t a) {
  return vreinterpretq_s16_p16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_p64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_p64(poly64x2_t a) {
  return vreinterpretq_s16_p64(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_s8(<16 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_s8(int8x16_t a) {
  return vreinterpretq_s32_s8(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_s16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_s16(int16x8_t a) {
  return vreinterpretq_s32_s16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_s64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_s64(int64x2_t a) {
  return vreinterpretq_s32_s64(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_u8(<16 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_u8(uint8x16_t a) {
  return vreinterpretq_s32_u8(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_u16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_u16(uint16x8_t a) {
  return vreinterpretq_s32_u16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_u32(<4 x i32> %a) #0 {
// CHECK:   ret <4 x i32> %a
int32x4_t test_vreinterpretq_s32_u32(uint32x4_t a) {
  return vreinterpretq_s32_u32(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_u64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_u64(uint64x2_t a) {
  return vreinterpretq_s32_u64(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_f16(<8 x half> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_f16(float16x8_t a) {
  return vreinterpretq_s32_f16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_f32(<4 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_f32(float32x4_t a) {
  return vreinterpretq_s32_f32(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_f64(<2 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_f64(float64x2_t a) {
  return vreinterpretq_s32_f64(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_p8(<16 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_p8(poly8x16_t a) {
  return vreinterpretq_s32_p8(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_p16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_p16(poly16x8_t a) {
  return vreinterpretq_s32_p16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_p64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_p64(poly64x2_t a) {
  return vreinterpretq_s32_p64(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_s8(<16 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_s8(int8x16_t a) {
  return vreinterpretq_s64_s8(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_s16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_s16(int16x8_t a) {
  return vreinterpretq_s64_s16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_s32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_s32(int32x4_t a) {
  return vreinterpretq_s64_s32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_u8(<16 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_u8(uint8x16_t a) {
  return vreinterpretq_s64_u8(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_u16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_u16(uint16x8_t a) {
  return vreinterpretq_s64_u16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_u32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_u32(uint32x4_t a) {
  return vreinterpretq_s64_u32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_u64(<2 x i64> %a) #0 {
// CHECK:   ret <2 x i64> %a
int64x2_t test_vreinterpretq_s64_u64(uint64x2_t a) {
  return vreinterpretq_s64_u64(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_f16(<8 x half> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_f16(float16x8_t a) {
  return vreinterpretq_s64_f16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_f32(<4 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_f32(float32x4_t a) {
  return vreinterpretq_s64_f32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_f64(<2 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_f64(float64x2_t a) {
  return vreinterpretq_s64_f64(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_p8(<16 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_p8(poly8x16_t a) {
  return vreinterpretq_s64_p8(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_p16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_p16(poly16x8_t a) {
  return vreinterpretq_s64_p16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_p64(<2 x i64> %a) #0 {
// CHECK:   ret <2 x i64> %a
int64x2_t test_vreinterpretq_s64_p64(poly64x2_t a) {
  return vreinterpretq_s64_p64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_s8(<16 x i8> %a) #0 {
// CHECK:   ret <16 x i8> %a
uint8x16_t test_vreinterpretq_u8_s8(int8x16_t a) {
  return vreinterpretq_u8_s8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_s16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_s16(int16x8_t a) {
  return vreinterpretq_u8_s16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_s32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_s32(int32x4_t a) {
  return vreinterpretq_u8_s32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_s64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_s64(int64x2_t a) {
  return vreinterpretq_u8_s64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_u16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_u16(uint16x8_t a) {
  return vreinterpretq_u8_u16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_u32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_u32(uint32x4_t a) {
  return vreinterpretq_u8_u32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_u64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_u64(uint64x2_t a) {
  return vreinterpretq_u8_u64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_f16(<8 x half> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_f16(float16x8_t a) {
  return vreinterpretq_u8_f16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_f32(<4 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_f32(float32x4_t a) {
  return vreinterpretq_u8_f32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_f64(<2 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_f64(float64x2_t a) {
  return vreinterpretq_u8_f64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_p8(<16 x i8> %a) #0 {
// CHECK:   ret <16 x i8> %a
uint8x16_t test_vreinterpretq_u8_p8(poly8x16_t a) {
  return vreinterpretq_u8_p8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_p16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_p16(poly16x8_t a) {
  return vreinterpretq_u8_p16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_p64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_p64(poly64x2_t a) {
  return vreinterpretq_u8_p64(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_s8(<16 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_s8(int8x16_t a) {
  return vreinterpretq_u16_s8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_s16(<8 x i16> %a) #0 {
// CHECK:   ret <8 x i16> %a
uint16x8_t test_vreinterpretq_u16_s16(int16x8_t a) {
  return vreinterpretq_u16_s16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_s32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_s32(int32x4_t a) {
  return vreinterpretq_u16_s32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_s64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_s64(int64x2_t a) {
  return vreinterpretq_u16_s64(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_u8(<16 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_u8(uint8x16_t a) {
  return vreinterpretq_u16_u8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_u32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_u32(uint32x4_t a) {
  return vreinterpretq_u16_u32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_u64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_u64(uint64x2_t a) {
  return vreinterpretq_u16_u64(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_f16(<8 x half> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_f16(float16x8_t a) {
  return vreinterpretq_u16_f16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_f32(<4 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_f32(float32x4_t a) {
  return vreinterpretq_u16_f32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_f64(<2 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_f64(float64x2_t a) {
  return vreinterpretq_u16_f64(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_p8(<16 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_p8(poly8x16_t a) {
  return vreinterpretq_u16_p8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_p16(<8 x i16> %a) #0 {
// CHECK:   ret <8 x i16> %a
uint16x8_t test_vreinterpretq_u16_p16(poly16x8_t a) {
  return vreinterpretq_u16_p16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_p64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_p64(poly64x2_t a) {
  return vreinterpretq_u16_p64(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_s8(<16 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_s8(int8x16_t a) {
  return vreinterpretq_u32_s8(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_s16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_s16(int16x8_t a) {
  return vreinterpretq_u32_s16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_s32(<4 x i32> %a) #0 {
// CHECK:   ret <4 x i32> %a
uint32x4_t test_vreinterpretq_u32_s32(int32x4_t a) {
  return vreinterpretq_u32_s32(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_s64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_s64(int64x2_t a) {
  return vreinterpretq_u32_s64(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_u8(<16 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_u8(uint8x16_t a) {
  return vreinterpretq_u32_u8(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_u16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_u16(uint16x8_t a) {
  return vreinterpretq_u32_u16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_u64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_u64(uint64x2_t a) {
  return vreinterpretq_u32_u64(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_f16(<8 x half> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_f16(float16x8_t a) {
  return vreinterpretq_u32_f16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_f32(<4 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_f32(float32x4_t a) {
  return vreinterpretq_u32_f32(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_f64(<2 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_f64(float64x2_t a) {
  return vreinterpretq_u32_f64(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_p8(<16 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_p8(poly8x16_t a) {
  return vreinterpretq_u32_p8(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_p16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_p16(poly16x8_t a) {
  return vreinterpretq_u32_p16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_p64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_p64(poly64x2_t a) {
  return vreinterpretq_u32_p64(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_s8(<16 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_s8(int8x16_t a) {
  return vreinterpretq_u64_s8(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_s16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_s16(int16x8_t a) {
  return vreinterpretq_u64_s16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_s32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_s32(int32x4_t a) {
  return vreinterpretq_u64_s32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_s64(<2 x i64> %a) #0 {
// CHECK:   ret <2 x i64> %a
uint64x2_t test_vreinterpretq_u64_s64(int64x2_t a) {
  return vreinterpretq_u64_s64(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_u8(<16 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_u8(uint8x16_t a) {
  return vreinterpretq_u64_u8(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_u16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_u16(uint16x8_t a) {
  return vreinterpretq_u64_u16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_u32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_u32(uint32x4_t a) {
  return vreinterpretq_u64_u32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_f16(<8 x half> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_f16(float16x8_t a) {
  return vreinterpretq_u64_f16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_f32(<4 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_f32(float32x4_t a) {
  return vreinterpretq_u64_f32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_f64(<2 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_f64(float64x2_t a) {
  return vreinterpretq_u64_f64(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_p8(<16 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_p8(poly8x16_t a) {
  return vreinterpretq_u64_p8(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_p16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_p16(poly16x8_t a) {
  return vreinterpretq_u64_p16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_p64(<2 x i64> %a) #0 {
// CHECK:   ret <2 x i64> %a
uint64x2_t test_vreinterpretq_u64_p64(poly64x2_t a) {
  return vreinterpretq_u64_p64(a);
}

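// Note: when source and destination share the same IR element type (for
// example test_vreinterpretq_u64_s64 above), no bitcast is emitted and the
// checked IR simply returns %a; the cast exists only at the C type level.
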
21250 // CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_s8(<16 x i8> %a) #0 {
21251 // CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
21252 // CHECK:   ret <8 x half> [[TMP0]]
test_vreinterpretq_f16_s8(int8x16_t a)21253 float16x8_t test_vreinterpretq_f16_s8(int8x16_t a) {
21254   return vreinterpretq_f16_s8(a);
21255 }
21256 
21257 // CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_s16(<8 x i16> %a) #0 {
21258 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
21259 // CHECK:   ret <8 x half> [[TMP0]]
test_vreinterpretq_f16_s16(int16x8_t a)21260 float16x8_t test_vreinterpretq_f16_s16(int16x8_t a) {
21261   return vreinterpretq_f16_s16(a);
21262 }
21263 
21264 // CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_s32(<4 x i32> %a) #0 {
21265 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x half>
21266 // CHECK:   ret <8 x half> [[TMP0]]
test_vreinterpretq_f16_s32(int32x4_t a)21267 float16x8_t test_vreinterpretq_f16_s32(int32x4_t a) {
21268   return vreinterpretq_f16_s32(a);
21269 }
21270 
21271 // CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_s64(<2 x i64> %a) #0 {
21272 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half>
21273 // CHECK:   ret <8 x half> [[TMP0]]
test_vreinterpretq_f16_s64(int64x2_t a)21274 float16x8_t test_vreinterpretq_f16_s64(int64x2_t a) {
21275   return vreinterpretq_f16_s64(a);
21276 }
21277 
21278 // CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_u8(<16 x i8> %a) #0 {
21279 // CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
21280 // CHECK:   ret <8 x half> [[TMP0]]
test_vreinterpretq_f16_u8(uint8x16_t a)21281 float16x8_t test_vreinterpretq_f16_u8(uint8x16_t a) {
21282   return vreinterpretq_f16_u8(a);
21283 }
21284 
21285 // CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_u16(<8 x i16> %a) #0 {
21286 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
21287 // CHECK:   ret <8 x half> [[TMP0]]
test_vreinterpretq_f16_u16(uint16x8_t a)21288 float16x8_t test_vreinterpretq_f16_u16(uint16x8_t a) {
21289   return vreinterpretq_f16_u16(a);
21290 }
21291 
21292 // CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_u32(<4 x i32> %a) #0 {
21293 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x half>
21294 // CHECK:   ret <8 x half> [[TMP0]]
test_vreinterpretq_f16_u32(uint32x4_t a)21295 float16x8_t test_vreinterpretq_f16_u32(uint32x4_t a) {
21296   return vreinterpretq_f16_u32(a);
21297 }
21298 
21299 // CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_u64(<2 x i64> %a) #0 {
21300 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half>
21301 // CHECK:   ret <8 x half> [[TMP0]]
test_vreinterpretq_f16_u64(uint64x2_t a)21302 float16x8_t test_vreinterpretq_f16_u64(uint64x2_t a) {
21303   return vreinterpretq_f16_u64(a);
21304 }
21305 
21306 // CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_f32(<4 x float> %a) #0 {
21307 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x half>
21308 // CHECK:   ret <8 x half> [[TMP0]]
test_vreinterpretq_f16_f32(float32x4_t a)21309 float16x8_t test_vreinterpretq_f16_f32(float32x4_t a) {
21310   return vreinterpretq_f16_f32(a);
21311 }
21312 
21313 // CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_f64(<2 x double> %a) #0 {
21314 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x half>
21315 // CHECK:   ret <8 x half> [[TMP0]]
test_vreinterpretq_f16_f64(float64x2_t a)21316 float16x8_t test_vreinterpretq_f16_f64(float64x2_t a) {
21317   return vreinterpretq_f16_f64(a);
21318 }
21319 
21320 // CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_p8(<16 x i8> %a) #0 {
21321 // CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
21322 // CHECK:   ret <8 x half> [[TMP0]]
test_vreinterpretq_f16_p8(poly8x16_t a)21323 float16x8_t test_vreinterpretq_f16_p8(poly8x16_t a) {
21324   return vreinterpretq_f16_p8(a);
21325 }
21326 
21327 // CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_p16(<8 x i16> %a) #0 {
21328 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
21329 // CHECK:   ret <8 x half> [[TMP0]]
test_vreinterpretq_f16_p16(poly16x8_t a)21330 float16x8_t test_vreinterpretq_f16_p16(poly16x8_t a) {
21331   return vreinterpretq_f16_p16(a);
21332 }
21333 
21334 // CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_p64(<2 x i64> %a) #0 {
21335 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half>
21336 // CHECK:   ret <8 x half> [[TMP0]]
test_vreinterpretq_f16_p64(poly64x2_t a)21337 float16x8_t test_vreinterpretq_f16_p64(poly64x2_t a) {
21338   return vreinterpretq_f16_p64(a);
21339 }
21340 
21341 // CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_s8(<16 x i8> %a) #0 {
21342 // CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
21343 // CHECK:   ret <4 x float> [[TMP0]]
test_vreinterpretq_f32_s8(int8x16_t a)21344 float32x4_t test_vreinterpretq_f32_s8(int8x16_t a) {
21345   return vreinterpretq_f32_s8(a);
21346 }
21347 
21348 // CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_s16(<8 x i16> %a) #0 {
21349 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
21350 // CHECK:   ret <4 x float> [[TMP0]]
test_vreinterpretq_f32_s16(int16x8_t a)21351 float32x4_t test_vreinterpretq_f32_s16(int16x8_t a) {
21352   return vreinterpretq_f32_s16(a);
21353 }
21354 
21355 // CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_s32(<4 x i32> %a) #0 {
21356 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <4 x float>
21357 // CHECK:   ret <4 x float> [[TMP0]]
test_vreinterpretq_f32_s32(int32x4_t a)21358 float32x4_t test_vreinterpretq_f32_s32(int32x4_t a) {
21359   return vreinterpretq_f32_s32(a);
21360 }
21361 
21362 // CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_s64(<2 x i64> %a) #0 {
21363 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float>
21364 // CHECK:   ret <4 x float> [[TMP0]]
test_vreinterpretq_f32_s64(int64x2_t a)21365 float32x4_t test_vreinterpretq_f32_s64(int64x2_t a) {
21366   return vreinterpretq_f32_s64(a);
21367 }
21368 
21369 // CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_u8(<16 x i8> %a) #0 {
21370 // CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
21371 // CHECK:   ret <4 x float> [[TMP0]]
test_vreinterpretq_f32_u8(uint8x16_t a)21372 float32x4_t test_vreinterpretq_f32_u8(uint8x16_t a) {
21373   return vreinterpretq_f32_u8(a);
21374 }
21375 
21376 // CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_u16(<8 x i16> %a) #0 {
21377 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
21378 // CHECK:   ret <4 x float> [[TMP0]]
test_vreinterpretq_f32_u16(uint16x8_t a)21379 float32x4_t test_vreinterpretq_f32_u16(uint16x8_t a) {
21380   return vreinterpretq_f32_u16(a);
21381 }
21382 
21383 // CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_u32(<4 x i32> %a) #0 {
21384 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <4 x float>
21385 // CHECK:   ret <4 x float> [[TMP0]]
test_vreinterpretq_f32_u32(uint32x4_t a)21386 float32x4_t test_vreinterpretq_f32_u32(uint32x4_t a) {
21387   return vreinterpretq_f32_u32(a);
21388 }
21389 
21390 // CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_u64(<2 x i64> %a) #0 {
21391 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float>
21392 // CHECK:   ret <4 x float> [[TMP0]]
test_vreinterpretq_f32_u64(uint64x2_t a)21393 float32x4_t test_vreinterpretq_f32_u64(uint64x2_t a) {
21394   return vreinterpretq_f32_u64(a);
21395 }
21396 
21397 // CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_f16(<8 x half> %a) #0 {
21398 // CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x float>
21399 // CHECK:   ret <4 x float> [[TMP0]]
test_vreinterpretq_f32_f16(float16x8_t a)21400 float32x4_t test_vreinterpretq_f32_f16(float16x8_t a) {
21401   return vreinterpretq_f32_f16(a);
21402 }
21403 
21404 // CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_f64(<2 x double> %a) #0 {
21405 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <4 x float>
21406 // CHECK:   ret <4 x float> [[TMP0]]
test_vreinterpretq_f32_f64(float64x2_t a)21407 float32x4_t test_vreinterpretq_f32_f64(float64x2_t a) {
21408   return vreinterpretq_f32_f64(a);
21409 }
21410 
21411 // CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_p8(<16 x i8> %a) #0 {
21412 // CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
21413 // CHECK:   ret <4 x float> [[TMP0]]
test_vreinterpretq_f32_p8(poly8x16_t a)21414 float32x4_t test_vreinterpretq_f32_p8(poly8x16_t a) {
21415   return vreinterpretq_f32_p8(a);
21416 }
21417 
21418 // CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_p16(<8 x i16> %a) #0 {
21419 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
21420 // CHECK:   ret <4 x float> [[TMP0]]
test_vreinterpretq_f32_p16(poly16x8_t a)21421 float32x4_t test_vreinterpretq_f32_p16(poly16x8_t a) {
21422   return vreinterpretq_f32_p16(a);
21423 }
21424 
21425 // CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_p64(<2 x i64> %a) #0 {
21426 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float>
21427 // CHECK:   ret <4 x float> [[TMP0]]
test_vreinterpretq_f32_p64(poly64x2_t a)21428 float32x4_t test_vreinterpretq_f32_p64(poly64x2_t a) {
21429   return vreinterpretq_f32_p64(a);
21430 }
21431 
21432 // CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_s8(<16 x i8> %a) #0 {
21433 // CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x double>
21434 // CHECK:   ret <2 x double> [[TMP0]]
test_vreinterpretq_f64_s8(int8x16_t a)21435 float64x2_t test_vreinterpretq_f64_s8(int8x16_t a) {
21436   return vreinterpretq_f64_s8(a);
21437 }
21438 
21439 // CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_s16(<8 x i16> %a) #0 {
21440 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x double>
21441 // CHECK:   ret <2 x double> [[TMP0]]
test_vreinterpretq_f64_s16(int16x8_t a)21442 float64x2_t test_vreinterpretq_f64_s16(int16x8_t a) {
21443   return vreinterpretq_f64_s16(a);
21444 }
21445 
21446 // CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_s32(<4 x i32> %a) #0 {
21447 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x double>
21448 // CHECK:   ret <2 x double> [[TMP0]]
test_vreinterpretq_f64_s32(int32x4_t a)21449 float64x2_t test_vreinterpretq_f64_s32(int32x4_t a) {
21450   return vreinterpretq_f64_s32(a);
}

// CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_s64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <2 x double>
// CHECK:   ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_s64(int64x2_t a) {
  return vreinterpretq_f64_s64(a);
}

// CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_u8(<16 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x double>
// CHECK:   ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_u8(uint8x16_t a) {
  return vreinterpretq_f64_u8(a);
}

// CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_u16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x double>
// CHECK:   ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_u16(uint16x8_t a) {
  return vreinterpretq_f64_u16(a);
}

// CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_u32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x double>
// CHECK:   ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_u32(uint32x4_t a) {
  return vreinterpretq_f64_u32(a);
}

// CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_u64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <2 x double>
// CHECK:   ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_u64(uint64x2_t a) {
  return vreinterpretq_f64_u64(a);
}

// CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_f16(<8 x half> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x double>
// CHECK:   ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_f16(float16x8_t a) {
  return vreinterpretq_f64_f16(a);
}

// CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_f32(<4 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x double>
// CHECK:   ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_f32(float32x4_t a) {
  return vreinterpretq_f64_f32(a);
}

// CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_p8(<16 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x double>
// CHECK:   ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_p8(poly8x16_t a) {
  return vreinterpretq_f64_p8(a);
}

// CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_p16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x double>
// CHECK:   ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_p16(poly16x8_t a) {
  return vreinterpretq_f64_p16(a);
}

// CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_p64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <2 x double>
// CHECK:   ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_p64(poly64x2_t a) {
  return vreinterpretq_f64_p64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_s8(<16 x i8> %a) #0 {
// CHECK:   ret <16 x i8> %a
poly8x16_t test_vreinterpretq_p8_s8(int8x16_t a) {
  return vreinterpretq_p8_s8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_s16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_s16(int16x8_t a) {
  return vreinterpretq_p8_s16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_s32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_s32(int32x4_t a) {
  return vreinterpretq_p8_s32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_s64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_s64(int64x2_t a) {
  return vreinterpretq_p8_s64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_u8(<16 x i8> %a) #0 {
// CHECK:   ret <16 x i8> %a
poly8x16_t test_vreinterpretq_p8_u8(uint8x16_t a) {
  return vreinterpretq_p8_u8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_u16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_u16(uint16x8_t a) {
  return vreinterpretq_p8_u16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_u32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_u32(uint32x4_t a) {
  return vreinterpretq_p8_u32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_u64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_u64(uint64x2_t a) {
  return vreinterpretq_p8_u64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_f16(<8 x half> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_f16(float16x8_t a) {
  return vreinterpretq_p8_f16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_f32(<4 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_f32(float32x4_t a) {
  return vreinterpretq_p8_f32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_f64(<2 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_f64(float64x2_t a) {
  return vreinterpretq_p8_f64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_p16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_p16(poly16x8_t a) {
  return vreinterpretq_p8_p16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_p64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_p64(poly64x2_t a) {
  return vreinterpretq_p8_p64(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_s8(<16 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_s8(int8x16_t a) {
  return vreinterpretq_p16_s8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_s16(<8 x i16> %a) #0 {
// CHECK:   ret <8 x i16> %a
poly16x8_t test_vreinterpretq_p16_s16(int16x8_t a) {
  return vreinterpretq_p16_s16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_s32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_s32(int32x4_t a) {
  return vreinterpretq_p16_s32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_s64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_s64(int64x2_t a) {
  return vreinterpretq_p16_s64(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_u8(<16 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_u8(uint8x16_t a) {
  return vreinterpretq_p16_u8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_u16(<8 x i16> %a) #0 {
// CHECK:   ret <8 x i16> %a
poly16x8_t test_vreinterpretq_p16_u16(uint16x8_t a) {
  return vreinterpretq_p16_u16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_u32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_u32(uint32x4_t a) {
  return vreinterpretq_p16_u32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_u64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_u64(uint64x2_t a) {
  return vreinterpretq_p16_u64(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_f16(<8 x half> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_f16(float16x8_t a) {
  return vreinterpretq_p16_f16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_f32(<4 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_f32(float32x4_t a) {
  return vreinterpretq_p16_f32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_f64(<2 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_f64(float64x2_t a) {
  return vreinterpretq_p16_f64(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_p8(<16 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_p8(poly8x16_t a) {
  return vreinterpretq_p16_p8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_p64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_p64(poly64x2_t a) {
  return vreinterpretq_p16_p64(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_s8(<16 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_s8(int8x16_t a) {
  return vreinterpretq_p64_s8(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_s16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_s16(int16x8_t a) {
  return vreinterpretq_p64_s16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_s32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_s32(int32x4_t a) {
  return vreinterpretq_p64_s32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_s64(<2 x i64> %a) #0 {
// CHECK:   ret <2 x i64> %a
poly64x2_t test_vreinterpretq_p64_s64(int64x2_t a) {
  return vreinterpretq_p64_s64(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_u8(<16 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_u8(uint8x16_t a) {
  return vreinterpretq_p64_u8(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_u16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_u16(uint16x8_t a) {
  return vreinterpretq_p64_u16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_u32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_u32(uint32x4_t a) {
  return vreinterpretq_p64_u32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_u64(<2 x i64> %a) #0 {
// CHECK:   ret <2 x i64> %a
poly64x2_t test_vreinterpretq_p64_u64(uint64x2_t a) {
  return vreinterpretq_p64_u64(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_f16(<8 x half> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_f16(float16x8_t a) {
  return vreinterpretq_p64_f16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_f32(<4 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_f32(float32x4_t a) {
  return vreinterpretq_p64_f32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_f64(<2 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_f64(float64x2_t a) {
  return vreinterpretq_p64_f64(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_p8(<16 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_p8(poly8x16_t a) {
  return vreinterpretq_p64_p8(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_p16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_p16(poly16x8_t a) {
  return vreinterpretq_p64_p16(a);
}

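// The vreinterpretq_* tests above all reduce to a single IR bitcast (or to
// nothing when source and destination lane types already agree, e.g.
// p8 <-> s8/u8): the 128-bit register contents are reused verbatim. A
// minimal usage sketch of our own, not exercised by FileCheck; as a static
// inline helper that is never called, it emits no IR and leaves the checked
// output untouched.
static inline uint64x2_t sketch_f64_abs_bits(float64x2_t v) {
  // Reinterpret the two doubles as u64 lanes and clear the sign bits.
  uint64x2_t bits = vreinterpretq_u64_f64(v);
  return vandq_u64(bits, vdupq_n_u64(0x7fffffffffffffffULL));
}
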
// CHECK-LABEL: define float @test_vabds_f32(float %a, float %b) #0 {
// CHECK:   [[VABDS_F32_I:%.*]] = call float @llvm.aarch64.sisd.fabd.f32(float %a, float %b) #4
// CHECK:   ret float [[VABDS_F32_I]]
float32_t test_vabds_f32(float32_t a, float32_t b) {
  return vabds_f32(a, b);
}

// CHECK-LABEL: define double @test_vabdd_f64(double %a, double %b) #0 {
// CHECK:   [[VABDD_F64_I:%.*]] = call double @llvm.aarch64.sisd.fabd.f64(double %a, double %b) #4
// CHECK:   ret double [[VABDD_F64_I]]
float64_t test_vabdd_f64(float64_t a, float64_t b) {
  return vabdd_f64(a, b);
}

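// vabds_f32/vabdd_f64 lower to the scalar FABD instruction via
// llvm.aarch64.sisd.fabd, which computes the absolute difference in one
// step. Scalar equivalence sketch (our helper, never called, emits no IR):
static inline double sketch_fabd_equiv(double a, double b) {
  return __builtin_fabs(a - b); // the value vabdd_f64(a, b) returns
}
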
// CHECK-LABEL: define <1 x i64> @test_vuqadd_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VUQADD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VUQADD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VUQADD2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.suqadd.v1i64(<1 x i64> [[VUQADD_I]], <1 x i64> [[VUQADD1_I]]) #4
// CHECK:   ret <1 x i64> [[VUQADD2_I]]
int64x1_t test_vuqadd_s64(int64x1_t a, uint64x1_t b) {
  return vuqadd_s64(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vsqadd_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VSQADD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VSQADD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VSQADD2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.usqadd.v1i64(<1 x i64> [[VSQADD_I]], <1 x i64> [[VSQADD1_I]]) #4
// CHECK:   ret <1 x i64> [[VSQADD2_I]]
uint64x1_t test_vsqadd_u64(uint64x1_t a, int64x1_t b) {
  return vsqadd_u64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vsqadd_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK:   [[VSQADD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.usqadd.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK:   ret <8 x i8> [[VSQADD_I]]
uint8x8_t test_vsqadd_u8(uint8x8_t a, int8x8_t b) {
  return vsqadd_u8(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vsqaddq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK:   [[VSQADD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.usqadd.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK:   ret <16 x i8> [[VSQADD_I]]
uint8x16_t test_vsqaddq_u8(uint8x16_t a, int8x16_t b) {
  return vsqaddq_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vsqadd_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VSQADD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VSQADD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VSQADD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.usqadd.v4i16(<4 x i16> [[VSQADD_I]], <4 x i16> [[VSQADD1_I]]) #4
// CHECK:   ret <4 x i16> [[VSQADD2_I]]
uint16x4_t test_vsqadd_u16(uint16x4_t a, int16x4_t b) {
  return vsqadd_u16(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vsqaddq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VSQADD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VSQADD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VSQADD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.usqadd.v8i16(<8 x i16> [[VSQADD_I]], <8 x i16> [[VSQADD1_I]]) #4
// CHECK:   ret <8 x i16> [[VSQADD2_I]]
uint16x8_t test_vsqaddq_u16(uint16x8_t a, int16x8_t b) {
  return vsqaddq_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vsqadd_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VSQADD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VSQADD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VSQADD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.usqadd.v2i32(<2 x i32> [[VSQADD_I]], <2 x i32> [[VSQADD1_I]]) #4
// CHECK:   ret <2 x i32> [[VSQADD2_I]]
uint32x2_t test_vsqadd_u32(uint32x2_t a, int32x2_t b) {
  return vsqadd_u32(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vsqaddq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VSQADD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VSQADD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VSQADD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.usqadd.v4i32(<4 x i32> [[VSQADD_I]], <4 x i32> [[VSQADD1_I]]) #4
// CHECK:   ret <4 x i32> [[VSQADD2_I]]
uint32x4_t test_vsqaddq_u32(uint32x4_t a, int32x4_t b) {
  return vsqaddq_u32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vsqaddq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VSQADD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VSQADD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VSQADD2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.usqadd.v2i64(<2 x i64> [[VSQADD_I]], <2 x i64> [[VSQADD1_I]]) #4
// CHECK:   ret <2 x i64> [[VSQADD2_I]]
uint64x2_t test_vsqaddq_u64(uint64x2_t a, int64x2_t b) {
  return vsqaddq_u64(a, b);
}

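// SUQADD/USQADD mix signedness: vuqadd_* adds an unsigned addend into a
// signed accumulator with signed saturation, while vsqadd_* adds a signed
// addend (possibly negative) into an unsigned accumulator with unsigned
// saturation. Per-lane scalar model for the 8-bit case (our helper, never
// called, emits no IR):
static inline uint8_t sketch_usqadd8(uint8_t acc, int8_t add) {
  int sum = (int)acc + (int)add; // widen so the sum cannot wrap
  if (sum < 0)   return 0;       // clamp into the unsigned 8-bit range
  if (sum > 255) return 255;
  return (uint8_t)sum;
}
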
// CHECK-LABEL: define <1 x i64> @test_vabs_s64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[VABS_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VABS1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.abs.v1i64(<1 x i64> [[VABS_I]]) #4
// CHECK:   ret <1 x i64> [[VABS1_I]]
int64x1_t test_vabs_s64(int64x1_t a) {
  return vabs_s64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vqabs_s64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[VQABS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VQABS_V1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqabs.v1i64(<1 x i64> [[VQABS_V_I]]) #4
// CHECK:   [[VQABS_V2_I:%.*]] = bitcast <1 x i64> [[VQABS_V1_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[VQABS_V2_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP1]]
int64x1_t test_vqabs_s64(int64x1_t a) {
  return vqabs_s64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vqneg_s64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[VQNEG_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VQNEG_V1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqneg.v1i64(<1 x i64> [[VQNEG_V_I]]) #4
// CHECK:   [[VQNEG_V2_I:%.*]] = bitcast <1 x i64> [[VQNEG_V1_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[VQNEG_V2_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP1]]
int64x1_t test_vqneg_s64(int64x1_t a) {
  return vqneg_s64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vneg_s64(<1 x i64> %a) #0 {
// CHECK:   [[SUB_I:%.*]] = sub <1 x i64> zeroinitializer, %a
// CHECK:   ret <1 x i64> [[SUB_I]]
int64x1_t test_vneg_s64(int64x1_t a) {
  return vneg_s64(a);
}

// CHECK-LABEL: define float @test_vaddv_f32(<2 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[VADDV_F32_I:%.*]] = call float @llvm.aarch64.neon.faddv.f32.v2f32(<2 x float> [[TMP1]]) #4
// CHECK:   ret float [[VADDV_F32_I]]
float32_t test_vaddv_f32(float32x2_t a) {
  return vaddv_f32(a);
}

// CHECK-LABEL: define float @test_vaddvq_f32(<4 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK:   [[VADDVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.faddv.f32.v4f32(<4 x float> [[TMP1]]) #4
// CHECK:   ret float [[VADDVQ_F32_I]]
float32_t test_vaddvq_f32(float32x4_t a) {
  return vaddvq_f32(a);
}

// CHECK-LABEL: define double @test_vaddvq_f64(<2 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[VADDVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.faddv.f64.v2f64(<2 x double> [[TMP1]]) #4
// CHECK:   ret double [[VADDVQ_F64_I]]
float64_t test_vaddvq_f64(float64x2_t a) {
  return vaddvq_f64(a);
}

// CHECK-LABEL: define float @test_vmaxv_f32(<2 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[VMAXV_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> [[TMP1]]) #4
// CHECK:   ret float [[VMAXV_F32_I]]
float32_t test_vmaxv_f32(float32x2_t a) {
  return vmaxv_f32(a);
}

// CHECK-LABEL: define double @test_vmaxvq_f64(<2 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[VMAXVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxv.f64.v2f64(<2 x double> [[TMP1]]) #4
// CHECK:   ret double [[VMAXVQ_F64_I]]
float64_t test_vmaxvq_f64(float64x2_t a) {
  return vmaxvq_f64(a);
}

// CHECK-LABEL: define float @test_vminv_f32(<2 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[VMINV_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v2f32(<2 x float> [[TMP1]]) #4
// CHECK:   ret float [[VMINV_F32_I]]
float32_t test_vminv_f32(float32x2_t a) {
  return vminv_f32(a);
}

// CHECK-LABEL: define double @test_vminvq_f64(<2 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[VMINVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fminv.f64.v2f64(<2 x double> [[TMP1]]) #4
// CHECK:   ret double [[VMINVQ_F64_I]]
float64_t test_vminvq_f64(float64x2_t a) {
  return vminvq_f64(a);
}

// CHECK-LABEL: define double @test_vmaxnmvq_f64(<2 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[VMAXNMVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double> [[TMP1]]) #4
// CHECK:   ret double [[VMAXNMVQ_F64_I]]
float64_t test_vmaxnmvq_f64(float64x2_t a) {
  return vmaxnmvq_f64(a);
}

// CHECK-LABEL: define float @test_vmaxnmv_f32(<2 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[VMAXNMV_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v2f32(<2 x float> [[TMP1]]) #4
// CHECK:   ret float [[VMAXNMV_F32_I]]
float32_t test_vmaxnmv_f32(float32x2_t a) {
  return vmaxnmv_f32(a);
}

// CHECK-LABEL: define double @test_vminnmvq_f64(<2 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[VMINNMVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double> [[TMP1]]) #4
// CHECK:   ret double [[VMINNMVQ_F64_I]]
float64_t test_vminnmvq_f64(float64x2_t a) {
  return vminnmvq_f64(a);
}

// CHECK-LABEL: define float @test_vminnmv_f32(<2 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[VMINNMV_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v2f32(<2 x float> [[TMP1]]) #4
// CHECK:   ret float [[VMINNMV_F32_I]]
float32_t test_vminnmv_f32(float32x2_t a) {
  return vminnmv_f32(a);
}

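// The *nmv reductions use fmaxnm/fminnm (IEEE-754 maxNum/minNum): a quiet
// NaN lane loses to a numeric lane, whereas the plain vmaxv/vminv
// reductions above propagate the NaN. Usage sketch with assumed values
// (our helper, never called, emits no IR):
static inline float sketch_nan_tolerant_max(void) {
  float32x2_t v = vdup_n_f32(3.0f);
  v = vset_lane_f32(__builtin_nanf(""), v, 0); // lanes: {NaN, 3.0f}
  return vmaxnmv_f32(v); // 3.0f; vmaxv_f32(v) would return NaN
}
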
// CHECK-LABEL: define <2 x i64> @test_vpaddq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VPADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VPADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VPADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> [[VPADDQ_V_I]], <2 x i64> [[VPADDQ_V1_I]]) #4
// CHECK:   [[VPADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VPADDQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VPADDQ_V3_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP2]]
int64x2_t test_vpaddq_s64(int64x2_t a, int64x2_t b) {
  return vpaddq_s64(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vpaddq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VPADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VPADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VPADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> [[VPADDQ_V_I]], <2 x i64> [[VPADDQ_V1_I]]) #4
// CHECK:   [[VPADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VPADDQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VPADDQ_V3_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP2]]
uint64x2_t test_vpaddq_u64(uint64x2_t a, uint64x2_t b) {
  return vpaddq_u64(a, b);
}

// CHECK-LABEL: define i64 @test_vpaddd_u64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VPADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> [[TMP1]]) #4
// CHECK:   ret i64 [[VPADDD_U64_I]]
uint64_t test_vpaddd_u64(uint64x2_t a) {
  return vpaddd_u64(a);
}

// CHECK-LABEL: define i64 @test_vaddvq_s64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VADDVQ_S64_I:%.*]] = call i64 @llvm.aarch64.neon.saddv.i64.v2i64(<2 x i64> [[TMP1]]) #4
// CHECK:   ret i64 [[VADDVQ_S64_I]]
int64_t test_vaddvq_s64(int64x2_t a) {
  return vaddvq_s64(a);
}

// CHECK-LABEL: define i64 @test_vaddvq_u64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VADDVQ_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> [[TMP1]]) #4
// CHECK:   ret i64 [[VADDVQ_U64_I]]
uint64_t test_vaddvq_u64(uint64x2_t a) {
  return vaddvq_u64(a);
}

// CHECK-LABEL: define <1 x double> @test_vadd_f64(<1 x double> %a, <1 x double> %b) #0 {
// CHECK:   [[ADD_I:%.*]] = fadd <1 x double> %a, %b
// CHECK:   ret <1 x double> [[ADD_I]]
float64x1_t test_vadd_f64(float64x1_t a, float64x1_t b) {
  return vadd_f64(a, b);
}

// CHECK-LABEL: define <1 x double> @test_vmul_f64(<1 x double> %a, <1 x double> %b) #0 {
// CHECK:   [[MUL_I:%.*]] = fmul <1 x double> %a, %b
// CHECK:   ret <1 x double> [[MUL_I]]
float64x1_t test_vmul_f64(float64x1_t a, float64x1_t b) {
  return vmul_f64(a, b);
}

// CHECK-LABEL: define <1 x double> @test_vdiv_f64(<1 x double> %a, <1 x double> %b) #0 {
// CHECK:   [[DIV_I:%.*]] = fdiv <1 x double> %a, %b
// CHECK:   ret <1 x double> [[DIV_I]]
float64x1_t test_vdiv_f64(float64x1_t a, float64x1_t b) {
  return vdiv_f64(a, b);
}

// CHECK-LABEL: define <1 x double> @test_vmla_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) #0 {
// CHECK:   [[MUL_I:%.*]] = fmul <1 x double> %b, %c
// CHECK:   [[ADD_I:%.*]] = fadd <1 x double> %a, [[MUL_I]]
// CHECK:   ret <1 x double> [[ADD_I]]
float64x1_t test_vmla_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
  return vmla_f64(a, b, c);
}

// CHECK-LABEL: define <1 x double> @test_vmls_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) #0 {
// CHECK:   [[MUL_I:%.*]] = fmul <1 x double> %b, %c
// CHECK:   [[SUB_I:%.*]] = fsub <1 x double> %a, [[MUL_I]]
// CHECK:   ret <1 x double> [[SUB_I]]
float64x1_t test_vmls_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
  return vmls_f64(a, b, c);
}

// CHECK-LABEL: define <1 x double> @test_vfma_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <1 x double> %c to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
// CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
// CHECK:   [[TMP6:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[TMP4]], <1 x double> [[TMP5]], <1 x double> [[TMP3]]) #4
// CHECK:   ret <1 x double> [[TMP6]]
float64x1_t test_vfma_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
  return vfma_f64(a, b, c);
}

// CHECK-LABEL: define <1 x double> @test_vfms_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) #0 {
// CHECK:   [[SUB_I:%.*]] = fsub <1 x double> <double -0.000000e+00>, %b
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> [[SUB_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <1 x double> %c to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
// CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
// CHECK:   [[TMP6:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[TMP4]], <1 x double> [[TMP5]], <1 x double> [[TMP3]]) #4
// CHECK:   ret <1 x double> [[TMP6]]
float64x1_t test_vfms_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
  return vfms_f64(a, b, c);
}

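// Note the contrast checked above: vmla_f64/vmls_f64 emit a separate fmul
// and fadd/fsub (two roundings), while vfma_f64/vfms_f64 route through
// llvm.fma; vfms negates b first, so it computes a - b*c fused. Scalar
// equivalence sketch (our helper, never called, emits no IR):
static inline double sketch_vfms_scalar(double a, double b, double c) {
  return __builtin_fma(-b, c, a); // single rounding, per-lane vfms_f64
}
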
// CHECK-LABEL: define <1 x double> @test_vsub_f64(<1 x double> %a, <1 x double> %b) #0 {
// CHECK:   [[SUB_I:%.*]] = fsub <1 x double> %a, %b
// CHECK:   ret <1 x double> [[SUB_I]]
float64x1_t test_vsub_f64(float64x1_t a, float64x1_t b) {
  return vsub_f64(a, b);
}

// CHECK-LABEL: define <1 x double> @test_vabd_f64(<1 x double> %a, <1 x double> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK:   [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK:   [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
// CHECK:   [[VABD2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fabd.v1f64(<1 x double> [[VABD_I]], <1 x double> [[VABD1_I]]) #4
// CHECK:   ret <1 x double> [[VABD2_I]]
float64x1_t test_vabd_f64(float64x1_t a, float64x1_t b) {
  return vabd_f64(a, b);
}

// CHECK-LABEL: define <1 x double> @test_vmax_f64(<1 x double> %a, <1 x double> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK:   [[VMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK:   [[VMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
// CHECK:   [[VMAX2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmax.v1f64(<1 x double> [[VMAX_I]], <1 x double> [[VMAX1_I]]) #4
// CHECK:   ret <1 x double> [[VMAX2_I]]
float64x1_t test_vmax_f64(float64x1_t a, float64x1_t b) {
  return vmax_f64(a, b);
}

// CHECK-LABEL: define <1 x double> @test_vmin_f64(<1 x double> %a, <1 x double> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK:   [[VMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK:   [[VMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
// CHECK:   [[VMIN2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmin.v1f64(<1 x double> [[VMIN_I]], <1 x double> [[VMIN1_I]]) #4
// CHECK:   ret <1 x double> [[VMIN2_I]]
float64x1_t test_vmin_f64(float64x1_t a, float64x1_t b) {
  return vmin_f64(a, b);
}

// CHECK-LABEL: define <1 x double> @test_vmaxnm_f64(<1 x double> %a, <1 x double> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK:   [[VMAXNM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK:   [[VMAXNM1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
// CHECK:   [[VMAXNM2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmaxnm.v1f64(<1 x double> [[VMAXNM_I]], <1 x double> [[VMAXNM1_I]]) #4
// CHECK:   ret <1 x double> [[VMAXNM2_I]]
float64x1_t test_vmaxnm_f64(float64x1_t a, float64x1_t b) {
  return vmaxnm_f64(a, b);
}

// CHECK-LABEL: define <1 x double> @test_vminnm_f64(<1 x double> %a, <1 x double> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK:   [[VMINNM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK:   [[VMINNM1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
// CHECK:   [[VMINNM2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fminnm.v1f64(<1 x double> [[VMINNM_I]], <1 x double> [[VMINNM1_I]]) #4
// CHECK:   ret <1 x double> [[VMINNM2_I]]
float64x1_t test_vminnm_f64(float64x1_t a, float64x1_t b) {
  return vminnm_f64(a, b);
}

// CHECK-LABEL: define <1 x double> @test_vabs_f64(<1 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[VABS_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK:   [[VABS1_I:%.*]] = call <1 x double> @llvm.fabs.v1f64(<1 x double> [[VABS_I]]) #4
// CHECK:   ret <1 x double> [[VABS1_I]]
float64x1_t test_vabs_f64(float64x1_t a) {
  return vabs_f64(a);
}

// CHECK-LABEL: define <1 x double> @test_vneg_f64(<1 x double> %a) #0 {
// CHECK:   [[SUB_I:%.*]] = fsub <1 x double> <double -0.000000e+00>, %a
// CHECK:   ret <1 x double> [[SUB_I]]
float64x1_t test_vneg_f64(float64x1_t a) {
  return vneg_f64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vcvt_s64_f64(<1 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK:   [[TMP2:%.*]] = fptosi <1 x double> [[TMP1]] to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP2]]
int64x1_t test_vcvt_s64_f64(float64x1_t a) {
  return vcvt_s64_f64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vcvt_u64_f64(<1 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK:   [[TMP2:%.*]] = fptoui <1 x double> [[TMP1]] to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP2]]
uint64x1_t test_vcvt_u64_f64(float64x1_t a) {
  return vcvt_u64_f64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vcvtn_s64_f64(<1 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[VCVTN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK:   [[VCVTN1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtns.v1i64.v1f64(<1 x double> [[VCVTN_I]]) #4
// CHECK:   ret <1 x i64> [[VCVTN1_I]]
int64x1_t test_vcvtn_s64_f64(float64x1_t a) {
  return vcvtn_s64_f64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vcvtn_u64_f64(<1 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[VCVTN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK:   [[VCVTN1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.v1f64(<1 x double> [[VCVTN_I]]) #4
// CHECK:   ret <1 x i64> [[VCVTN1_I]]
uint64x1_t test_vcvtn_u64_f64(float64x1_t a) {
  return vcvtn_u64_f64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vcvtp_s64_f64(<1 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[VCVTP_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK:   [[VCVTP1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.v1f64(<1 x double> [[VCVTP_I]]) #4
// CHECK:   ret <1 x i64> [[VCVTP1_I]]
int64x1_t test_vcvtp_s64_f64(float64x1_t a) {
  return vcvtp_s64_f64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vcvtp_u64_f64(<1 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[VCVTP_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK:   [[VCVTP1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtpu.v1i64.v1f64(<1 x double> [[VCVTP_I]]) #4
// CHECK:   ret <1 x i64> [[VCVTP1_I]]
uint64x1_t test_vcvtp_u64_f64(float64x1_t a) {
  return vcvtp_u64_f64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vcvtm_s64_f64(<1 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[VCVTM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK:   [[VCVTM1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.v1f64(<1 x double> [[VCVTM_I]]) #4
// CHECK:   ret <1 x i64> [[VCVTM1_I]]
int64x1_t test_vcvtm_s64_f64(float64x1_t a) {
  return vcvtm_s64_f64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vcvtm_u64_f64(<1 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[VCVTM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK:   [[VCVTM1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.v1f64(<1 x double> [[VCVTM_I]]) #4
// CHECK:   ret <1 x i64> [[VCVTM1_I]]
uint64x1_t test_vcvtm_u64_f64(float64x1_t a) {
  return vcvtm_u64_f64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vcvta_s64_f64(<1 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[VCVTA_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK:   [[VCVTA1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.v1f64(<1 x double> [[VCVTA_I]]) #4
// CHECK:   ret <1 x i64> [[VCVTA1_I]]
int64x1_t test_vcvta_s64_f64(float64x1_t a) {
  return vcvta_s64_f64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vcvta_u64_f64(<1 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[VCVTA_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK:   [[VCVTA1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.v1f64(<1 x double> [[VCVTA_I]]) #4
// CHECK:   ret <1 x i64> [[VCVTA1_I]]
uint64x1_t test_vcvta_u64_f64(float64x1_t a) {
  return vcvta_u64_f64(a);
}

// CHECK-LABEL: define <1 x double> @test_vcvt_f64_s64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VCVT_I:%.*]] = sitofp <1 x i64> [[TMP1]] to <1 x double>
// CHECK:   ret <1 x double> [[VCVT_I]]
float64x1_t test_vcvt_f64_s64(int64x1_t a) {
  return vcvt_f64_s64(a);
}

// CHECK-LABEL: define <1 x double> @test_vcvt_f64_u64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VCVT_I:%.*]] = uitofp <1 x i64> [[TMP1]] to <1 x double>
// CHECK:   ret <1 x double> [[VCVT_I]]
float64x1_t test_vcvt_f64_u64(uint64x1_t a) {
  return vcvt_f64_u64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vcvt_n_s64_f64(<1 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK:   [[VCVT_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double> [[VCVT_N]], i32 64)
// CHECK:   ret <1 x i64> [[VCVT_N1]]
int64x1_t test_vcvt_n_s64_f64(float64x1_t a) {
  return vcvt_n_s64_f64(a, 64);
}

// CHECK-LABEL: define <1 x i64> @test_vcvt_n_u64_f64(<1 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK:   [[VCVT_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double> [[VCVT_N]], i32 64)
// CHECK:   ret <1 x i64> [[VCVT_N1]]
uint64x1_t test_vcvt_n_u64_f64(float64x1_t a) {
  return vcvt_n_u64_f64(a, 64);
}

// CHECK-LABEL: define <1 x double> @test_vcvt_n_f64_s64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VCVT_N1:%.*]] = call <1 x double> @llvm.aarch64.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64> [[VCVT_N]], i32 64)
// CHECK:   ret <1 x double> [[VCVT_N1]]
float64x1_t test_vcvt_n_f64_s64(int64x1_t a) {
  return vcvt_n_f64_s64(a, 64);
}

// CHECK-LABEL: define <1 x double> @test_vcvt_n_f64_u64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VCVT_N1:%.*]] = call <1 x double> @llvm.aarch64.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64> [[VCVT_N]], i32 64)
// CHECK:   ret <1 x double> [[VCVT_N1]]
float64x1_t test_vcvt_n_f64_u64(uint64x1_t a) {
  return vcvt_n_f64_u64(a, 64);
}

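// The _n_ conversions take the fractional-bit count as an immediate (1..64
// for 64-bit lanes): converting to fixed-point scales by 2^n before the
// saturating truncating convert, and converting from fixed-point scales by
// 2^-n afterwards. Scalar model of the from-fixed-point direction, assuming
// n is in range (our helper, never called, emits no IR):
static inline double sketch_fixed_to_f64(int64_t raw, int n) {
  return (double)raw * __builtin_ldexp(1.0, -n); // raw * 2^-n
}
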
// CHECK-LABEL: define <1 x double> @test_vrndn_f64(<1 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[VRNDN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK:   [[VRNDN1_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frintn.v1f64(<1 x double> [[VRNDN_I]]) #4
// CHECK:   ret <1 x double> [[VRNDN1_I]]
float64x1_t test_vrndn_f64(float64x1_t a) {
  return vrndn_f64(a);
}

// CHECK-LABEL: define <1 x double> @test_vrnda_f64(<1 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[VRNDA_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK:   [[VRNDA1_I:%.*]] = call <1 x double> @llvm.round.v1f64(<1 x double> [[VRNDA_I]]) #4
// CHECK:   ret <1 x double> [[VRNDA1_I]]
float64x1_t test_vrnda_f64(float64x1_t a) {
  return vrnda_f64(a);
}

// CHECK-LABEL: define <1 x double> @test_vrndp_f64(<1 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[VRNDP_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK:   [[VRNDP1_I:%.*]] = call <1 x double> @llvm.ceil.v1f64(<1 x double> [[VRNDP_I]]) #4
// CHECK:   ret <1 x double> [[VRNDP1_I]]
float64x1_t test_vrndp_f64(float64x1_t a) {
  return vrndp_f64(a);
}

// CHECK-LABEL: define <1 x double> @test_vrndm_f64(<1 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[VRNDM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK:   [[VRNDM1_I:%.*]] = call <1 x double> @llvm.floor.v1f64(<1 x double> [[VRNDM_I]]) #4
// CHECK:   ret <1 x double> [[VRNDM1_I]]
float64x1_t test_vrndm_f64(float64x1_t a) {
  return vrndm_f64(a);
}

// CHECK-LABEL: define <1 x double> @test_vrndx_f64(<1 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[VRNDX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK:   [[VRNDX1_I:%.*]] = call <1 x double> @llvm.rint.v1f64(<1 x double> [[VRNDX_I]]) #4
// CHECK:   ret <1 x double> [[VRNDX1_I]]
float64x1_t test_vrndx_f64(float64x1_t a) {
  return vrndx_f64(a);
}

// CHECK-LABEL: define <1 x double> @test_vrnd_f64(<1 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[VRNDZ_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK:   [[VRNDZ1_I:%.*]] = call <1 x double> @llvm.trunc.v1f64(<1 x double> [[VRNDZ_I]]) #4
// CHECK:   ret <1 x double> [[VRNDZ1_I]]
float64x1_t test_vrnd_f64(float64x1_t a) {
  return vrnd_f64(a);
}

// CHECK-LABEL: define <1 x double> @test_vrndi_f64(<1 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[VRNDI_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK:   [[VRNDI1_I:%.*]] = call <1 x double> @llvm.nearbyint.v1f64(<1 x double> [[VRNDI_I]]) #4
// CHECK:   ret <1 x double> [[VRNDI1_I]]
float64x1_t test_vrndi_f64(float64x1_t a) {
  return vrndi_f64(a);
}

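// Summary of the vrnd* lowerings checked above:
//   vrndn -> llvm.aarch64.neon.frintn (to nearest, ties to even)
//   vrnda -> llvm.round               (to nearest, ties away from zero)
//   vrndp -> llvm.ceil                (toward +infinity)
//   vrndm -> llvm.floor               (toward -infinity)
//   vrndx -> llvm.rint                (current FPCR mode, signals inexact)
//   vrnd  -> llvm.trunc               (toward zero)
//   vrndi -> llvm.nearbyint           (current FPCR mode, no inexact)
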
// CHECK-LABEL: define <1 x double> @test_vrsqrte_f64(<1 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[VRSQRTE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK:   [[VRSQRTE_V1_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frsqrte.v1f64(<1 x double> [[VRSQRTE_V_I]]) #4
// CHECK:   ret <1 x double> [[VRSQRTE_V1_I]]
float64x1_t test_vrsqrte_f64(float64x1_t a) {
  return vrsqrte_f64(a);
}

// CHECK-LABEL: define <1 x double> @test_vrecpe_f64(<1 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[VRECPE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK:   [[VRECPE_V1_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frecpe.v1f64(<1 x double> [[VRECPE_V_I]]) #4
// CHECK:   ret <1 x double> [[VRECPE_V1_I]]
float64x1_t test_vrecpe_f64(float64x1_t a) {
  return vrecpe_f64(a);
}

// CHECK-LABEL: define <1 x double> @test_vsqrt_f64(<1 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK:   [[VSQRT_I:%.*]] = call <1 x double> @llvm.sqrt.v1f64(<1 x double> [[TMP1]]) #4
// CHECK:   ret <1 x double> [[VSQRT_I]]
float64x1_t test_vsqrt_f64(float64x1_t a) {
  return vsqrt_f64(a);
}

// CHECK-LABEL: define <1 x double> @test_vrecps_f64(<1 x double> %a, <1 x double> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK:   [[VRECPS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK:   [[VRECPS_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
// CHECK:   [[VRECPS_V2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frecps.v1f64(<1 x double> [[VRECPS_V_I]], <1 x double> [[VRECPS_V1_I]]) #4
// CHECK:   [[VRECPS_V3_I:%.*]] = bitcast <1 x double> [[VRECPS_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VRECPS_V3_I]] to <1 x double>
// CHECK:   ret <1 x double> [[TMP2]]
float64x1_t test_vrecps_f64(float64x1_t a, float64x1_t b) {
  return vrecps_f64(a, b);
}

// CHECK-LABEL: define <1 x double> @test_vrsqrts_f64(<1 x double> %a, <1 x double> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK:   [[VRSQRTS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK:   [[VRSQRTS_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
// CHECK:   [[VRSQRTS_V2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frsqrts.v1f64(<1 x double> [[VRSQRTS_V_I]], <1 x double> [[VRSQRTS_V1_I]]) #4
// CHECK:   [[VRSQRTS_V3_I:%.*]] = bitcast <1 x double> [[VRSQRTS_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VRSQRTS_V3_I]] to <1 x double>
// CHECK:   ret <1 x double> [[TMP2]]
float64x1_t test_vrsqrts_f64(float64x1_t a, float64x1_t b) {
  return vrsqrts_f64(a, b);
}

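// vrecpe_f64 alone is only a coarse reciprocal estimate; vrecps_f64 returns
// the Newton-Raphson correction term (2 - a*x), so each multiply by it
// roughly doubles the number of accurate bits. Refinement sketch; two steps
// shown purely as an illustration, full double precision would need more
// (our helper, never called, emits no IR):
static inline float64x1_t sketch_recip_refine(float64x1_t a) {
  float64x1_t x = vrecpe_f64(a);     // initial reciprocal estimate
  x = vmul_f64(x, vrecps_f64(a, x)); // x *= (2 - a*x)
  x = vmul_f64(x, vrecps_f64(a, x)); // and again
  return x;
}
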
// CHECK-LABEL: define i32 @test_vminv_s32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VMINV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v2i32(<2 x i32> [[TMP1]]) #4
// CHECK:   ret i32 [[VMINV_S32_I]]
int32_t test_vminv_s32(int32x2_t a) {
  return vminv_s32(a);
}

// CHECK-LABEL: define i32 @test_vminv_u32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VMINV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v2i32(<2 x i32> [[TMP1]]) #4
// CHECK:   ret i32 [[VMINV_U32_I]]
uint32_t test_vminv_u32(uint32x2_t a) {
  return vminv_u32(a);
}

// CHECK-LABEL: define i32 @test_vmaxv_s32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VMAXV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v2i32(<2 x i32> [[TMP1]]) #4
// CHECK:   ret i32 [[VMAXV_S32_I]]
int32_t test_vmaxv_s32(int32x2_t a) {
  return vmaxv_s32(a);
}

// CHECK-LABEL: define i32 @test_vmaxv_u32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VMAXV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v2i32(<2 x i32> [[TMP1]]) #4
// CHECK:   ret i32 [[VMAXV_U32_I]]
uint32_t test_vmaxv_u32(uint32x2_t a) {
  return vmaxv_u32(a);
}

// CHECK-LABEL: define i32 @test_vaddv_s32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VADDV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v2i32(<2 x i32> [[TMP1]]) #4
// CHECK:   ret i32 [[VADDV_S32_I]]
int32_t test_vaddv_s32(int32x2_t a) {
  return vaddv_s32(a);
}

// CHECK-LABEL: define i32 @test_vaddv_u32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VADDV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v2i32(<2 x i32> [[TMP1]]) #4
// CHECK:   ret i32 [[VADDV_U32_I]]
uint32_t test_vaddv_u32(uint32x2_t a) {
  return vaddv_u32(a);
}

// CHECK-LABEL: define i64 @test_vaddlv_s32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VADDLV_S32_I:%.*]] = call i64 @llvm.aarch64.neon.saddlv.i64.v2i32(<2 x i32> [[TMP1]]) #4
// CHECK:   ret i64 [[VADDLV_S32_I]]
int64_t test_vaddlv_s32(int32x2_t a) {
  return vaddlv_s32(a);
}

// CHECK-LABEL: define i64 @test_vaddlv_u32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VADDLV_U32_I:%.*]] = call i64 @llvm.aarch64.neon.uaddlv.i64.v2i32(<2 x i32> [[TMP1]]) #4
// CHECK:   ret i64 [[VADDLV_U32_I]]
uint64_t test_vaddlv_u32(uint32x2_t a) {
  return vaddlv_u32(a);
}

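// vaddlv_s32/vaddlv_u32 (SADDLV/UADDLV) widen each 32-bit lane to 64 bits
// before summing, so the horizontal add cannot wrap even at the 32-bit
// limits. Usage sketch with assumed values (our helper, never called,
// emits no IR):
static inline int64_t sketch_wide_sum(void) {
  int32x2_t v = vdup_n_s32(0x7fffffff); // two INT32_MAX lanes
  return vaddlv_s32(v);                 // 4294967294: exact in 64 bits
}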