// RUN: %clang_cc1 -triple arm64-apple-darwin -target-feature +neon \
// RUN: -fallow-half-arguments-and-returns -emit-llvm -o - %s \
// RUN: | opt -S -mem2reg | FileCheck %s

#include <arm_neon.h>
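
// Check the IR generated for the NEON lane-access intrinsics:
// vget_lane/vgetq_lane lower to extractelement, vset_lane/vsetq_lane lower
// to insertelement, and f16 lanes are reinterpreted through i16 in memory.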

// CHECK-LABEL: define i8 @test_vget_lane_u8(<8 x i8> %a) #0 {
// CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i8> %a, i32 7
// CHECK: ret i8 [[VGET_LANE]]
uint8_t test_vget_lane_u8(uint8x8_t a) {
  return vget_lane_u8(a, 7);
}

// CHECK-LABEL: define i16 @test_vget_lane_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
// CHECK: ret i16 [[VGET_LANE]]
uint16_t test_vget_lane_u16(uint16x4_t a) {
  return vget_lane_u16(a, 3);
}

// CHECK-LABEL: define i32 @test_vget_lane_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
// CHECK: ret i32 [[VGET_LANE]]
uint32_t test_vget_lane_u32(uint32x2_t a) {
  return vget_lane_u32(a, 1);
}

// CHECK-LABEL: define i8 @test_vget_lane_s8(<8 x i8> %a) #0 {
// CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i8> %a, i32 7
// CHECK: ret i8 [[VGET_LANE]]
int8_t test_vget_lane_s8(int8x8_t a) {
  return vget_lane_s8(a, 7);
}

// CHECK-LABEL: define i16 @test_vget_lane_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
// CHECK: ret i16 [[VGET_LANE]]
int16_t test_vget_lane_s16(int16x4_t a) {
  return vget_lane_s16(a, 3);
}

// CHECK-LABEL: define i32 @test_vget_lane_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
// CHECK: ret i32 [[VGET_LANE]]
int32_t test_vget_lane_s32(int32x2_t a) {
  return vget_lane_s32(a, 1);
}

// CHECK-LABEL: define i8 @test_vget_lane_p8(<8 x i8> %a) #0 {
// CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i8> %a, i32 7
// CHECK: ret i8 [[VGET_LANE]]
poly8_t test_vget_lane_p8(poly8x8_t a) {
  return vget_lane_p8(a, 7);
}

// CHECK-LABEL: define i16 @test_vget_lane_p16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
// CHECK: ret i16 [[VGET_LANE]]
poly16_t test_vget_lane_p16(poly16x4_t a) {
  return vget_lane_p16(a, 3);
}

// CHECK-LABEL: define float @test_vget_lane_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
// CHECK: ret float [[VGET_LANE]]
float32_t test_vget_lane_f32(float32x2_t a) {
  return vget_lane_f32(a, 1);
}

// CHECK-LABEL: define float @test_vget_lane_f16(<4 x half> %a) #0 {
// CHECK: [[__REINT_242:%.*]] = alloca <4 x half>, align 8
// CHECK: [[__REINT1_242:%.*]] = alloca i16, align 2
// CHECK: store <4 x half> %a, <4 x half>* [[__REINT_242]], align 8
// CHECK: [[TMP0:%.*]] = bitcast <4 x half>* [[__REINT_242]] to <4 x i16>*
// CHECK: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP0]], align 8
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP3]], i32 1
// CHECK: store i16 [[VGET_LANE]], i16* [[__REINT1_242]], align 2
// CHECK: [[TMP4:%.*]] = bitcast i16* [[__REINT1_242]] to half*
// CHECK: [[TMP5:%.*]] = load half, half* [[TMP4]], align 2
// CHECK: [[CONV:%.*]] = fpext half [[TMP5]] to float
// CHECK: ret float [[CONV]]
float32_t test_vget_lane_f16(float16x4_t a) {
  return vget_lane_f16(a, 1);
}

// CHECK-LABEL: define i8 @test_vgetq_lane_u8(<16 x i8> %a) #0 {
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
// CHECK: ret i8 [[VGETQ_LANE]]
uint8_t test_vgetq_lane_u8(uint8x16_t a) {
  return vgetq_lane_u8(a, 15);
}

// CHECK-LABEL: define i16 @test_vgetq_lane_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
// CHECK: ret i16 [[VGETQ_LANE]]
uint16_t test_vgetq_lane_u16(uint16x8_t a) {
  return vgetq_lane_u16(a, 7);
}

// CHECK-LABEL: define i32 @test_vgetq_lane_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
// CHECK: ret i32 [[VGETQ_LANE]]
uint32_t test_vgetq_lane_u32(uint32x4_t a) {
  return vgetq_lane_u32(a, 3);
}

// CHECK-LABEL: define i8 @test_vgetq_lane_s8(<16 x i8> %a) #0 {
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
// CHECK: ret i8 [[VGETQ_LANE]]
int8_t test_vgetq_lane_s8(int8x16_t a) {
  return vgetq_lane_s8(a, 15);
}

// CHECK-LABEL: define i16 @test_vgetq_lane_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
// CHECK: ret i16 [[VGETQ_LANE]]
int16_t test_vgetq_lane_s16(int16x8_t a) {
  return vgetq_lane_s16(a, 7);
}

// CHECK-LABEL: define i32 @test_vgetq_lane_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
// CHECK: ret i32 [[VGETQ_LANE]]
int32_t test_vgetq_lane_s32(int32x4_t a) {
  return vgetq_lane_s32(a, 3);
}

// CHECK-LABEL: define i8 @test_vgetq_lane_p8(<16 x i8> %a) #0 {
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
// CHECK: ret i8 [[VGETQ_LANE]]
poly8_t test_vgetq_lane_p8(poly8x16_t a) {
  return vgetq_lane_p8(a, 15);
}

// CHECK-LABEL: define i16 @test_vgetq_lane_p16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
// CHECK: ret i16 [[VGETQ_LANE]]
poly16_t test_vgetq_lane_p16(poly16x8_t a) {
  return vgetq_lane_p16(a, 7);
}

// CHECK-LABEL: define float @test_vgetq_lane_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
// CHECK: ret float [[VGETQ_LANE]]
float32_t test_vgetq_lane_f32(float32x4_t a) {
  return vgetq_lane_f32(a, 3);
}

// CHECK-LABEL: define float @test_vgetq_lane_f16(<8 x half> %a) #0 {
// CHECK: [[__REINT_244:%.*]] = alloca <8 x half>, align 16
// CHECK: [[__REINT1_244:%.*]] = alloca i16, align 2
// CHECK: store <8 x half> %a, <8 x half>* [[__REINT_244]], align 16
// CHECK: [[TMP0:%.*]] = bitcast <8 x half>* [[__REINT_244]] to <8 x i16>*
// CHECK: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 16
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP3]], i32 3
// CHECK: store i16 [[VGETQ_LANE]], i16* [[__REINT1_244]], align 2
// CHECK: [[TMP4:%.*]] = bitcast i16* [[__REINT1_244]] to half*
// CHECK: [[TMP5:%.*]] = load half, half* [[TMP4]], align 2
// CHECK: [[CONV:%.*]] = fpext half [[TMP5]] to float
// CHECK: ret float [[CONV]]
float32_t test_vgetq_lane_f16(float16x8_t a) {
  return vgetq_lane_f16(a, 3);
}

// CHECK-LABEL: define i64 @test_vget_lane_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
// CHECK: ret i64 [[VGET_LANE]]
int64_t test_vget_lane_s64(int64x1_t a) {
  return vget_lane_s64(a, 0);
}

// CHECK-LABEL: define i64 @test_vget_lane_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
// CHECK: ret i64 [[VGET_LANE]]
uint64_t test_vget_lane_u64(uint64x1_t a) {
  return vget_lane_u64(a, 0);
}

// CHECK-LABEL: define i64 @test_vgetq_lane_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
// CHECK: ret i64 [[VGETQ_LANE]]
int64_t test_vgetq_lane_s64(int64x2_t a) {
  return vgetq_lane_s64(a, 1);
}

// CHECK-LABEL: define i64 @test_vgetq_lane_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
// CHECK: ret i64 [[VGETQ_LANE]]
uint64_t test_vgetq_lane_u64(uint64x2_t a) {
  return vgetq_lane_u64(a, 1);
}

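// vset_lane/vsetq_lane: insert a scalar into one lane of a vector.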
// CHECK-LABEL: define <8 x i8> @test_vset_lane_u8(i8 %a, <8 x i8> %b) #0 {
// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i8> %b, i8 %a, i32 7
// CHECK: ret <8 x i8> [[VSET_LANE]]
uint8x8_t test_vset_lane_u8(uint8_t a, uint8x8_t b) {
  return vset_lane_u8(a, b, 7);
}

// CHECK-LABEL: define <4 x i16> @test_vset_lane_u16(i16 %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP1]], i16 %a, i32 3
// CHECK: ret <4 x i16> [[VSET_LANE]]
uint16x4_t test_vset_lane_u16(uint16_t a, uint16x4_t b) {
  return vset_lane_u16(a, b, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vset_lane_u32(i32 %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i32> [[TMP1]], i32 %a, i32 1
// CHECK: ret <2 x i32> [[VSET_LANE]]
uint32x2_t test_vset_lane_u32(uint32_t a, uint32x2_t b) {
  return vset_lane_u32(a, b, 1);
}

// CHECK-LABEL: define <8 x i8> @test_vset_lane_s8(i8 %a, <8 x i8> %b) #0 {
// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i8> %b, i8 %a, i32 7
// CHECK: ret <8 x i8> [[VSET_LANE]]
int8x8_t test_vset_lane_s8(int8_t a, int8x8_t b) {
  return vset_lane_s8(a, b, 7);
}

// CHECK-LABEL: define <4 x i16> @test_vset_lane_s16(i16 %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP1]], i16 %a, i32 3
// CHECK: ret <4 x i16> [[VSET_LANE]]
int16x4_t test_vset_lane_s16(int16_t a, int16x4_t b) {
  return vset_lane_s16(a, b, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vset_lane_s32(i32 %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i32> [[TMP1]], i32 %a, i32 1
// CHECK: ret <2 x i32> [[VSET_LANE]]
int32x2_t test_vset_lane_s32(int32_t a, int32x2_t b) {
  return vset_lane_s32(a, b, 1);
}

// CHECK-LABEL: define <8 x i8> @test_vset_lane_p8(i8 %a, <8 x i8> %b) #0 {
// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i8> %b, i8 %a, i32 7
// CHECK: ret <8 x i8> [[VSET_LANE]]
poly8x8_t test_vset_lane_p8(poly8_t a, poly8x8_t b) {
  return vset_lane_p8(a, b, 7);
}

// CHECK-LABEL: define <4 x i16> @test_vset_lane_p16(i16 %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP1]], i16 %a, i32 3
// CHECK: ret <4 x i16> [[VSET_LANE]]
poly16x4_t test_vset_lane_p16(poly16_t a, poly16x4_t b) {
  return vset_lane_p16(a, b, 3);
}

// CHECK-LABEL: define <2 x float> @test_vset_lane_f32(float %a, <2 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x float> [[TMP1]], float %a, i32 1
// CHECK: ret <2 x float> [[VSET_LANE]]
float32x2_t test_vset_lane_f32(float32_t a, float32x2_t b) {
  return vset_lane_f32(a, b, 1);
}

// CHECK-LABEL: define <4 x half> @test_vset_lane_f16(half* %a, <4 x half> %b) #0 {
// CHECK: [[__REINT_246:%.*]] = alloca half, align 2
// CHECK: [[__REINT1_246:%.*]] = alloca <4 x half>, align 8
// CHECK: [[__REINT2_246:%.*]] = alloca <4 x i16>, align 8
// CHECK: [[TMP0:%.*]] = load half, half* %a, align 2
// CHECK: store half [[TMP0]], half* [[__REINT_246]], align 2
// CHECK: store <4 x half> %b, <4 x half>* [[__REINT1_246]], align 8
// CHECK: [[TMP1:%.*]] = bitcast half* [[__REINT_246]] to i16*
// CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
// CHECK: [[TMP3:%.*]] = bitcast <4 x half>* [[__REINT1_246]] to <4 x i16>*
// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[TMP3]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP6]], i16 [[TMP2]], i32 3
// CHECK: store <4 x i16> [[VSET_LANE]], <4 x i16>* [[__REINT2_246]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i16>* [[__REINT2_246]] to <4 x half>*
// CHECK: [[TMP8:%.*]] = load <4 x half>, <4 x half>* [[TMP7]], align 8
// CHECK: ret <4 x half> [[TMP8]]
float16x4_t test_vset_lane_f16(float16_t *a, float16x4_t b) {
  return vset_lane_f16(*a, b, 3);
}

// CHECK-LABEL: define <16 x i8> @test_vsetq_lane_u8(i8 %a, <16 x i8> %b) #0 {
// CHECK: [[VSET_LANE:%.*]] = insertelement <16 x i8> %b, i8 %a, i32 15
// CHECK: ret <16 x i8> [[VSET_LANE]]
uint8x16_t test_vsetq_lane_u8(uint8_t a, uint8x16_t b) {
  return vsetq_lane_u8(a, b, 15);
}

// CHECK-LABEL: define <8 x i16> @test_vsetq_lane_u16(i16 %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 %a, i32 7
// CHECK: ret <8 x i16> [[VSET_LANE]]
uint16x8_t test_vsetq_lane_u16(uint16_t a, uint16x8_t b) {
  return vsetq_lane_u16(a, b, 7);
}

// CHECK-LABEL: define <4 x i32> @test_vsetq_lane_u32(i32 %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i32> [[TMP1]], i32 %a, i32 3
// CHECK: ret <4 x i32> [[VSET_LANE]]
uint32x4_t test_vsetq_lane_u32(uint32_t a, uint32x4_t b) {
  return vsetq_lane_u32(a, b, 3);
}

// CHECK-LABEL: define <16 x i8> @test_vsetq_lane_s8(i8 %a, <16 x i8> %b) #0 {
// CHECK: [[VSET_LANE:%.*]] = insertelement <16 x i8> %b, i8 %a, i32 15
// CHECK: ret <16 x i8> [[VSET_LANE]]
int8x16_t test_vsetq_lane_s8(int8_t a, int8x16_t b) {
  return vsetq_lane_s8(a, b, 15);
}

// CHECK-LABEL: define <8 x i16> @test_vsetq_lane_s16(i16 %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 %a, i32 7
// CHECK: ret <8 x i16> [[VSET_LANE]]
int16x8_t test_vsetq_lane_s16(int16_t a, int16x8_t b) {
  return vsetq_lane_s16(a, b, 7);
}

// CHECK-LABEL: define <4 x i32> @test_vsetq_lane_s32(i32 %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i32> [[TMP1]], i32 %a, i32 3
// CHECK: ret <4 x i32> [[VSET_LANE]]
int32x4_t test_vsetq_lane_s32(int32_t a, int32x4_t b) {
  return vsetq_lane_s32(a, b, 3);
}

// CHECK-LABEL: define <16 x i8> @test_vsetq_lane_p8(i8 %a, <16 x i8> %b) #0 {
// CHECK: [[VSET_LANE:%.*]] = insertelement <16 x i8> %b, i8 %a, i32 15
// CHECK: ret <16 x i8> [[VSET_LANE]]
poly8x16_t test_vsetq_lane_p8(poly8_t a, poly8x16_t b) {
  return vsetq_lane_p8(a, b, 15);
}

// CHECK-LABEL: define <8 x i16> @test_vsetq_lane_p16(i16 %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 %a, i32 7
// CHECK: ret <8 x i16> [[VSET_LANE]]
poly16x8_t test_vsetq_lane_p16(poly16_t a, poly16x8_t b) {
  return vsetq_lane_p16(a, b, 7);
}

// CHECK-LABEL: define <4 x float> @test_vsetq_lane_f32(float %a, <4 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x float> [[TMP1]], float %a, i32 3
// CHECK: ret <4 x float> [[VSET_LANE]]
float32x4_t test_vsetq_lane_f32(float32_t a, float32x4_t b) {
  return vsetq_lane_f32(a, b, 3);
}

// CHECK-LABEL: define <8 x half> @test_vsetq_lane_f16(half* %a, <8 x half> %b) #0 {
// CHECK: [[__REINT_248:%.*]] = alloca half, align 2
// CHECK: [[__REINT1_248:%.*]] = alloca <8 x half>, align 16
// CHECK: [[__REINT2_248:%.*]] = alloca <8 x i16>, align 16
// CHECK: [[TMP0:%.*]] = load half, half* %a, align 2
// CHECK: store half [[TMP0]], half* [[__REINT_248]], align 2
// CHECK: store <8 x half> %b, <8 x half>* [[__REINT1_248]], align 16
// CHECK: [[TMP1:%.*]] = bitcast half* [[__REINT_248]] to i16*
// CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
// CHECK: [[TMP3:%.*]] = bitcast <8 x half>* [[__REINT1_248]] to <8 x i16>*
// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[TMP3]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP6]], i16 [[TMP2]], i32 7
// CHECK: store <8 x i16> [[VSET_LANE]], <8 x i16>* [[__REINT2_248]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16>* [[__REINT2_248]] to <8 x half>*
// CHECK: [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[TMP7]], align 16
// CHECK: ret <8 x half> [[TMP8]]
float16x8_t test_vsetq_lane_f16(float16_t *a, float16x8_t b) {
  return vsetq_lane_f16(*a, b, 7);
}

// CHECK-LABEL: define <1 x i64> @test_vset_lane_s64(i64 %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 %a, i32 0
// CHECK: ret <1 x i64> [[VSET_LANE]]
int64x1_t test_vset_lane_s64(int64_t a, int64x1_t b) {
  return vset_lane_s64(a, b, 0);
}

// CHECK-LABEL: define <1 x i64> @test_vset_lane_u64(i64 %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 %a, i32 0
// CHECK: ret <1 x i64> [[VSET_LANE]]
uint64x1_t test_vset_lane_u64(uint64_t a, uint64x1_t b) {
  return vset_lane_u64(a, b, 0);
}

// CHECK-LABEL: define <2 x i64> @test_vsetq_lane_s64(i64 %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 %a, i32 1
// CHECK: ret <2 x i64> [[VSET_LANE]]
int64x2_t test_vsetq_lane_s64(int64_t a, int64x2_t b) {
  return vsetq_lane_s64(a, b, 1);
}

// CHECK-LABEL: define <2 x i64> @test_vsetq_lane_u64(i64 %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 %a, i32 1
// CHECK: ret <2 x i64> [[VSET_LANE]]
uint64x2_t test_vsetq_lane_u64(uint64_t a, uint64x2_t b) {
  return vsetq_lane_u64(a, b, 1);
}