• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
3
; Insert the scalar %tmp2 into lane 15 of a <16 x i8>; the expected output is a
; single lane-indexed mov from w0.
4define <16 x i8> @ins16bw(<16 x i8> %tmp1, i8 %tmp2) {
5; CHECK-LABEL: ins16bw:
6; CHECK:       // %bb.0:
7; CHECK-NEXT:    mov v0.b[15], w0
8; CHECK-NEXT:    ret
9  %tmp3 = insertelement <16 x i8> %tmp1, i8 %tmp2, i32 15
10  ret <16 x i8> %tmp3
11}
12
; Insert the scalar %tmp2 into lane 6 of an <8 x i16>; the expected output is a
; single lane-indexed mov from w0.
13define <8 x i16> @ins8hw(<8 x i16> %tmp1, i16 %tmp2) {
14; CHECK-LABEL: ins8hw:
15; CHECK:       // %bb.0:
16; CHECK-NEXT:    mov v0.h[6], w0
17; CHECK-NEXT:    ret
18  %tmp3 = insertelement <8 x i16> %tmp1, i16 %tmp2, i32 6
19  ret <8 x i16> %tmp3
20}
21
; Insert the scalar %tmp2 into lane 2 of a <4 x i32>; the expected output is a
; single lane-indexed mov from w0.
22define <4 x i32> @ins4sw(<4 x i32> %tmp1, i32 %tmp2) {
23; CHECK-LABEL: ins4sw:
24; CHECK:       // %bb.0:
25; CHECK-NEXT:    mov v0.s[2], w0
26; CHECK-NEXT:    ret
27  %tmp3 = insertelement <4 x i32> %tmp1, i32 %tmp2, i32 2
28  ret <4 x i32> %tmp3
29}
30
; Insert the 64-bit scalar %tmp2 into lane 1 of a <2 x i64>; the expected output
; is a single lane-indexed mov from x0.
31define <2 x i64> @ins2dw(<2 x i64> %tmp1, i64 %tmp2) {
32; CHECK-LABEL: ins2dw:
33; CHECK:       // %bb.0:
34; CHECK-NEXT:    mov v0.d[1], x0
35; CHECK-NEXT:    ret
36  %tmp3 = insertelement <2 x i64> %tmp1, i64 %tmp2, i32 1
37  ret <2 x i64> %tmp3
38}
39
; Insert the scalar %tmp2 into lane 5 of a 64-bit <8 x i8>. The expected output
; wraps the lane mov in d0/q0 register kill annotations.
40define <8 x i8> @ins8bw(<8 x i8> %tmp1, i8 %tmp2) {
41; CHECK-LABEL: ins8bw:
42; CHECK:       // %bb.0:
43; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
44; CHECK-NEXT:    mov v0.b[5], w0
45; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
46; CHECK-NEXT:    ret
47  %tmp3 = insertelement <8 x i8> %tmp1, i8 %tmp2, i32 5
48  ret <8 x i8> %tmp3
49}
50
; Insert the scalar %tmp2 into lane 3 of a 64-bit <4 x i16>. The expected output
; wraps the lane mov in d0/q0 register kill annotations.
51define <4 x i16> @ins4hw(<4 x i16> %tmp1, i16 %tmp2) {
52; CHECK-LABEL: ins4hw:
53; CHECK:       // %bb.0:
54; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
55; CHECK-NEXT:    mov v0.h[3], w0
56; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
57; CHECK-NEXT:    ret
58  %tmp3 = insertelement <4 x i16> %tmp1, i16 %tmp2, i32 3
59  ret <4 x i16> %tmp3
60}
61
; Insert the scalar %tmp2 into lane 1 of a 64-bit <2 x i32>. The expected output
; wraps the lane mov in d0/q0 register kill annotations.
62define <2 x i32> @ins2sw(<2 x i32> %tmp1, i32 %tmp2) {
63; CHECK-LABEL: ins2sw:
64; CHECK:       // %bb.0:
65; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
66; CHECK-NEXT:    mov v0.s[1], w0
67; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
68; CHECK-NEXT:    ret
69  %tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1
70  ret <2 x i32> %tmp3
71}
72
; Copy lane 2 of %tmp1 into lane 15 of %tmp2 (both <16 x i8>). The result is
; built in v1 and then moved to v0, because the destination vector is the second
; argument.
73define <16 x i8> @ins16b16(<16 x i8> %tmp1, <16 x i8> %tmp2) {
74; CHECK-LABEL: ins16b16:
75; CHECK:       // %bb.0:
76; CHECK-NEXT:    mov v1.b[15], v0.b[2]
77; CHECK-NEXT:    mov v0.16b, v1.16b
78; CHECK-NEXT:    ret
79  %tmp3 = extractelement <16 x i8> %tmp1, i32 2
80  %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15
81  ret <16 x i8> %tmp4
82}
83
; Copy lane 2 of %tmp1 into lane 7 of %tmp2 (both <8 x i16>); the expected
; output is an element-to-element mov into v1 followed by a full-register move
; to v0.
84define <8 x i16> @ins8h8(<8 x i16> %tmp1, <8 x i16> %tmp2) {
85; CHECK-LABEL: ins8h8:
86; CHECK:       // %bb.0:
87; CHECK-NEXT:    mov v1.h[7], v0.h[2]
88; CHECK-NEXT:    mov v0.16b, v1.16b
89; CHECK-NEXT:    ret
90  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
91  %tmp4 = insertelement <8 x i16> %tmp2, i16 %tmp3, i32 7
92  ret <8 x i16> %tmp4
93}
94
; Copy lane 2 of %tmp1 into lane 1 of %tmp2 (both <4 x i32>); the expected
; output is an element-to-element mov into v1 followed by a full-register move
; to v0.
95define <4 x i32> @ins4s4(<4 x i32> %tmp1, <4 x i32> %tmp2) {
96; CHECK-LABEL: ins4s4:
97; CHECK:       // %bb.0:
98; CHECK-NEXT:    mov v1.s[1], v0.s[2]
99; CHECK-NEXT:    mov v0.16b, v1.16b
100; CHECK-NEXT:    ret
101  %tmp3 = extractelement <4 x i32> %tmp1, i32 2
102  %tmp4 = insertelement <4 x i32> %tmp2, i32 %tmp3, i32 1
103  ret <4 x i32> %tmp4
104}
105
; Copy lane 0 of %tmp1 into lane 1 of %tmp2 (both <2 x i64>); the expected
; output is an element-to-element mov into v1 followed by a full-register move
; to v0.
106define <2 x i64> @ins2d2(<2 x i64> %tmp1, <2 x i64> %tmp2) {
107; CHECK-LABEL: ins2d2:
108; CHECK:       // %bb.0:
109; CHECK-NEXT:    mov v1.d[1], v0.d[0]
110; CHECK-NEXT:    mov v0.16b, v1.16b
111; CHECK-NEXT:    ret
112  %tmp3 = extractelement <2 x i64> %tmp1, i32 0
113  %tmp4 = insertelement <2 x i64> %tmp2, i64 %tmp3, i32 1
114  ret <2 x i64> %tmp4
115}
116
; Floating-point counterpart of the <4 x i32> lane copy: lane 2 of %tmp1 goes
; into lane 1 of %tmp2. The expected instructions use the same .s lane mov.
117define <4 x float> @ins4f4(<4 x float> %tmp1, <4 x float> %tmp2) {
118; CHECK-LABEL: ins4f4:
119; CHECK:       // %bb.0:
120; CHECK-NEXT:    mov v1.s[1], v0.s[2]
121; CHECK-NEXT:    mov v0.16b, v1.16b
122; CHECK-NEXT:    ret
123  %tmp3 = extractelement <4 x float> %tmp1, i32 2
124  %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 1
125  ret <4 x float> %tmp4
126}
127
; Floating-point counterpart of the <2 x i64> lane copy: lane 0 of %tmp1 goes
; into lane 1 of %tmp2. The expected instructions use the same .d lane mov.
128define <2 x double> @ins2df2(<2 x double> %tmp1, <2 x double> %tmp2) {
129; CHECK-LABEL: ins2df2:
130; CHECK:       // %bb.0:
131; CHECK-NEXT:    mov v1.d[1], v0.d[0]
132; CHECK-NEXT:    mov v0.16b, v1.16b
133; CHECK-NEXT:    ret
134  %tmp3 = extractelement <2 x double> %tmp1, i32 0
135  %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1
136  ret <2 x double> %tmp4
137}
138
; Copy lane 2 of a 64-bit <8 x i8> source into lane 15 of a 128-bit <16 x i8>.
; The expected output carries a d0/q0 kill annotation for the narrow source.
139define <16 x i8> @ins8b16(<8 x i8> %tmp1, <16 x i8> %tmp2) {
140; CHECK-LABEL: ins8b16:
141; CHECK:       // %bb.0:
142; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
143; CHECK-NEXT:    mov v1.b[15], v0.b[2]
144; CHECK-NEXT:    mov v0.16b, v1.16b
145; CHECK-NEXT:    ret
146  %tmp3 = extractelement <8 x i8> %tmp1, i32 2
147  %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15
148  ret <16 x i8> %tmp4
149}
150
; Copy lane 2 of a 64-bit <4 x i16> source into lane 7 of a 128-bit <8 x i16>.
; The expected output carries a d0/q0 kill annotation for the narrow source.
151define <8 x i16> @ins4h8(<4 x i16> %tmp1, <8 x i16> %tmp2) {
152; CHECK-LABEL: ins4h8:
153; CHECK:       // %bb.0:
154; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
155; CHECK-NEXT:    mov v1.h[7], v0.h[2]
156; CHECK-NEXT:    mov v0.16b, v1.16b
157; CHECK-NEXT:    ret
158  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
159  %tmp4 = insertelement <8 x i16> %tmp2, i16 %tmp3, i32 7
160  ret <8 x i16> %tmp4
161}
162
; Copy lane 1 of a 64-bit <2 x i32> source into lane 1 of a 128-bit <4 x i32>.
; The expected output carries a d0/q0 kill annotation for the narrow source.
163define <4 x i32> @ins2s4(<2 x i32> %tmp1, <4 x i32> %tmp2) {
164; CHECK-LABEL: ins2s4:
165; CHECK:       // %bb.0:
166; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
167; CHECK-NEXT:    mov v1.s[1], v0.s[1]
168; CHECK-NEXT:    mov v0.16b, v1.16b
169; CHECK-NEXT:    ret
170  %tmp3 = extractelement <2 x i32> %tmp1, i32 1
171  %tmp4 = insertelement <4 x i32> %tmp2, i32 %tmp3, i32 1
172  ret <4 x i32> %tmp4
173}
174
; Copy the only lane of a <1 x i64> source into lane 1 of a <2 x i64>.
; The expected output carries a d0/q0 kill annotation for the narrow source.
175define <2 x i64> @ins1d2(<1 x i64> %tmp1, <2 x i64> %tmp2) {
176; CHECK-LABEL: ins1d2:
177; CHECK:       // %bb.0:
178; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
179; CHECK-NEXT:    mov v1.d[1], v0.d[0]
180; CHECK-NEXT:    mov v0.16b, v1.16b
181; CHECK-NEXT:    ret
182  %tmp3 = extractelement <1 x i64> %tmp1, i32 0
183  %tmp4 = insertelement <2 x i64> %tmp2, i64 %tmp3, i32 1
184  ret <2 x i64> %tmp4
185}
186
; Copy lane 1 of a 64-bit <2 x float> source into lane 1 of a <4 x float>.
; The expected output carries a d0/q0 kill annotation for the narrow source.
187define <4 x float> @ins2f4(<2 x float> %tmp1, <4 x float> %tmp2) {
188; CHECK-LABEL: ins2f4:
189; CHECK:       // %bb.0:
190; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
191; CHECK-NEXT:    mov v1.s[1], v0.s[1]
192; CHECK-NEXT:    mov v0.16b, v1.16b
193; CHECK-NEXT:    ret
194  %tmp3 = extractelement <2 x float> %tmp1, i32 1
195  %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 1
196  ret <4 x float> %tmp4
197}
198
; Copy the only lane of a <1 x double> source into lane 1 of a <2 x double>.
; The destination is the second argument, so the result is built in v1 and then
; moved to v0.
199define <2 x double> @ins1f2(<1 x double> %tmp1, <2 x double> %tmp2) {
200; CHECK-LABEL: ins1f2:
201; CHECK:       // %bb.0:
202; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
203; CHECK-NEXT:    mov v1.d[1], v0.d[0]
204; CHECK-NEXT:    mov v0.16b, v1.16b
205; CHECK-NEXT:    ret
206  %tmp3 = extractelement <1 x double> %tmp1, i32 0
207  %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1
208  ret <2 x double> %tmp4
209}
210
; Same operation as the <1 x double> -> <2 x double> lane copy, but with the
; destination vector passed first. The expected output inserts straight into v0
; with no trailing full-register move.
211define <2 x double> @ins1f2_args_flipped(<2 x double> %tmp2, <1 x double> %tmp1) {
212; CHECK-LABEL: ins1f2_args_flipped:
213; CHECK:       // %bb.0:
214; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
215; CHECK-NEXT:    mov v0.d[1], v1.d[0]
216; CHECK-NEXT:    ret
217  %tmp3 = extractelement <1 x double> %tmp1, i32 0
218  %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1
219  ret <2 x double> %tmp4
220}
221
; Copy lane 2 of a 128-bit <16 x i8> source into lane 7 of a 64-bit <8 x i8>.
; The expected output carries a d1/q1 kill annotation for the narrow destination.
222define <8 x i8> @ins16b8(<16 x i8> %tmp1, <8 x i8> %tmp2) {
223; CHECK-LABEL: ins16b8:
224; CHECK:       // %bb.0:
225; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
226; CHECK-NEXT:    mov v1.b[7], v0.b[2]
227; CHECK-NEXT:    mov v0.16b, v1.16b
228; CHECK-NEXT:    ret
229  %tmp3 = extractelement <16 x i8> %tmp1, i32 2
230  %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 7
231  ret <8 x i8> %tmp4
232}
233
; Copy lane 2 of a 128-bit <8 x i16> source into lane 3 of a 64-bit <4 x i16>.
; The expected output carries a d1/q1 kill annotation for the narrow destination.
234define <4 x i16> @ins8h4(<8 x i16> %tmp1, <4 x i16> %tmp2) {
235; CHECK-LABEL: ins8h4:
236; CHECK:       // %bb.0:
237; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
238; CHECK-NEXT:    mov v1.h[3], v0.h[2]
239; CHECK-NEXT:    mov v0.16b, v1.16b
240; CHECK-NEXT:    ret
241  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
242  %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3
243  ret <4 x i16> %tmp4
244}
245
; Copy lane 2 of a 128-bit <4 x i32> source into lane 1 of a 64-bit <2 x i32>.
; The expected output carries a d1/q1 kill annotation for the narrow destination.
246define <2 x i32> @ins4s2(<4 x i32> %tmp1, <2 x i32> %tmp2) {
247; CHECK-LABEL: ins4s2:
248; CHECK:       // %bb.0:
249; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
250; CHECK-NEXT:    mov v1.s[1], v0.s[2]
251; CHECK-NEXT:    mov v0.16b, v1.16b
252; CHECK-NEXT:    ret
253  %tmp3 = extractelement <4 x i32> %tmp1, i32 2
254  %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1
255  ret <2 x i32> %tmp4
256}
257
; Copy lane 0 of a <2 x i64> source into the only lane of a <1 x i64>.
; The expected output still goes through v1 and a full-register move to v0.
258define <1 x i64> @ins2d1(<2 x i64> %tmp1, <1 x i64> %tmp2) {
259; CHECK-LABEL: ins2d1:
260; CHECK:       // %bb.0:
261; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
262; CHECK-NEXT:    mov v1.d[0], v0.d[0]
263; CHECK-NEXT:    mov v0.16b, v1.16b
264; CHECK-NEXT:    ret
265  %tmp3 = extractelement <2 x i64> %tmp1, i32 0
266  %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0
267  ret <1 x i64> %tmp4
268}
269
; Copy lane 2 of a <4 x float> source into lane 1 of a 64-bit <2 x float>.
; The expected output carries a d1/q1 kill annotation for the narrow destination.
270define <2 x float> @ins4f2(<4 x float> %tmp1, <2 x float> %tmp2) {
271; CHECK-LABEL: ins4f2:
272; CHECK:       // %bb.0:
273; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
274; CHECK-NEXT:    mov v1.s[1], v0.s[2]
275; CHECK-NEXT:    mov v0.16b, v1.16b
276; CHECK-NEXT:    ret
277  %tmp3 = extractelement <4 x float> %tmp1, i32 2
278  %tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1
279  ret <2 x float> %tmp4
280}
281
; Insert lane 1 of a <2 x double> into the only lane of a <1 x double>. The
; insert overwrites every lane of %tmp2, and the expected output is a dup of
; v0.d[1] that never reads v1.
282define <1 x double> @ins2f1(<2 x double> %tmp1, <1 x double> %tmp2) {
283; CHECK-LABEL: ins2f1:
284; CHECK:       // %bb.0:
285; CHECK-NEXT:    dup v0.2d, v0.d[1]
286; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
287; CHECK-NEXT:    ret
288  %tmp3 = extractelement <2 x double> %tmp1, i32 1
289  %tmp4 = insertelement <1 x double> %tmp2, double %tmp3, i32 0
290  ret <1 x double> %tmp4
291}
292
; Copy lane 2 of %tmp1 into lane 4 of %tmp2, both 64-bit <8 x i8>.
; The expected output carries kill annotations for both d0 and d1.
293define <8 x i8> @ins8b8(<8 x i8> %tmp1, <8 x i8> %tmp2) {
294; CHECK-LABEL: ins8b8:
295; CHECK:       // %bb.0:
296; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
297; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
298; CHECK-NEXT:    mov v1.b[4], v0.b[2]
299; CHECK-NEXT:    mov v0.16b, v1.16b
300; CHECK-NEXT:    ret
301  %tmp3 = extractelement <8 x i8> %tmp1, i32 2
302  %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 4
303  ret <8 x i8> %tmp4
304}
305
; Copy lane 2 of %tmp1 into lane 3 of %tmp2, both 64-bit <4 x i16>.
; The expected output carries kill annotations for both d0 and d1.
306define <4 x i16> @ins4h4(<4 x i16> %tmp1, <4 x i16> %tmp2) {
307; CHECK-LABEL: ins4h4:
308; CHECK:       // %bb.0:
309; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
310; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
311; CHECK-NEXT:    mov v1.h[3], v0.h[2]
312; CHECK-NEXT:    mov v0.16b, v1.16b
313; CHECK-NEXT:    ret
314  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
315  %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3
316  ret <4 x i16> %tmp4
317}
318
; Copy lane 0 of %tmp1 into lane 1 of %tmp2, both 64-bit <2 x i32>.
; The expected output carries kill annotations for both d0 and d1.
319define <2 x i32> @ins2s2(<2 x i32> %tmp1, <2 x i32> %tmp2) {
320; CHECK-LABEL: ins2s2:
321; CHECK:       // %bb.0:
322; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
323; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
324; CHECK-NEXT:    mov v1.s[1], v0.s[0]
325; CHECK-NEXT:    mov v0.16b, v1.16b
326; CHECK-NEXT:    ret
327  %tmp3 = extractelement <2 x i32> %tmp1, i32 0
328  %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1
329  ret <2 x i32> %tmp4
330}
331
; Copy the only lane of %tmp1 into the only lane of %tmp2 (both <1 x i64>).
; The expected output still performs the lane mov through v1 plus a full move.
332define <1 x i64> @ins1d1(<1 x i64> %tmp1, <1 x i64> %tmp2) {
333; CHECK-LABEL: ins1d1:
334; CHECK:       // %bb.0:
335; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
336; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
337; CHECK-NEXT:    mov v1.d[0], v0.d[0]
338; CHECK-NEXT:    mov v0.16b, v1.16b
339; CHECK-NEXT:    ret
340  %tmp3 = extractelement <1 x i64> %tmp1, i32 0
341  %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0
342  ret <1 x i64> %tmp4
343}
344
; Copy lane 0 of %tmp1 into lane 1 of %tmp2, both 64-bit <2 x float>.
; The expected output carries kill annotations for both d0 and d1.
345define <2 x float> @ins2f2(<2 x float> %tmp1, <2 x float> %tmp2) {
346; CHECK-LABEL: ins2f2:
347; CHECK:       // %bb.0:
348; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
349; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
350; CHECK-NEXT:    mov v1.s[1], v0.s[0]
351; CHECK-NEXT:    mov v0.16b, v1.16b
352; CHECK-NEXT:    ret
353  %tmp3 = extractelement <2 x float> %tmp1, i32 0
354  %tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1
355  ret <2 x float> %tmp4
356}
357
; Copy the only lane of %tmp1 into the only lane of %tmp2 (both <1 x double>).
; The result equals %tmp1, and the expected output is a bare ret.
358define <1 x double> @ins1df1(<1 x double> %tmp1, <1 x double> %tmp2) {
359; CHECK-LABEL: ins1df1:
360; CHECK:       // %bb.0:
361; CHECK-NEXT:    ret
362  %tmp3 = extractelement <1 x double> %tmp1, i32 0
363  %tmp4 = insertelement <1 x double> %tmp2, double %tmp3, i32 0
364  ret <1 x double> %tmp4
365}
366
; Extract lane 8 of a <16 x i8> and zero-extend it to i32; the expected output
; is a single umov into w0.
367define i32 @umovw16b(<16 x i8> %tmp1) {
368; CHECK-LABEL: umovw16b:
369; CHECK:       // %bb.0:
370; CHECK-NEXT:    umov w0, v0.b[8]
371; CHECK-NEXT:    ret
372  %tmp3 = extractelement <16 x i8> %tmp1, i32 8
373  %tmp4 = zext i8 %tmp3 to i32
374  ret i32 %tmp4
375}
376
; Extract lane 2 of an <8 x i16> and zero-extend it to i32; the expected output
; is a single umov into w0.
377define i32 @umovw8h(<8 x i16> %tmp1) {
378; CHECK-LABEL: umovw8h:
379; CHECK:       // %bb.0:
380; CHECK-NEXT:    umov w0, v0.h[2]
381; CHECK-NEXT:    ret
382  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
383  %tmp4 = zext i16 %tmp3 to i32
384  ret i32 %tmp4
385}
386
; Extract lane 2 of a <4 x i32> with no extension; the expected output is a
; lane-indexed mov into w0.
387define i32 @umovw4s(<4 x i32> %tmp1) {
388; CHECK-LABEL: umovw4s:
389; CHECK:       // %bb.0:
390; CHECK-NEXT:    mov w0, v0.s[2]
391; CHECK-NEXT:    ret
392  %tmp3 = extractelement <4 x i32> %tmp1, i32 2
393  ret i32 %tmp3
394}
395
; Extract lane 1 of a <2 x i64> with no extension; the expected output is a
; lane-indexed mov into x0.
396define i64 @umovx2d(<2 x i64> %tmp1) {
397; CHECK-LABEL: umovx2d:
398; CHECK:       // %bb.0:
399; CHECK-NEXT:    mov x0, v0.d[1]
400; CHECK-NEXT:    ret
401  %tmp3 = extractelement <2 x i64> %tmp1, i32 1
402  ret i64 %tmp3
403}
404
; Extract lane 7 of a 64-bit <8 x i8> and zero-extend it to i32. The expected
; output is a umov into w0, preceded by a d0/q0 kill annotation.
405define i32 @umovw8b(<8 x i8> %tmp1) {
406; CHECK-LABEL: umovw8b:
407; CHECK:       // %bb.0:
408; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
409; CHECK-NEXT:    umov w0, v0.b[7]
410; CHECK-NEXT:    ret
411  %tmp3 = extractelement <8 x i8> %tmp1, i32 7
412  %tmp4 = zext i8 %tmp3 to i32
413  ret i32 %tmp4
414}
415
; Extract lane 2 of a 64-bit <4 x i16> and zero-extend it to i32. The expected
; output is a umov into w0, preceded by a d0/q0 kill annotation.
416define i32 @umovw4h(<4 x i16> %tmp1) {
417; CHECK-LABEL: umovw4h:
418; CHECK:       // %bb.0:
419; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
420; CHECK-NEXT:    umov w0, v0.h[2]
421; CHECK-NEXT:    ret
422  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
423  %tmp4 = zext i16 %tmp3 to i32
424  ret i32 %tmp4
425}
426
; Extract lane 1 of a 64-bit <2 x i32> with no extension. The expected output is
; a lane-indexed mov into w0, preceded by a d0/q0 kill annotation.
427define i32 @umovw2s(<2 x i32> %tmp1) {
428; CHECK-LABEL: umovw2s:
429; CHECK:       // %bb.0:
430; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
431; CHECK-NEXT:    mov w0, v0.s[1]
432; CHECK-NEXT:    ret
433  %tmp3 = extractelement <2 x i32> %tmp1, i32 1
434  ret i32 %tmp3
435}
436
; Extract the only lane of a <1 x i64>. The expected output is an fmov from d0
; to x0 rather than a lane-indexed mov.
437define i64 @umovx1d(<1 x i64> %tmp1) {
438; CHECK-LABEL: umovx1d:
439; CHECK:       // %bb.0:
440; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
441; CHECK-NEXT:    fmov x0, d0
442; CHECK-NEXT:    ret
443  %tmp3 = extractelement <1 x i64> %tmp1, i32 0
444  ret i64 %tmp3
445}
446
; Extract lane 8 of a <16 x i8>, sign-extend it to i32, and add the result to
; itself. The expected output is an smov into w8 followed by an add.
447define i32 @smovw16b(<16 x i8> %tmp1) {
448; CHECK-LABEL: smovw16b:
449; CHECK:       // %bb.0:
450; CHECK-NEXT:    smov w8, v0.b[8]
451; CHECK-NEXT:    add w0, w8, w8
452; CHECK-NEXT:    ret
453  %tmp3 = extractelement <16 x i8> %tmp1, i32 8
454  %tmp4 = sext i8 %tmp3 to i32
455  %tmp5 = add i32 %tmp4, %tmp4
456  ret i32 %tmp5
457}
458
; Extract lane 2 of an <8 x i16>, sign-extend it to i32, and add the result to
; itself. The expected output is an smov into w8 followed by an add.
459define i32 @smovw8h(<8 x i16> %tmp1) {
460; CHECK-LABEL: smovw8h:
461; CHECK:       // %bb.0:
462; CHECK-NEXT:    smov w8, v0.h[2]
463; CHECK-NEXT:    add w0, w8, w8
464; CHECK-NEXT:    ret
465  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
466  %tmp4 = sext i16 %tmp3 to i32
467  %tmp5 = add i32 %tmp4, %tmp4
468  ret i32 %tmp5
469}
470
; Extract lane 8 of a <16 x i8> and sign-extend it to i64; the expected output
; is a single smov into x0.
471define i64 @smovx16b(<16 x i8> %tmp1) {
472; CHECK-LABEL: smovx16b:
473; CHECK:       // %bb.0:
474; CHECK-NEXT:    smov x0, v0.b[8]
475; CHECK-NEXT:    ret
476  %tmp3 = extractelement <16 x i8> %tmp1, i32 8
477  %tmp4 = sext i8 %tmp3 to i64
478  ret i64 %tmp4
479}
480
; Extract lane 2 of an <8 x i16> and sign-extend it to i64; the expected output
; is a single smov into x0.
481define i64 @smovx8h(<8 x i16> %tmp1) {
482; CHECK-LABEL: smovx8h:
483; CHECK:       // %bb.0:
484; CHECK-NEXT:    smov x0, v0.h[2]
485; CHECK-NEXT:    ret
486  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
487  %tmp4 = sext i16 %tmp3 to i64
488  ret i64 %tmp4
489}
490
; Extract lane 2 of a <4 x i32> and sign-extend it to i64; the expected output
; is a single smov into x0.
491define i64 @smovx4s(<4 x i32> %tmp1) {
492; CHECK-LABEL: smovx4s:
493; CHECK:       // %bb.0:
494; CHECK-NEXT:    smov x0, v0.s[2]
495; CHECK-NEXT:    ret
496  %tmp3 = extractelement <4 x i32> %tmp1, i32 2
497  %tmp4 = sext i32 %tmp3 to i64
498  ret i64 %tmp4
499}
500
; Extract lane 4 of a 64-bit <8 x i8>, sign-extend it to i32, and add the result
; to itself. The expected output is an smov into w8 followed by an add.
501define i32 @smovw8b(<8 x i8> %tmp1) {
502; CHECK-LABEL: smovw8b:
503; CHECK:       // %bb.0:
504; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
505; CHECK-NEXT:    smov w8, v0.b[4]
506; CHECK-NEXT:    add w0, w8, w8
507; CHECK-NEXT:    ret
508  %tmp3 = extractelement <8 x i8> %tmp1, i32 4
509  %tmp4 = sext i8 %tmp3 to i32
510  %tmp5 = add i32 %tmp4, %tmp4
511  ret i32 %tmp5
512}
513
; Extract lane 2 of a 64-bit <4 x i16>, sign-extend it to i32, and add the
; result to itself. The expected output is an smov into w8 followed by an add.
514define i32 @smovw4h(<4 x i16> %tmp1) {
515; CHECK-LABEL: smovw4h:
516; CHECK:       // %bb.0:
517; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
518; CHECK-NEXT:    smov w8, v0.h[2]
519; CHECK-NEXT:    add w0, w8, w8
520; CHECK-NEXT:    ret
521  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
522  %tmp4 = sext i16 %tmp3 to i32
523  %tmp5 = add i32 %tmp4, %tmp4
524  ret i32 %tmp5
525}
526
; Extract lane 6 of a 64-bit <8 x i8> and sign-extend it to i32; the expected
; output is an smov into w0.
; NOTE(review): despite the "x" in the name, this function returns i32 and the
; expected destination is a w register - confirm whether an i64 variant was
; intended.
527define i32 @smovx8b(<8 x i8> %tmp1) {
528; CHECK-LABEL: smovx8b:
529; CHECK:       // %bb.0:
530; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
531; CHECK-NEXT:    smov w0, v0.b[6]
532; CHECK-NEXT:    ret
533  %tmp3 = extractelement <8 x i8> %tmp1, i32 6
534  %tmp4 = sext i8 %tmp3 to i32
535  ret i32 %tmp4
536}
537
; Extract lane 2 of a 64-bit <4 x i16> and sign-extend it to i32; the expected
; output is an smov into w0.
; NOTE(review): despite the "x" in the name, this function returns i32 and the
; expected destination is a w register - confirm whether an i64 variant was
; intended.
538define i32 @smovx4h(<4 x i16> %tmp1) {
539; CHECK-LABEL: smovx4h:
540; CHECK:       // %bb.0:
541; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
542; CHECK-NEXT:    smov w0, v0.h[2]
543; CHECK-NEXT:    ret
544  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
545  %tmp4 = sext i16 %tmp3 to i32
546  ret i32 %tmp4
547}
548
; Extract lane 1 of a 64-bit <2 x i32> and sign-extend it to i64. The expected
; output is an smov into x0, preceded by a d0/q0 kill annotation.
549define i64 @smovx2s(<2 x i32> %tmp1) {
550; CHECK-LABEL: smovx2s:
551; CHECK:       // %bb.0:
552; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
553; CHECK-NEXT:    smov x0, v0.s[1]
554; CHECK-NEXT:    ret
555  %tmp3 = extractelement <2 x i32> %tmp1, i32 1
556  %tmp4 = sext i32 %tmp3 to i64
557  ret i64 %tmp4
558}
559
; Lane copy written as a shufflevector: the mask keeps %v1 except at position 5,
; where index 11 selects lane 3 of %v2. The expected output is a single
; element-to-element mov into v0.
560define <8 x i8> @test_vcopy_lane_s8(<8 x i8> %v1, <8 x i8> %v2) {
561; CHECK-LABEL: test_vcopy_lane_s8:
562; CHECK:       // %bb.0:
563; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
564; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
565; CHECK-NEXT:    mov v0.b[5], v1.b[3]
566; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
567; CHECK-NEXT:    ret
568  %vset_lane = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 11, i32 6, i32 7>
569  ret <8 x i8> %vset_lane
570}
571
; 128-bit lane copy written as a shufflevector: the mask keeps %v1 except at
; position 14, where index 22 selects lane 6 of %v2.
572define <16 x i8> @test_vcopyq_laneq_s8(<16 x i8> %v1, <16 x i8> %v2) {
573; CHECK-LABEL: test_vcopyq_laneq_s8:
574; CHECK:       // %bb.0:
575; CHECK-NEXT:    mov v0.b[14], v1.b[6]
576; CHECK-NEXT:    ret
577  %vset_lane = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 22, i32 15>
578  ret <16 x i8> %vset_lane
579}
580
; Swapped form of the 64-bit lane copy: the mask takes positions 0-6 from %v2
; (indices 8-14) and position 7 from lane 0 of %v1. The expected output inserts
; into v1 and then moves the result to v0.
581define <8 x i8> @test_vcopy_lane_swap_s8(<8 x i8> %v1, <8 x i8> %v2) {
582; CHECK-LABEL: test_vcopy_lane_swap_s8:
583; CHECK:       // %bb.0:
584; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
585; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
586; CHECK-NEXT:    mov v1.b[7], v0.b[0]
587; CHECK-NEXT:    mov v0.16b, v1.16b
588; CHECK-NEXT:    ret
589  %vset_lane = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 0>
590  ret <8 x i8> %vset_lane
591}
592
; Swapped form of the 128-bit lane copy: the mask takes position 0 from lane 15
; of %v1 and positions 1-15 from %v2 (indices 17-31). The expected output
; inserts into v1 and then moves the result to v0.
593define <16 x i8> @test_vcopyq_laneq_swap_s8(<16 x i8> %v1, <16 x i8> %v2) {
594; CHECK-LABEL: test_vcopyq_laneq_swap_s8:
595; CHECK:       // %bb.0:
596; CHECK-NEXT:    mov v1.b[0], v0.b[15]
597; CHECK-NEXT:    mov v0.16b, v1.16b
598; CHECK-NEXT:    ret
599  %vset_lane = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 15, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
600  ret <16 x i8> %vset_lane
601}
602
; Build an <8 x i8> by inserting the same scalar %v1 into every lane; the
; expected output collapses the insert chain into one dup from w0.
603define <8 x i8> @test_vdup_n_u8(i8 %v1) #0 {
604; CHECK-LABEL: test_vdup_n_u8:
605; CHECK:       // %bb.0:
606; CHECK-NEXT:    dup v0.8b, w0
607; CHECK-NEXT:    ret
608  %vecinit.i = insertelement <8 x i8> undef, i8 %v1, i32 0
609  %vecinit1.i = insertelement <8 x i8> %vecinit.i, i8 %v1, i32 1
610  %vecinit2.i = insertelement <8 x i8> %vecinit1.i, i8 %v1, i32 2
611  %vecinit3.i = insertelement <8 x i8> %vecinit2.i, i8 %v1, i32 3
612  %vecinit4.i = insertelement <8 x i8> %vecinit3.i, i8 %v1, i32 4
613  %vecinit5.i = insertelement <8 x i8> %vecinit4.i, i8 %v1, i32 5
614  %vecinit6.i = insertelement <8 x i8> %vecinit5.i, i8 %v1, i32 6
615  %vecinit7.i = insertelement <8 x i8> %vecinit6.i, i8 %v1, i32 7
616  ret <8 x i8> %vecinit7.i
617}
618
; Build a <4 x i16> by inserting the same scalar %v1 into every lane; the
; expected output collapses the insert chain into one dup from w0.
619define <4 x i16> @test_vdup_n_u16(i16 %v1) #0 {
620; CHECK-LABEL: test_vdup_n_u16:
621; CHECK:       // %bb.0:
622; CHECK-NEXT:    dup v0.4h, w0
623; CHECK-NEXT:    ret
624  %vecinit.i = insertelement <4 x i16> undef, i16 %v1, i32 0
625  %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %v1, i32 1
626  %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %v1, i32 2
627  %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %v1, i32 3
628  ret <4 x i16> %vecinit3.i
629}
630
; Build a <2 x i32> by inserting the same scalar %v1 into both lanes; the
; expected output is one dup from w0.
631define <2 x i32> @test_vdup_n_u32(i32 %v1) #0 {
632; CHECK-LABEL: test_vdup_n_u32:
633; CHECK:       // %bb.0:
634; CHECK-NEXT:    dup v0.2s, w0
635; CHECK-NEXT:    ret
636  %vecinit.i = insertelement <2 x i32> undef, i32 %v1, i32 0
637  %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %v1, i32 1
638  ret <2 x i32> %vecinit1.i
639}
640
; Build a <1 x i64> from the scalar %v1; with a single lane the expected output
; is an fmov from x0 to d0 rather than a dup.
641define <1 x i64> @test_vdup_n_u64(i64 %v1) #0 {
642; CHECK-LABEL: test_vdup_n_u64:
643; CHECK:       // %bb.0:
644; CHECK-NEXT:    fmov d0, x0
645; CHECK-NEXT:    ret
646  %vecinit.i = insertelement <1 x i64> undef, i64 %v1, i32 0
647  ret <1 x i64> %vecinit.i
648}
649
; Build a <16 x i8> by inserting the same scalar %v1 into all 16 lanes; the
; expected output collapses the insert chain into one dup from w0.
650define <16 x i8> @test_vdupq_n_u8(i8 %v1) #0 {
651; CHECK-LABEL: test_vdupq_n_u8:
652; CHECK:       // %bb.0:
653; CHECK-NEXT:    dup v0.16b, w0
654; CHECK-NEXT:    ret
655  %vecinit.i = insertelement <16 x i8> undef, i8 %v1, i32 0
656  %vecinit1.i = insertelement <16 x i8> %vecinit.i, i8 %v1, i32 1
657  %vecinit2.i = insertelement <16 x i8> %vecinit1.i, i8 %v1, i32 2
658  %vecinit3.i = insertelement <16 x i8> %vecinit2.i, i8 %v1, i32 3
659  %vecinit4.i = insertelement <16 x i8> %vecinit3.i, i8 %v1, i32 4
660  %vecinit5.i = insertelement <16 x i8> %vecinit4.i, i8 %v1, i32 5
661  %vecinit6.i = insertelement <16 x i8> %vecinit5.i, i8 %v1, i32 6
662  %vecinit7.i = insertelement <16 x i8> %vecinit6.i, i8 %v1, i32 7
663  %vecinit8.i = insertelement <16 x i8> %vecinit7.i, i8 %v1, i32 8
664  %vecinit9.i = insertelement <16 x i8> %vecinit8.i, i8 %v1, i32 9
665  %vecinit10.i = insertelement <16 x i8> %vecinit9.i, i8 %v1, i32 10
666  %vecinit11.i = insertelement <16 x i8> %vecinit10.i, i8 %v1, i32 11
667  %vecinit12.i = insertelement <16 x i8> %vecinit11.i, i8 %v1, i32 12
668  %vecinit13.i = insertelement <16 x i8> %vecinit12.i, i8 %v1, i32 13
669  %vecinit14.i = insertelement <16 x i8> %vecinit13.i, i8 %v1, i32 14
670  %vecinit15.i = insertelement <16 x i8> %vecinit14.i, i8 %v1, i32 15
671  ret <16 x i8> %vecinit15.i
672}
673
; Build an <8 x i16> by inserting the same scalar %v1 into all 8 lanes; the
; expected output collapses the insert chain into one dup from w0.
674define <8 x i16> @test_vdupq_n_u16(i16 %v1) #0 {
675; CHECK-LABEL: test_vdupq_n_u16:
676; CHECK:       // %bb.0:
677; CHECK-NEXT:    dup v0.8h, w0
678; CHECK-NEXT:    ret
679  %vecinit.i = insertelement <8 x i16> undef, i16 %v1, i32 0
680  %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %v1, i32 1
681  %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %v1, i32 2
682  %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %v1, i32 3
683  %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %v1, i32 4
684  %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %v1, i32 5
685  %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %v1, i32 6
686  %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %v1, i32 7
687  ret <8 x i16> %vecinit7.i
688}
689
; Build a <4 x i32> by inserting the same scalar %v1 into all 4 lanes; the
; expected output collapses the insert chain into one dup from w0.
690define <4 x i32> @test_vdupq_n_u32(i32 %v1) #0 {
691; CHECK-LABEL: test_vdupq_n_u32:
692; CHECK:       // %bb.0:
693; CHECK-NEXT:    dup v0.4s, w0
694; CHECK-NEXT:    ret
695  %vecinit.i = insertelement <4 x i32> undef, i32 %v1, i32 0
696  %vecinit1.i = insertelement <4 x i32> %vecinit.i, i32 %v1, i32 1
697  %vecinit2.i = insertelement <4 x i32> %vecinit1.i, i32 %v1, i32 2
698  %vecinit3.i = insertelement <4 x i32> %vecinit2.i, i32 %v1, i32 3
699  ret <4 x i32> %vecinit3.i
700}
701
; Build a <2 x i64> by inserting the same scalar %v1 into both lanes; the
; expected output is one dup from x0.
702define <2 x i64> @test_vdupq_n_u64(i64 %v1) #0 {
703; CHECK-LABEL: test_vdupq_n_u64:
704; CHECK:       // %bb.0:
705; CHECK-NEXT:    dup v0.2d, x0
706; CHECK-NEXT:    ret
707  %vecinit.i = insertelement <2 x i64> undef, i64 %v1, i32 0
708  %vecinit1.i = insertelement <2 x i64> %vecinit.i, i64 %v1, i32 1
709  ret <2 x i64> %vecinit1.i
710}
711
; Splat lane 5 of a 64-bit <8 x i8> across an <8 x i8> result via a
; shufflevector with an all-5 mask; the expected output is a lane-indexed dup.
712define <8 x i8> @test_vdup_lane_s8(<8 x i8> %v1) #0 {
713; CHECK-LABEL: test_vdup_lane_s8:
714; CHECK:       // %bb.0:
715; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
716; CHECK-NEXT:    dup v0.8b, v0.b[5]
717; CHECK-NEXT:    ret
718  %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
719  ret <8 x i8> %shuffle
720}
721
; Splat lane 2 of a 64-bit <4 x i16> across a <4 x i16> result via a
; shufflevector with an all-2 mask; the expected output is a lane-indexed dup.
722define <4 x i16> @test_vdup_lane_s16(<4 x i16> %v1) #0 {
723; CHECK-LABEL: test_vdup_lane_s16:
724; CHECK:       // %bb.0:
725; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
726; CHECK-NEXT:    dup v0.4h, v0.h[2]
727; CHECK-NEXT:    ret
728  %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
729  ret <4 x i16> %shuffle
730}
731
; Splat lane 1 of a 64-bit <2 x i32> across a <2 x i32> result via a
; shufflevector with an all-1 mask; the expected output is a lane-indexed dup.
732define <2 x i32> @test_vdup_lane_s32(<2 x i32> %v1) #0 {
733; CHECK-LABEL: test_vdup_lane_s32:
734; CHECK:       // %bb.0:
735; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
736; CHECK-NEXT:    dup v0.2s, v0.s[1]
737; CHECK-NEXT:    ret
738  %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
739  ret <2 x i32> %shuffle
740}
741
; Splat lane 5 of a 64-bit <8 x i8> across a 128-bit <16 x i8> result (widening
; shufflevector); the expected output is a lane-indexed dup to v0.16b.
742define <16 x i8> @test_vdupq_lane_s8(<8 x i8> %v1) #0 {
743; CHECK-LABEL: test_vdupq_lane_s8:
744; CHECK:       // %bb.0:
745; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
746; CHECK-NEXT:    dup v0.16b, v0.b[5]
747; CHECK-NEXT:    ret
748  %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
749  ret <16 x i8> %shuffle
750}
751
; Splat lane 2 of a 64-bit <4 x i16> across a 128-bit <8 x i16> result (widening
; shufflevector); the expected output is a lane-indexed dup to v0.8h.
752define <8 x i16> @test_vdupq_lane_s16(<4 x i16> %v1) #0 {
753; CHECK-LABEL: test_vdupq_lane_s16:
754; CHECK:       // %bb.0:
755; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
756; CHECK-NEXT:    dup v0.8h, v0.h[2]
757; CHECK-NEXT:    ret
758  %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
759  ret <8 x i16> %shuffle
760}
761
; Splat lane 1 of a 64-bit <2 x i32> across a 128-bit <4 x i32> result (widening
; shufflevector); the expected output is a lane-indexed dup to v0.4s.
762define <4 x i32> @test_vdupq_lane_s32(<2 x i32> %v1) #0 {
763; CHECK-LABEL: test_vdupq_lane_s32:
764; CHECK:       // %bb.0:
765; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
766; CHECK-NEXT:    dup v0.4s, v0.s[1]
767; CHECK-NEXT:    ret
768  %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
769  ret <4 x i32> %shuffle
770}
771
; Splat the only lane of a <1 x i64> across a <2 x i64> result, using a
; zeroinitializer mask; the expected output is a lane-indexed dup to v0.2d.
772define <2 x i64> @test_vdupq_lane_s64(<1 x i64> %v1) #0 {
773; CHECK-LABEL: test_vdupq_lane_s64:
774; CHECK:       // %bb.0:
775; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
776; CHECK-NEXT:    dup v0.2d, v0.d[0]
777; CHECK-NEXT:    ret
778  %shuffle = shufflevector <1 x i64> %v1, <1 x i64> undef, <2 x i32> zeroinitializer
779  ret <2 x i64> %shuffle
780}
781
; Splat lane 5 of a 128-bit <16 x i8> across a 64-bit <8 x i8> result (narrowing
; shufflevector); the expected output is a lane-indexed dup to v0.8b.
782define <8 x i8> @test_vdup_laneq_s8(<16 x i8> %v1) #0 {
783; CHECK-LABEL: test_vdup_laneq_s8:
784; CHECK:       // %bb.0:
785; CHECK-NEXT:    dup v0.8b, v0.b[5]
786; CHECK-NEXT:    ret
787  %shuffle = shufflevector <16 x i8> %v1, <16 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
788  ret <8 x i8> %shuffle
789}
790
; Splat lane 2 of a 128-bit <8 x i16> across a 64-bit <4 x i16> result
; (narrowing shufflevector); the expected output is a lane-indexed dup to v0.4h.
791define <4 x i16> @test_vdup_laneq_s16(<8 x i16> %v1) #0 {
792; CHECK-LABEL: test_vdup_laneq_s16:
793; CHECK:       // %bb.0:
794; CHECK-NEXT:    dup v0.4h, v0.h[2]
795; CHECK-NEXT:    ret
796  %shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
797  ret <4 x i16> %shuffle
798}
799
; Splat lane 1 of a 128-bit <4 x i32> across a 64-bit <2 x i32> result
; (narrowing shufflevector); the expected output is a lane-indexed dup to v0.2s.
800define <2 x i32> @test_vdup_laneq_s32(<4 x i32> %v1) #0 {
801; CHECK-LABEL: test_vdup_laneq_s32:
802; CHECK:       // %bb.0:
803; CHECK-NEXT:    dup v0.2s, v0.s[1]
804; CHECK-NEXT:    ret
805  %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
806  ret <2 x i32> %shuffle
807}
808
; Splat lane 5 of a <16 x i8> across a <16 x i8> result; the expected output is
; a lane-indexed dup to v0.16b.
809define <16 x i8> @test_vdupq_laneq_s8(<16 x i8> %v1) #0 {
810; CHECK-LABEL: test_vdupq_laneq_s8:
811; CHECK:       // %bb.0:
812; CHECK-NEXT:    dup v0.16b, v0.b[5]
813; CHECK-NEXT:    ret
814  %shuffle = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
815  ret <16 x i8> %shuffle
816}
817
; Splat lane 2 of an <8 x i16> across an <8 x i16> result; the expected output
; is a lane-indexed dup to v0.8h.
818define <8 x i16> @test_vdupq_laneq_s16(<8 x i16> %v1) #0 {
819; CHECK-LABEL: test_vdupq_laneq_s16:
820; CHECK:       // %bb.0:
821; CHECK-NEXT:    dup v0.8h, v0.h[2]
822; CHECK-NEXT:    ret
823  %shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
824  ret <8 x i16> %shuffle
825}
826
; Splat lane 1 of a <4 x i32> across a <4 x i32> result; the expected output is
; a lane-indexed dup to v0.4s.
827define <4 x i32> @test_vdupq_laneq_s32(<4 x i32> %v1) #0 {
828; CHECK-LABEL: test_vdupq_laneq_s32:
829; CHECK:       // %bb.0:
830; CHECK-NEXT:    dup v0.4s, v0.s[1]
831; CHECK-NEXT:    ret
832  %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
833  ret <4 x i32> %shuffle
834}
835
; Splat lane 0 of a <2 x i64> across a <2 x i64> result, using a zeroinitializer
; mask; the expected output is a lane-indexed dup to v0.2d.
836define <2 x i64> @test_vdupq_laneq_s64(<2 x i64> %v1) #0 {
837; CHECK-LABEL: test_vdupq_laneq_s64:
838; CHECK:       // %bb.0:
839; CHECK-NEXT:    dup v0.2d, v0.d[0]
840; CHECK-NEXT:    ret
841  %shuffle = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
842  ret <2 x i64> %shuffle
843}
844
; Bitcast an <8 x i8> to a scalar i64; the expected output is a single fmov
; from d0 to x0.
845define i64 @test_bitcastv8i8toi64(<8 x i8> %in) {
846; CHECK-LABEL: test_bitcastv8i8toi64:
847; CHECK:       // %bb.0:
848; CHECK-NEXT:    fmov x0, d0
849; CHECK-NEXT:    ret
850   %res = bitcast <8 x i8> %in to i64
851   ret i64 %res
852}
853
; Bitcast a <4 x i16> to a scalar i64; the expected output is a single fmov
; from d0 to x0.
854define i64 @test_bitcastv4i16toi64(<4 x i16> %in) {
855; CHECK-LABEL: test_bitcastv4i16toi64:
856; CHECK:       // %bb.0:
857; CHECK-NEXT:    fmov x0, d0
858; CHECK-NEXT:    ret
859   %res = bitcast <4 x i16> %in to i64
860   ret i64 %res
861}
862
; Bitcast a <2 x i32> to a scalar i64; the expected output is a single fmov
; from d0 to x0.
863define i64 @test_bitcastv2i32toi64(<2 x i32> %in) {
864; CHECK-LABEL: test_bitcastv2i32toi64:
865; CHECK:       // %bb.0:
866; CHECK-NEXT:    fmov x0, d0
867; CHECK-NEXT:    ret
868   %res = bitcast <2 x i32> %in to i64
869   ret i64 %res
870}
871
; Bitcast a <2 x float> to a scalar i64; the expected output is a single fmov
; from d0 to x0.
872define i64 @test_bitcastv2f32toi64(<2 x float> %in) {
873; CHECK-LABEL: test_bitcastv2f32toi64:
874; CHECK:       // %bb.0:
875; CHECK-NEXT:    fmov x0, d0
876; CHECK-NEXT:    ret
877   %res = bitcast <2 x float> %in to i64
878   ret i64 %res
879}
880
; Bitcast a <1 x i64> to a scalar i64; the expected output is a single fmov
; from d0 to x0.
881define i64 @test_bitcastv1i64toi64(<1 x i64> %in) {
882; CHECK-LABEL: test_bitcastv1i64toi64:
883; CHECK:       // %bb.0:
884; CHECK-NEXT:    fmov x0, d0
885; CHECK-NEXT:    ret
886   %res = bitcast <1 x i64> %in to i64
887   ret i64 %res
888}
889
; Bitcast a <1 x double> to a scalar i64; the expected output is a single fmov
; from d0 to x0.
890define i64 @test_bitcastv1f64toi64(<1 x double> %in) {
891; CHECK-LABEL: test_bitcastv1f64toi64:
892; CHECK:       // %bb.0:
893; CHECK-NEXT:    fmov x0, d0
894; CHECK-NEXT:    ret
895   %res = bitcast <1 x double> %in to i64
896   ret i64 %res
897}
898
; Bitcast a scalar i64 to an <8 x i8>; the expected output is a single fmov
; from x0 to d0.
899define <8 x i8> @test_bitcasti64tov8i8(i64 %in) {
900; CHECK-LABEL: test_bitcasti64tov8i8:
901; CHECK:       // %bb.0:
902; CHECK-NEXT:    fmov d0, x0
903; CHECK-NEXT:    ret
904   %res = bitcast i64 %in to <8 x i8>
905   ret <8 x i8> %res
906}
907
; Bitcast of i64 to <4 x i16>: the expected output is one FMOV from x0 to d0.
908define <4 x i16> @test_bitcasti64tov4i16(i64 %in) {
909; CHECK-LABEL: test_bitcasti64tov4i16:
910; CHECK:       // %bb.0:
911; CHECK-NEXT:    fmov d0, x0
912; CHECK-NEXT:    ret
913   %res = bitcast i64 %in to <4 x i16>
914   ret <4 x i16> %res
915}
916
; Bitcast of i64 to <2 x i32>: the expected output is one FMOV from x0 to d0.
917define <2 x i32> @test_bitcasti64tov2i32(i64 %in) {
918; CHECK-LABEL: test_bitcasti64tov2i32:
919; CHECK:       // %bb.0:
920; CHECK-NEXT:    fmov d0, x0
921; CHECK-NEXT:    ret
922   %res = bitcast i64 %in to <2 x i32>
923   ret <2 x i32> %res
924}
925
; Bitcast of i64 to <2 x float>: the expected output is one FMOV from x0 to d0.
926define <2 x float> @test_bitcasti64tov2f32(i64 %in) {
927; CHECK-LABEL: test_bitcasti64tov2f32:
928; CHECK:       // %bb.0:
929; CHECK-NEXT:    fmov d0, x0
930; CHECK-NEXT:    ret
931   %res = bitcast i64 %in to <2 x float>
932   ret <2 x float> %res
933}
934
; Bitcast of i64 to <1 x i64>: the expected output is one FMOV from x0 to d0.
935define <1 x i64> @test_bitcasti64tov1i64(i64 %in) {
936; CHECK-LABEL: test_bitcasti64tov1i64:
937; CHECK:       // %bb.0:
938; CHECK-NEXT:    fmov d0, x0
939; CHECK-NEXT:    ret
940   %res = bitcast i64 %in to <1 x i64>
941   ret <1 x i64> %res
942}
943
; Bitcast of i64 to <1 x double>: the expected output is one FMOV from x0 to d0.
944define <1 x double> @test_bitcasti64tov1f64(i64 %in) {
945; CHECK-LABEL: test_bitcasti64tov1f64:
946; CHECK:       // %bb.0:
947; CHECK-NEXT:    fmov d0, x0
948; CHECK-NEXT:    ret
949   %res = bitcast i64 %in to <1 x double>
950   ret <1 x double> %res
951}
952
; Negates an <8 x i8> (0 - %a), reinterprets the 64 bits as <1 x double> and
; converts that with fptosi. The expected output has no instruction for the
; bitcast itself: vector NEG, FCVTZS from d0 into x8, FMOV of x8 back to d0.
953define <1 x i64> @test_bitcastv8i8tov1f64(<8 x i8> %a) #0 {
954; CHECK-LABEL: test_bitcastv8i8tov1f64:
955; CHECK:       // %bb.0:
956; CHECK-NEXT:    neg v0.8b, v0.8b
957; CHECK-NEXT:    fcvtzs x8, d0
958; CHECK-NEXT:    fmov d0, x8
959; CHECK-NEXT:    ret
960  %sub.i = sub <8 x i8> zeroinitializer, %a
961  %1 = bitcast <8 x i8> %sub.i to <1 x double>
962  %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
963  ret <1 x i64> %vcvt.i
964}
965
; Same pattern as the <8 x i8> variant, starting from <4 x i16>: integer NEG on
; .4h lanes, bitcast to <1 x double> (no instruction expected), then fptosi
; (FCVTZS into x8 followed by FMOV back to d0).
966define <1 x i64> @test_bitcastv4i16tov1f64(<4 x i16> %a) #0 {
967; CHECK-LABEL: test_bitcastv4i16tov1f64:
968; CHECK:       // %bb.0:
969; CHECK-NEXT:    neg v0.4h, v0.4h
970; CHECK-NEXT:    fcvtzs x8, d0
971; CHECK-NEXT:    fmov d0, x8
972; CHECK-NEXT:    ret
973  %sub.i = sub <4 x i16> zeroinitializer, %a
974  %1 = bitcast <4 x i16> %sub.i to <1 x double>
975  %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
976  ret <1 x i64> %vcvt.i
977}
978
; <2 x i32> variant: integer NEG on .2s lanes, bitcast to <1 x double> (no
; instruction expected), then fptosi (FCVTZS into x8, FMOV back to d0).
979define <1 x i64> @test_bitcastv2i32tov1f64(<2 x i32> %a) #0 {
980; CHECK-LABEL: test_bitcastv2i32tov1f64:
981; CHECK:       // %bb.0:
982; CHECK-NEXT:    neg v0.2s, v0.2s
983; CHECK-NEXT:    fcvtzs x8, d0
984; CHECK-NEXT:    fmov d0, x8
985; CHECK-NEXT:    ret
986  %sub.i = sub <2 x i32> zeroinitializer, %a
987  %1 = bitcast <2 x i32> %sub.i to <1 x double>
988  %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
989  ret <1 x i64> %vcvt.i
990}
991
; <1 x i64> variant: the negate is expected as the scalar form (neg d0, d0),
; the bitcast to <1 x double> costs no instruction, and fptosi is FCVTZS into
; x8 followed by FMOV back to d0.
992define <1 x i64> @test_bitcastv1i64tov1f64(<1 x i64> %a) #0 {
993; CHECK-LABEL: test_bitcastv1i64tov1f64:
994; CHECK:       // %bb.0:
995; CHECK-NEXT:    neg d0, d0
996; CHECK-NEXT:    fcvtzs x8, d0
997; CHECK-NEXT:    fmov d0, x8
998; CHECK-NEXT:    ret
999  %sub.i = sub <1 x i64> zeroinitializer, %a
1000  %1 = bitcast <1 x i64> %sub.i to <1 x double>
1001  %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
1002  ret <1 x i64> %vcvt.i
1003}
1004
; <2 x float> variant: the input is negated with a floating-point subtract from
; -0.0 (expected as FNEG on .2s lanes), bitcast to <1 x double> (no
; instruction expected), then fptosi (FCVTZS into x8, FMOV back to d0).
1005define <1 x i64> @test_bitcastv2f32tov1f64(<2 x float> %a) #0 {
1006; CHECK-LABEL: test_bitcastv2f32tov1f64:
1007; CHECK:       // %bb.0:
1008; CHECK-NEXT:    fneg v0.2s, v0.2s
1009; CHECK-NEXT:    fcvtzs x8, d0
1010; CHECK-NEXT:    fmov d0, x8
1011; CHECK-NEXT:    ret
1012  %sub.i = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %a
1013  %1 = bitcast <2 x float> %sub.i to <1 x double>
1014  %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
1015  ret <1 x i64> %vcvt.i
1016}
1017
; Converts <1 x i64> to <1 x double> with sitofp (expected: FMOV d0 to x8, then
; SCVTF), reinterprets the result as <8 x i8> and negates it. The bitcast is
; expected to cost no instruction; the negate is a vector NEG on .8b lanes.
1018define <8 x i8> @test_bitcastv1f64tov8i8(<1 x i64> %a) #0 {
1019; CHECK-LABEL: test_bitcastv1f64tov8i8:
1020; CHECK:       // %bb.0:
1021; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
1022; CHECK-NEXT:    fmov x8, d0
1023; CHECK-NEXT:    scvtf d0, x8
1024; CHECK-NEXT:    neg v0.8b, v0.8b
1025; CHECK-NEXT:    ret
1026  %vcvt.i = sitofp <1 x i64> %a to <1 x double>
1027  %1 = bitcast <1 x double> %vcvt.i to <8 x i8>
1028  %sub.i = sub <8 x i8> zeroinitializer, %1
1029  ret <8 x i8> %sub.i
1030}
1031
; sitofp of <1 x i64> (FMOV to x8 + SCVTF), bitcast of the <1 x double> to
; <4 x i16> (no instruction expected), then integer NEG on .4h lanes.
1032define <4 x i16> @test_bitcastv1f64tov4i16(<1 x i64> %a) #0 {
1033; CHECK-LABEL: test_bitcastv1f64tov4i16:
1034; CHECK:       // %bb.0:
1035; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
1036; CHECK-NEXT:    fmov x8, d0
1037; CHECK-NEXT:    scvtf d0, x8
1038; CHECK-NEXT:    neg v0.4h, v0.4h
1039; CHECK-NEXT:    ret
1040  %vcvt.i = sitofp <1 x i64> %a to <1 x double>
1041  %1 = bitcast <1 x double> %vcvt.i to <4 x i16>
1042  %sub.i = sub <4 x i16> zeroinitializer, %1
1043  ret <4 x i16> %sub.i
1044}
1045
; sitofp of <1 x i64> (FMOV to x8 + SCVTF), bitcast of the <1 x double> to
; <2 x i32> (no instruction expected), then integer NEG on .2s lanes.
1046define <2 x i32> @test_bitcastv1f64tov2i32(<1 x i64> %a) #0 {
1047; CHECK-LABEL: test_bitcastv1f64tov2i32:
1048; CHECK:       // %bb.0:
1049; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
1050; CHECK-NEXT:    fmov x8, d0
1051; CHECK-NEXT:    scvtf d0, x8
1052; CHECK-NEXT:    neg v0.2s, v0.2s
1053; CHECK-NEXT:    ret
1054  %vcvt.i = sitofp <1 x i64> %a to <1 x double>
1055  %1 = bitcast <1 x double> %vcvt.i to <2 x i32>
1056  %sub.i = sub <2 x i32> zeroinitializer, %1
1057  ret <2 x i32> %sub.i
1058}
1059
; sitofp of <1 x i64> (FMOV to x8 + SCVTF), bitcast of the <1 x double> back to
; <1 x i64> (no instruction expected), then a negate expected in scalar form
; (neg d0, d0).
1060define <1 x i64> @test_bitcastv1f64tov1i64(<1 x i64> %a) #0 {
1061; CHECK-LABEL: test_bitcastv1f64tov1i64:
1062; CHECK:       // %bb.0:
1063; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
1064; CHECK-NEXT:    fmov x8, d0
1065; CHECK-NEXT:    scvtf d0, x8
1066; CHECK-NEXT:    neg d0, d0
1067; CHECK-NEXT:    ret
1068  %vcvt.i = sitofp <1 x i64> %a to <1 x double>
1069  %1 = bitcast <1 x double> %vcvt.i to <1 x i64>
1070  %sub.i = sub <1 x i64> zeroinitializer, %1
1071  ret <1 x i64> %sub.i
1072}
1073
; sitofp of <1 x i64> (FMOV to x8 + SCVTF), bitcast of the <1 x double> to
; <2 x float> (no instruction expected), then a floating-point negate written
; as a subtract from -0.0 and expected as FNEG on .2s lanes.
1074define <2 x float> @test_bitcastv1f64tov2f32(<1 x i64> %a) #0 {
1075; CHECK-LABEL: test_bitcastv1f64tov2f32:
1076; CHECK:       // %bb.0:
1077; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
1078; CHECK-NEXT:    fmov x8, d0
1079; CHECK-NEXT:    scvtf d0, x8
1080; CHECK-NEXT:    fneg v0.2s, v0.2s
1081; CHECK-NEXT:    ret
1082  %vcvt.i = sitofp <1 x i64> %a to <1 x double>
1083  %1 = bitcast <1 x double> %vcvt.i to <2 x float>
1084  %sub.i = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %1
1085  ret <2 x float> %sub.i
1086}
1087
1088; Test insert element into an undef vector
; An i8 inserted into lane 0 of an undef <8 x i8>: the expected output is one
; FMOV of w0 into s0 (the other lanes are undef, so no lane insert is needed).
1089define <8 x i8> @scalar_to_vector.v8i8(i8 %a) {
1090; CHECK-LABEL: scalar_to_vector.v8i8:
1091; CHECK:       // %bb.0:
1092; CHECK-NEXT:    fmov s0, w0
1093; CHECK-NEXT:    ret
1094  %b = insertelement <8 x i8> undef, i8 %a, i32 0
1095  ret <8 x i8> %b
1096}
1097
; An i8 inserted into lane 0 of an undef <16 x i8>: the expected output is one
; FMOV of w0 into s0.
1098define <16 x i8> @scalar_to_vector.v16i8(i8 %a) {
1099; CHECK-LABEL: scalar_to_vector.v16i8:
1100; CHECK:       // %bb.0:
1101; CHECK-NEXT:    fmov s0, w0
1102; CHECK-NEXT:    ret
1103  %b = insertelement <16 x i8> undef, i8 %a, i32 0
1104  ret <16 x i8> %b
1105}
1106
; An i16 inserted into lane 0 of an undef <4 x i16>: the expected output is one
; FMOV of w0 into s0.
1107define <4 x i16> @scalar_to_vector.v4i16(i16 %a) {
1108; CHECK-LABEL: scalar_to_vector.v4i16:
1109; CHECK:       // %bb.0:
1110; CHECK-NEXT:    fmov s0, w0
1111; CHECK-NEXT:    ret
1112  %b = insertelement <4 x i16> undef, i16 %a, i32 0
1113  ret <4 x i16> %b
1114}
1115
; An i16 inserted into lane 0 of an undef <8 x i16>: the expected output is one
; FMOV of w0 into s0.
1116define <8 x i16> @scalar_to_vector.v8i16(i16 %a) {
1117; CHECK-LABEL: scalar_to_vector.v8i16:
1118; CHECK:       // %bb.0:
1119; CHECK-NEXT:    fmov s0, w0
1120; CHECK-NEXT:    ret
1121  %b = insertelement <8 x i16> undef, i16 %a, i32 0
1122  ret <8 x i16> %b
1123}
1124
; An i32 inserted into lane 0 of an undef <2 x i32>: the expected output is one
; FMOV of w0 into s0.
1125define <2 x i32> @scalar_to_vector.v2i32(i32 %a) {
1126; CHECK-LABEL: scalar_to_vector.v2i32:
1127; CHECK:       // %bb.0:
1128; CHECK-NEXT:    fmov s0, w0
1129; CHECK-NEXT:    ret
1130  %b = insertelement <2 x i32> undef, i32 %a, i32 0
1131  ret <2 x i32> %b
1132}
1133
; An i32 inserted into lane 0 of an undef <4 x i32>: the expected output is one
; FMOV of w0 into s0.
1134define <4 x i32> @scalar_to_vector.v4i32(i32 %a) {
1135; CHECK-LABEL: scalar_to_vector.v4i32:
1136; CHECK:       // %bb.0:
1137; CHECK-NEXT:    fmov s0, w0
1138; CHECK-NEXT:    ret
1139  %b = insertelement <4 x i32> undef, i32 %a, i32 0
1140  ret <4 x i32> %b
1141}
1142
; An i64 inserted into lane 0 of an undef <2 x i64>: the expected output is one
; FMOV of x0 into d0 (64-bit form, unlike the narrower element variants).
1143define <2 x i64> @scalar_to_vector.v2i64(i64 %a) {
1144; CHECK-LABEL: scalar_to_vector.v2i64:
1145; CHECK:       // %bb.0:
1146; CHECK-NEXT:    fmov d0, x0
1147; CHECK-NEXT:    ret
1148  %b = insertelement <2 x i64> undef, i64 %a, i32 0
1149  ret <2 x i64> %b
1150}
1151
; Extracts the only element of a <1 x i8> and inserts it into each of the 8
; lanes of an <8 x i8>. The chain of inserts is expected to collapse into a
; single DUP from b[0].
1152define <8 x i8> @testDUP.v1i8(<1 x i8> %a) {
1153; CHECK-LABEL: testDUP.v1i8:
1154; CHECK:       // %bb.0:
1155; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
1156; CHECK-NEXT:    dup v0.8b, v0.b[0]
1157; CHECK-NEXT:    ret
1158  %b = extractelement <1 x i8> %a, i32 0
1159  %c = insertelement <8 x i8> undef, i8 %b, i32 0
1160  %d = insertelement <8 x i8> %c, i8 %b, i32 1
1161  %e = insertelement <8 x i8> %d, i8 %b, i32 2
1162  %f = insertelement <8 x i8> %e, i8 %b, i32 3
1163  %g = insertelement <8 x i8> %f, i8 %b, i32 4
1164  %h = insertelement <8 x i8> %g, i8 %b, i32 5
1165  %i = insertelement <8 x i8> %h, i8 %b, i32 6
1166  %j = insertelement <8 x i8> %i, i8 %b, i32 7
1167  ret <8 x i8> %j
1168}
1169
; Extracts the only element of a <1 x i16> and inserts it into each of the 8
; lanes of an <8 x i16>. The chain of inserts is expected to collapse into a
; single DUP from h[0].
1170define <8 x i16> @testDUP.v1i16(<1 x i16> %a) {
1171; CHECK-LABEL: testDUP.v1i16:
1172; CHECK:       // %bb.0:
1173; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
1174; CHECK-NEXT:    dup v0.8h, v0.h[0]
1175; CHECK-NEXT:    ret
1176  %b = extractelement <1 x i16> %a, i32 0
1177  %c = insertelement <8 x i16> undef, i16 %b, i32 0
1178  %d = insertelement <8 x i16> %c, i16 %b, i32 1
1179  %e = insertelement <8 x i16> %d, i16 %b, i32 2
1180  %f = insertelement <8 x i16> %e, i16 %b, i32 3
1181  %g = insertelement <8 x i16> %f, i16 %b, i32 4
1182  %h = insertelement <8 x i16> %g, i16 %b, i32 5
1183  %i = insertelement <8 x i16> %h, i16 %b, i32 6
1184  %j = insertelement <8 x i16> %i, i16 %b, i32 7
1185  ret <8 x i16> %j
1186}
1187
; Extracts the only element of a <1 x i32> and inserts it into each of the 4
; lanes of a <4 x i32>. The chain of inserts is expected to collapse into a
; single DUP from s[0].
1188define <4 x i32> @testDUP.v1i32(<1 x i32> %a) {
1189; CHECK-LABEL: testDUP.v1i32:
1190; CHECK:       // %bb.0:
1191; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
1192; CHECK-NEXT:    dup v0.4s, v0.s[0]
1193; CHECK-NEXT:    ret
1194  %b = extractelement <1 x i32> %a, i32 0
1195  %c = insertelement <4 x i32> undef, i32 %b, i32 0
1196  %d = insertelement <4 x i32> %c, i32 %b, i32 1
1197  %e = insertelement <4 x i32> %d, i32 %b, i32 2
1198  %f = insertelement <4 x i32> %e, i32 %b, i32 3
1199  ret <4 x i32> %f
1200}
1201
; Rebuilds an <8 x i8> element by element from lanes 0-7 of a <16 x i8>, i.e.
; takes the low half of %x. The expected output contains no instruction other
; than RET; only a register kill annotation (d0 as sub-register of q0) appears.
1202define <8 x i8> @getl(<16 x i8> %x) #0 {
1203; CHECK-LABEL: getl:
1204; CHECK:       // %bb.0:
1205; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
1206; CHECK-NEXT:    ret
1207  %vecext = extractelement <16 x i8> %x, i32 0
1208  %vecinit = insertelement <8 x i8> undef, i8 %vecext, i32 0
1209  %vecext1 = extractelement <16 x i8> %x, i32 1
1210  %vecinit2 = insertelement <8 x i8> %vecinit, i8 %vecext1, i32 1
1211  %vecext3 = extractelement <16 x i8> %x, i32 2
1212  %vecinit4 = insertelement <8 x i8> %vecinit2, i8 %vecext3, i32 2
1213  %vecext5 = extractelement <16 x i8> %x, i32 3
1214  %vecinit6 = insertelement <8 x i8> %vecinit4, i8 %vecext5, i32 3
1215  %vecext7 = extractelement <16 x i8> %x, i32 4
1216  %vecinit8 = insertelement <8 x i8> %vecinit6, i8 %vecext7, i32 4
1217  %vecext9 = extractelement <16 x i8> %x, i32 5
1218  %vecinit10 = insertelement <8 x i8> %vecinit8, i8 %vecext9, i32 5
1219  %vecext11 = extractelement <16 x i8> %x, i32 6
1220  %vecinit12 = insertelement <8 x i8> %vecinit10, i8 %vecext11, i32 6
1221  %vecext13 = extractelement <16 x i8> %x, i32 7
1222  %vecinit14 = insertelement <8 x i8> %vecinit12, i8 %vecext13, i32 7
1223  ret <8 x i8> %vecinit14
1224}
1225
; Builds a <4 x i16> whose lane 0 comes from a variable-index extract of %x and
; whose lanes 1-3 come from constant lanes 1-3 of %x.
; Expected output: %x is stored to the stack with a pre-indexed STR that also
; allocates 16 bytes, the index is masked to 0-7 and merged into the stack
; address (scaled by 2) to load the selected halfword, and lanes 1-3 are then
; filled with lane-to-lane moves.
; The CFA-offset directive describes the 16-byte adjustment made by the
; pre-indexed STR, so it is expected immediately after that store, matching the
; "sub sp" / directive order in the variable-index insert test.
1226define <4 x i16> @test_extracts_inserts_varidx_extract(<8 x i16> %x, i32 %idx) {
1227; CHECK-LABEL: test_extracts_inserts_varidx_extract:
1228; CHECK:       // %bb.0:
1229; CHECK-NEXT:    str q0, [sp, #-16]!
1230; CHECK-NEXT:    .cfi_def_cfa_offset 16
1231; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
1232; CHECK-NEXT:    and x8, x0, #0x7
1233; CHECK-NEXT:    mov x9, sp
1234; CHECK-NEXT:    bfi x9, x8, #1, #3
1235; CHECK-NEXT:    ldr h1, [x9]
1236; CHECK-NEXT:    mov v1.h[1], v0.h[1]
1237; CHECK-NEXT:    mov v1.h[2], v0.h[2]
1238; CHECK-NEXT:    mov v1.h[3], v0.h[3]
1239; CHECK-NEXT:    mov v0.16b, v1.16b
1240; CHECK-NEXT:    add sp, sp, #16 // =16
1241; CHECK-NEXT:    ret
1242  %tmp = extractelement <8 x i16> %x, i32 %idx
1243  %tmp2 = insertelement <4 x i16> undef, i16 %tmp, i32 0
1244  %tmp3 = extractelement <8 x i16> %x, i32 1
1245  %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 1
1246  %tmp5 = extractelement <8 x i16> %x, i32 2
1247  %tmp6 = insertelement <4 x i16> %tmp4, i16 %tmp5, i32 2
1248  %tmp7 = extractelement <8 x i16> %x, i32 3
1249  %tmp8 = insertelement <4 x i16> %tmp6, i16 %tmp7, i32 3
1250  ret <4 x i16> %tmp8
1251}
1252
; Inserts lane 0 of %x at a variable index of an undef <4 x i16>, then
; overwrites lanes 1-3 with constant lanes 1-3 of %x.
; Expected output: 16 bytes of stack are reserved, the index is masked to 0-3
; and merged (scaled by 2) into the address sp+8, the halfword is stored
; there, the 8-byte slot is reloaded as d1, and lanes 1-3 are filled with
; lane-to-lane moves.
1253define <4 x i16> @test_extracts_inserts_varidx_insert(<8 x i16> %x, i32 %idx) {
1254; CHECK-LABEL: test_extracts_inserts_varidx_insert:
1255; CHECK:       // %bb.0:
1256; CHECK-NEXT:    sub sp, sp, #16 // =16
1257; CHECK-NEXT:    .cfi_def_cfa_offset 16
1258; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
1259; CHECK-NEXT:    and x8, x0, #0x3
1260; CHECK-NEXT:    add x9, sp, #8 // =8
1261; CHECK-NEXT:    bfi x9, x8, #1, #2
1262; CHECK-NEXT:    str h0, [x9]
1263; CHECK-NEXT:    ldr d1, [sp, #8]
1264; CHECK-NEXT:    mov v1.h[1], v0.h[1]
1265; CHECK-NEXT:    mov v1.h[2], v0.h[2]
1266; CHECK-NEXT:    mov v1.h[3], v0.h[3]
1267; CHECK-NEXT:    mov v0.16b, v1.16b
1268; CHECK-NEXT:    add sp, sp, #16 // =16
1269; CHECK-NEXT:    ret
1270  %tmp = extractelement <8 x i16> %x, i32 0
1271  %tmp2 = insertelement <4 x i16> undef, i16 %tmp, i32 %idx
1272  %tmp3 = extractelement <8 x i16> %x, i32 1
1273  %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 1
1274  %tmp5 = extractelement <8 x i16> %x, i32 2
1275  %tmp6 = insertelement <4 x i16> %tmp4, i16 %tmp5, i32 2
1276  %tmp7 = extractelement <8 x i16> %x, i32 3
1277  %tmp8 = insertelement <4 x i16> %tmp6, i16 %tmp7, i32 3
1278  ret <4 x i16> %tmp8
1279}
1280
; Lane 1 of a <2 x i32>, truncated to i16 and splatted across a <4 x i16>.
; The extract + trunc + inserts are expected to fold into one DUP from h[2]
; (16-bit lane index = 2 x the 32-bit lane index).
1281define <4 x i16> @test_dup_v2i32_v4i16(<2 x i32> %a) {
1282; CHECK-LABEL: test_dup_v2i32_v4i16:
1283; CHECK:       // %bb.0: // %entry
1284; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
1285; CHECK-NEXT:    dup v0.4h, v0.h[2]
1286; CHECK-NEXT:    ret
1287entry:
1288  %x = extractelement <2 x i32> %a, i32 1
1289  %vget_lane = trunc i32 %x to i16
1290  %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
1291  %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
1292  %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
1293  %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
1294  ret <4 x i16> %vecinit3.i
1295}
1296
; Lane 3 of a <4 x i32>, truncated to i16 and splatted across an <8 x i16>.
; Expected to fold into one DUP from h[6] (2 x the 32-bit lane index).
1297define <8 x i16> @test_dup_v4i32_v8i16(<4 x i32> %a) {
1298; CHECK-LABEL: test_dup_v4i32_v8i16:
1299; CHECK:       // %bb.0: // %entry
1300; CHECK-NEXT:    dup v0.8h, v0.h[6]
1301; CHECK-NEXT:    ret
1302entry:
1303  %x = extractelement <4 x i32> %a, i32 3
1304  %vget_lane = trunc i32 %x to i16
1305  %vecinit.i = insertelement <8 x i16> undef, i16 %vget_lane, i32 0
1306  %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %vget_lane, i32 1
1307  %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %vget_lane, i32 2
1308  %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %vget_lane, i32 3
1309  %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %vget_lane, i32 4
1310  %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %vget_lane, i32 5
1311  %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %vget_lane, i32 6
1312  %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %vget_lane, i32 7
1313  ret <8 x i16> %vecinit7.i
1314}
1315
; The only lane of a <1 x i64>, truncated to i16 and splatted across a
; <4 x i16>. Expected to fold into one DUP from h[0].
1316define <4 x i16> @test_dup_v1i64_v4i16(<1 x i64> %a) {
1317; CHECK-LABEL: test_dup_v1i64_v4i16:
1318; CHECK:       // %bb.0: // %entry
1319; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
1320; CHECK-NEXT:    dup v0.4h, v0.h[0]
1321; CHECK-NEXT:    ret
1322entry:
1323  %x = extractelement <1 x i64> %a, i32 0
1324  %vget_lane = trunc i64 %x to i16
1325  %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
1326  %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
1327  %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
1328  %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
1329  ret <4 x i16> %vecinit3.i
1330}
1331
; The only lane of a <1 x i64>, truncated to i32 and splatted across a
; <2 x i32>. Expected to fold into one DUP from s[0].
1332define <2 x i32> @test_dup_v1i64_v2i32(<1 x i64> %a) {
1333; CHECK-LABEL: test_dup_v1i64_v2i32:
1334; CHECK:       // %bb.0: // %entry
1335; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
1336; CHECK-NEXT:    dup v0.2s, v0.s[0]
1337; CHECK-NEXT:    ret
1338entry:
1339  %x = extractelement <1 x i64> %a, i32 0
1340  %vget_lane = trunc i64 %x to i32
1341  %vecinit.i = insertelement <2 x i32> undef, i32 %vget_lane, i32 0
1342  %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %vget_lane, i32 1
1343  ret <2 x i32> %vecinit1.i
1344}
1345
; Lane 1 of a <2 x i64>, truncated to i16 and splatted across an <8 x i16>.
; Expected to fold into one DUP from h[4] (4 x the 64-bit lane index).
1346define <8 x i16> @test_dup_v2i64_v8i16(<2 x i64> %a) {
1347; CHECK-LABEL: test_dup_v2i64_v8i16:
1348; CHECK:       // %bb.0: // %entry
1349; CHECK-NEXT:    dup v0.8h, v0.h[4]
1350; CHECK-NEXT:    ret
1351entry:
1352  %x = extractelement <2 x i64> %a, i32 1
1353  %vget_lane = trunc i64 %x to i16
1354  %vecinit.i = insertelement <8 x i16> undef, i16 %vget_lane, i32 0
1355  %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %vget_lane, i32 1
1356  %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %vget_lane, i32 2
1357  %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %vget_lane, i32 3
1358  %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %vget_lane, i32 4
1359  %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %vget_lane, i32 5
1360  %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %vget_lane, i32 6
1361  %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %vget_lane, i32 7
1362  ret <8 x i16> %vecinit7.i
1363}
1364
; Lane 1 of a <2 x i64>, truncated to i32 and splatted across a <4 x i32>.
; Expected to fold into one DUP from s[2] (2 x the 64-bit lane index).
1365define <4 x i32> @test_dup_v2i64_v4i32(<2 x i64> %a) {
1366; CHECK-LABEL: test_dup_v2i64_v4i32:
1367; CHECK:       // %bb.0: // %entry
1368; CHECK-NEXT:    dup v0.4s, v0.s[2]
1369; CHECK-NEXT:    ret
1370entry:
1371  %x = extractelement <2 x i64> %a, i32 1
1372  %vget_lane = trunc i64 %x to i32
1373  %vecinit.i = insertelement <4 x i32> undef, i32 %vget_lane, i32 0
1374  %vecinit1.i = insertelement <4 x i32> %vecinit.i, i32 %vget_lane, i32 1
1375  %vecinit2.i = insertelement <4 x i32> %vecinit1.i, i32 %vget_lane, i32 2
1376  %vecinit3.i = insertelement <4 x i32> %vecinit2.i, i32 %vget_lane, i32 3
1377  ret <4 x i32> %vecinit3.i
1378}
1379
; Lane 1 of a 128-bit <4 x i32>, truncated to i16 and splatted across a 64-bit
; <4 x i16>. Expected to fold into one DUP from h[2] with a .4h destination.
1380define <4 x i16> @test_dup_v4i32_v4i16(<4 x i32> %a) {
1381; CHECK-LABEL: test_dup_v4i32_v4i16:
1382; CHECK:       // %bb.0: // %entry
1383; CHECK-NEXT:    dup v0.4h, v0.h[2]
1384; CHECK-NEXT:    ret
1385entry:
1386  %x = extractelement <4 x i32> %a, i32 1
1387  %vget_lane = trunc i32 %x to i16
1388  %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
1389  %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
1390  %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
1391  %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
1392  ret <4 x i16> %vecinit3.i
1393}
1394
; Lane 0 of a 128-bit <2 x i64>, truncated to i16 and splatted across a 64-bit
; <4 x i16>. Expected to fold into one DUP from h[0] with a .4h destination.
1395define <4 x i16> @test_dup_v2i64_v4i16(<2 x i64> %a) {
1396; CHECK-LABEL: test_dup_v2i64_v4i16:
1397; CHECK:       // %bb.0: // %entry
1398; CHECK-NEXT:    dup v0.4h, v0.h[0]
1399; CHECK-NEXT:    ret
1400entry:
1401  %x = extractelement <2 x i64> %a, i32 0
1402  %vget_lane = trunc i64 %x to i16
1403  %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
1404  %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
1405  %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
1406  %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
1407  ret <4 x i16> %vecinit3.i
1408}
1409
; Lane 0 of a 128-bit <2 x i64>, truncated to i32 and splatted across a 64-bit
; <2 x i32>. Expected to fold into one DUP from s[0] with a .2s destination.
1410define <2 x i32> @test_dup_v2i64_v2i32(<2 x i64> %a) {
1411; CHECK-LABEL: test_dup_v2i64_v2i32:
1412; CHECK:       // %bb.0: // %entry
1413; CHECK-NEXT:    dup v0.2s, v0.s[0]
1414; CHECK-NEXT:    ret
1415entry:
1416  %x = extractelement <2 x i64> %a, i32 0
1417  %vget_lane = trunc i64 %x to i32
1418  %vecinit.i = insertelement <2 x i32> undef, i32 %vget_lane, i32 0
1419  %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %vget_lane, i32 1
1420  ret <2 x i32> %vecinit1.i
1421}
1422
1423
; The scalar result of the fmaxv intrinsic is round-tripped through a
; <1 x float> and placed in lane 0 of an undef <2 x float>. The expected output
; is only FMAXP into s0; no extra move is needed to form the vector.
1424define <2 x float> @test_scalar_to_vector_f32_to_v2f32(<2 x float> %a) {
1425; CHECK-LABEL: test_scalar_to_vector_f32_to_v2f32:
1426; CHECK:       // %bb.0: // %entry
1427; CHECK-NEXT:    fmaxp s0, v0.2s
1428; CHECK-NEXT:    ret
1429entry:
1430  %0 = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a)
1431  %1 = insertelement <1 x float> undef, float %0, i32 0
1432  %2 = extractelement <1 x float> %1, i32 0
1433  %vecinit1.i = insertelement <2 x float> undef, float %2, i32 0
1434  ret <2 x float> %vecinit1.i
1435}
1436
; Same as the <2 x float> variant, but the fmaxv result is placed in lane 0 of
; an undef <4 x float>. The expected output is still only FMAXP into s0.
1437define <4 x float> @test_scalar_to_vector_f32_to_v4f32(<2 x float> %a) {
1438; CHECK-LABEL: test_scalar_to_vector_f32_to_v4f32:
1439; CHECK:       // %bb.0: // %entry
1440; CHECK-NEXT:    fmaxp s0, v0.2s
1441; CHECK-NEXT:    ret
1442entry:
1443  %0 = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a)
1444  %1 = insertelement <1 x float> undef, float %0, i32 0
1445  %2 = extractelement <1 x float> %1, i32 0
1446  %vecinit1.i = insertelement <4 x float> undef, float %2, i32 0
1447  ret <4 x float> %vecinit1.i
1448}
1449
1450declare float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float>)
1451
; Lane 0 of %a is inserted at index 1 of an undef <2 x i32>; lane 0 of the
; result stays undef. The expected output is a DUP of s[0] into both lanes.
1452define <2 x i32> @test_concat_undef_v1i32(<2 x i32> %a) {
1453; CHECK-LABEL: test_concat_undef_v1i32:
1454; CHECK:       // %bb.0: // %entry
1455; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
1456; CHECK-NEXT:    dup v0.2s, v0.s[0]
1457; CHECK-NEXT:    ret
1458entry:
1459  %0 = extractelement <2 x i32> %a, i32 0
1460  %vecinit1.i = insertelement <2 x i32> undef, i32 %0, i32 1
1461  ret <2 x i32> %vecinit1.i
1462}
1463
1464declare i32 @llvm.aarch64.neon.sqabs.i32(i32) #4
1465
; The scalar sqabs intrinsic result is inserted into lane 0 of an undef
; <2 x i32>. The expected output moves w0 into s0 and applies the scalar SQABS
; in place; nothing further is needed to form the vector.
1466define <2 x i32> @test_concat_v1i32_undef(i32 %a) {
1467; CHECK-LABEL: test_concat_v1i32_undef:
1468; CHECK:       // %bb.0: // %entry
1469; CHECK-NEXT:    fmov s0, w0
1470; CHECK-NEXT:    sqabs s0, s0
1471; CHECK-NEXT:    ret
1472entry:
1473  %b = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a)
1474  %vecinit.i432 = insertelement <2 x i32> undef, i32 %b, i32 0
1475  ret <2 x i32> %vecinit.i432
1476}
1477
; Lane 0 of %a is inserted into both lanes of a <2 x i32>. The expected output
; is a single DUP from s[0].
1478define <2 x i32> @test_concat_same_v1i32_v1i32(<2 x i32> %a) {
1479; CHECK-LABEL: test_concat_same_v1i32_v1i32:
1480; CHECK:       // %bb.0: // %entry
1481; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
1482; CHECK-NEXT:    dup v0.2s, v0.s[0]
1483; CHECK-NEXT:    ret
1484entry:
1485  %0 = extractelement <2 x i32> %a, i32 0
1486  %vecinit.i = insertelement <2 x i32> undef, i32 %0, i32 0
1487  %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %0, i32 1
1488  ret <2 x i32> %vecinit1.i
1489}
1490
; Two independent scalar sqabs results are each placed in lane 0 of their own
; <2 x i32>, and a shuffle with mask <0, 2> picks lane 0 of each. The expected
; output moves both arguments into SIMD registers, applies scalar SQABS to
; each, and then moves the second result through w8 into lane 1 of v0.
1491define <2 x i32> @test_concat_diff_v1i32_v1i32(i32 %a, i32 %b) {
1492; CHECK-LABEL: test_concat_diff_v1i32_v1i32:
1493; CHECK:       // %bb.0: // %entry
1494; CHECK-NEXT:    fmov s1, w1
1495; CHECK-NEXT:    fmov s0, w0
1496; CHECK-NEXT:    sqabs s1, s1
1497; CHECK-NEXT:    sqabs s0, s0
1498; CHECK-NEXT:    fmov w8, s1
1499; CHECK-NEXT:    mov v0.s[1], w8
1500; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
1501; CHECK-NEXT:    ret
1502entry:
1503  %c = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a)
1504  %d = insertelement <2 x i32> undef, i32 %c, i32 0
1505  %e = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %b)
1506  %f = insertelement <2 x i32> undef, i32 %e, i32 0
1507  %h = shufflevector <2 x i32> %d, <2 x i32> %f, <2 x i32> <i32 0, i32 2>
1508  ret <2 x i32> %h
1509}
1510
; Shuffle that takes bytes 0-7 of %x followed by bytes 0-7 of %y (mask indices
; 16-23), i.e. concatenates the two low halves. The expected output is one
; 64-bit lane move of v1.d[0] into v0.d[1].
1511define <16 x i8> @test_concat_v16i8_v16i8_v16i8(<16 x i8> %x, <16 x i8> %y) #0 {
1512; CHECK-LABEL: test_concat_v16i8_v16i8_v16i8:
1513; CHECK:       // %bb.0: // %entry
1514; CHECK-NEXT:    mov v0.d[1], v1.d[0]
1515; CHECK-NEXT:    ret
1516entry:
1517  %vecinit30 = shufflevector <16 x i8> %x, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
1518  ret <16 x i8> %vecinit30
1519}
1520
; Low half built element by element from all 8 lanes of the 64-bit %x, high
; half taken from bytes 0-7 of the 128-bit %y via a shuffle. The expected
; output is one 64-bit lane move of v1.d[0] into v0.d[1].
1521define <16 x i8> @test_concat_v16i8_v8i8_v16i8(<8 x i8> %x, <16 x i8> %y) #0 {
1522; CHECK-LABEL: test_concat_v16i8_v8i8_v16i8:
1523; CHECK:       // %bb.0: // %entry
1524; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
1525; CHECK-NEXT:    mov v0.d[1], v1.d[0]
1526; CHECK-NEXT:    ret
1527entry:
1528  %vecext = extractelement <8 x i8> %x, i32 0
1529  %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
1530  %vecext1 = extractelement <8 x i8> %x, i32 1
1531  %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
1532  %vecext3 = extractelement <8 x i8> %x, i32 2
1533  %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
1534  %vecext5 = extractelement <8 x i8> %x, i32 3
1535  %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
1536  %vecext7 = extractelement <8 x i8> %x, i32 4
1537  %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
1538  %vecext9 = extractelement <8 x i8> %x, i32 5
1539  %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
1540  %vecext11 = extractelement <8 x i8> %x, i32 6
1541  %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
1542  %vecext13 = extractelement <8 x i8> %x, i32 7
1543  %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
1544  %vecinit30 = shufflevector <16 x i8> %vecinit14, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
1545  ret <16 x i8> %vecinit30
1546}
1547
; Result built entirely from extract/insert pairs: lanes 0-7 from bytes 0-7 of
; the 128-bit %x, lanes 8-15 from all 8 lanes of the 64-bit %y. The expected
; output is one 64-bit lane move of v1.d[0] into v0.d[1].
1548define <16 x i8> @test_concat_v16i8_v16i8_v8i8(<16 x i8> %x, <8 x i8> %y) #0 {
1549; CHECK-LABEL: test_concat_v16i8_v16i8_v8i8:
1550; CHECK:       // %bb.0: // %entry
1551; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
1552; CHECK-NEXT:    mov v0.d[1], v1.d[0]
1553; CHECK-NEXT:    ret
1554entry:
1555  %vecext = extractelement <16 x i8> %x, i32 0
1556  %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
1557  %vecext1 = extractelement <16 x i8> %x, i32 1
1558  %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
1559  %vecext3 = extractelement <16 x i8> %x, i32 2
1560  %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
1561  %vecext5 = extractelement <16 x i8> %x, i32 3
1562  %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
1563  %vecext7 = extractelement <16 x i8> %x, i32 4
1564  %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
1565  %vecext9 = extractelement <16 x i8> %x, i32 5
1566  %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
1567  %vecext11 = extractelement <16 x i8> %x, i32 6
1568  %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
1569  %vecext13 = extractelement <16 x i8> %x, i32 7
1570  %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
1571  %vecext15 = extractelement <8 x i8> %y, i32 0
1572  %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8
1573  %vecext17 = extractelement <8 x i8> %y, i32 1
1574  %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9
1575  %vecext19 = extractelement <8 x i8> %y, i32 2
1576  %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10
1577  %vecext21 = extractelement <8 x i8> %y, i32 3
1578  %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11
1579  %vecext23 = extractelement <8 x i8> %y, i32 4
1580  %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12
1581  %vecext25 = extractelement <8 x i8> %y, i32 5
1582  %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13
1583  %vecext27 = extractelement <8 x i8> %y, i32 6
1584  %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14
1585  %vecext29 = extractelement <8 x i8> %y, i32 7
1586  %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15
1587  ret <16 x i8> %vecinit30
1588}
1589
; Concatenates two 64-bit <8 x i8> vectors element by element: lanes 0-7 from
; %x, lanes 8-15 from %y. The expected output is one 64-bit lane move of
; v1.d[0] into v0.d[1].
1590define <16 x i8> @test_concat_v16i8_v8i8_v8i8(<8 x i8> %x, <8 x i8> %y) #0 {
1591; CHECK-LABEL: test_concat_v16i8_v8i8_v8i8:
1592; CHECK:       // %bb.0: // %entry
1593; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
1594; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
1595; CHECK-NEXT:    mov v0.d[1], v1.d[0]
1596; CHECK-NEXT:    ret
1597entry:
1598  %vecext = extractelement <8 x i8> %x, i32 0
1599  %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
1600  %vecext1 = extractelement <8 x i8> %x, i32 1
1601  %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
1602  %vecext3 = extractelement <8 x i8> %x, i32 2
1603  %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
1604  %vecext5 = extractelement <8 x i8> %x, i32 3
1605  %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
1606  %vecext7 = extractelement <8 x i8> %x, i32 4
1607  %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
1608  %vecext9 = extractelement <8 x i8> %x, i32 5
1609  %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
1610  %vecext11 = extractelement <8 x i8> %x, i32 6
1611  %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
1612  %vecext13 = extractelement <8 x i8> %x, i32 7
1613  %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
1614  %vecext15 = extractelement <8 x i8> %y, i32 0
1615  %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8
1616  %vecext17 = extractelement <8 x i8> %y, i32 1
1617  %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9
1618  %vecext19 = extractelement <8 x i8> %y, i32 2
1619  %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10
1620  %vecext21 = extractelement <8 x i8> %y, i32 3
1621  %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11
1622  %vecext23 = extractelement <8 x i8> %y, i32 4
1623  %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12
1624  %vecext25 = extractelement <8 x i8> %y, i32 5
1625  %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13
1626  %vecext27 = extractelement <8 x i8> %y, i32 6
1627  %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14
1628  %vecext29 = extractelement <8 x i8> %y, i32 7
1629  %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15
1630  ret <16 x i8> %vecinit30
1631}
1632
; Shuffle that takes halfwords 0-3 of %x followed by halfwords 0-3 of %y (mask
; indices 8-11), i.e. concatenates the two low halves. The expected output is
; one 64-bit lane move of v1.d[0] into v0.d[1].
1633define <8 x i16> @test_concat_v8i16_v8i16_v8i16(<8 x i16> %x, <8 x i16> %y) #0 {
1634; CHECK-LABEL: test_concat_v8i16_v8i16_v8i16:
1635; CHECK:       // %bb.0: // %entry
1636; CHECK-NEXT:    mov v0.d[1], v1.d[0]
1637; CHECK-NEXT:    ret
1638entry:
1639  %vecinit14 = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
1640  ret <8 x i16> %vecinit14
1641}
1642
; Low half built element by element from all 4 lanes of the 64-bit %x, high
; half taken from halfwords 0-3 of the 128-bit %y via a shuffle. The expected
; output is one 64-bit lane move of v1.d[0] into v0.d[1].
1643define <8 x i16> @test_concat_v8i16_v4i16_v8i16(<4 x i16> %x, <8 x i16> %y) #0 {
1644; CHECK-LABEL: test_concat_v8i16_v4i16_v8i16:
1645; CHECK:       // %bb.0: // %entry
1646; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
1647; CHECK-NEXT:    mov v0.d[1], v1.d[0]
1648; CHECK-NEXT:    ret
1649entry:
1650  %vecext = extractelement <4 x i16> %x, i32 0
1651  %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
1652  %vecext1 = extractelement <4 x i16> %x, i32 1
1653  %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
1654  %vecext3 = extractelement <4 x i16> %x, i32 2
1655  %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
1656  %vecext5 = extractelement <4 x i16> %x, i32 3
1657  %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
1658  %vecinit14 = shufflevector <8 x i16> %vecinit6, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
1659  ret <8 x i16> %vecinit14
1660}
1661
; Result built entirely from extract/insert pairs: lanes 0-3 from halfwords 0-3
; of the 128-bit %x, lanes 4-7 from all 4 lanes of the 64-bit %y. The expected
; output is one 64-bit lane move of v1.d[0] into v0.d[1].
1662define <8 x i16> @test_concat_v8i16_v8i16_v4i16(<8 x i16> %x, <4 x i16> %y) #0 {
1663; CHECK-LABEL: test_concat_v8i16_v8i16_v4i16:
1664; CHECK:       // %bb.0: // %entry
1665; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
1666; CHECK-NEXT:    mov v0.d[1], v1.d[0]
1667; CHECK-NEXT:    ret
1668entry:
1669  %vecext = extractelement <8 x i16> %x, i32 0
1670  %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
1671  %vecext1 = extractelement <8 x i16> %x, i32 1
1672  %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
1673  %vecext3 = extractelement <8 x i16> %x, i32 2
1674  %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
1675  %vecext5 = extractelement <8 x i16> %x, i32 3
1676  %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
1677  %vecext7 = extractelement <4 x i16> %y, i32 0
1678  %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4
1679  %vecext9 = extractelement <4 x i16> %y, i32 1
1680  %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5
1681  %vecext11 = extractelement <4 x i16> %y, i32 2
1682  %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6
1683  %vecext13 = extractelement <4 x i16> %y, i32 3
1684  %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7
1685  ret <8 x i16> %vecinit14
1686}
1687
; Concatenates two <4 x i16> vectors into an <8 x i16> using only scalar
; extract/insert pairs: all four lanes of %x fill lanes 0-3, all four lanes of
; %y fill lanes 4-7.
; The expected output below widens both $d0 and $d1 to their $q registers
; (register-liveness annotations) and then performs a single 64-bit lane insert
; of v1.d[0] into v0.d[1].
1688define <8 x i16> @test_concat_v8i16_v4i16_v4i16(<4 x i16> %x, <4 x i16> %y) #0 {
1689; CHECK-LABEL: test_concat_v8i16_v4i16_v4i16:
1690; CHECK:       // %bb.0: // %entry
1691; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
1692; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
1693; CHECK-NEXT:    mov v0.d[1], v1.d[0]
1694; CHECK-NEXT:    ret
1695entry:
1696  %vecext = extractelement <4 x i16> %x, i32 0
1697  %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
1698  %vecext1 = extractelement <4 x i16> %x, i32 1
1699  %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
1700  %vecext3 = extractelement <4 x i16> %x, i32 2
1701  %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
1702  %vecext5 = extractelement <4 x i16> %x, i32 3
1703  %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
1704  %vecext7 = extractelement <4 x i16> %y, i32 0
1705  %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4
1706  %vecext9 = extractelement <4 x i16> %y, i32 1
1707  %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5
1708  %vecext11 = extractelement <4 x i16> %y, i32 2
1709  %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6
1710  %vecext13 = extractelement <4 x i16> %y, i32 3
1711  %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7
1712  ret <8 x i16> %vecinit14
1713}
1714
; Concatenates the low halves of two <4 x i32> vectors with a single shuffle:
; mask indices 0-1 pick lanes 0-1 of %x, indices 4-5 pick lanes 0-1 of %y.
; The expected output below is one 64-bit lane insert (v1.d[0] into v0.d[1]).
1715define <4 x i32> @test_concat_v4i32_v4i32_v4i32(<4 x i32> %x, <4 x i32> %y) #0 {
1716; CHECK-LABEL: test_concat_v4i32_v4i32_v4i32:
1717; CHECK:       // %bb.0: // %entry
1718; CHECK-NEXT:    mov v0.d[1], v1.d[0]
1719; CHECK-NEXT:    ret
1720entry:
1721  %vecinit6 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
1722  ret <4 x i32> %vecinit6
1723}
1724
; Builds a <4 x i32> whose lanes 0-1 are both lanes of the <2 x i32> %x (copied
; one scalar at a time into an otherwise-undef vector) and whose lanes 2-3 are
; lanes 0-1 of %y (shuffle mask indices 4-5).
; The expected output below widens $d0 to $q0 (register-liveness annotation)
; and then performs a single 64-bit lane insert of v1.d[0] into v0.d[1].
1725define <4 x i32> @test_concat_v4i32_v2i32_v4i32(<2 x i32> %x, <4 x i32> %y) #0 {
1726; CHECK-LABEL: test_concat_v4i32_v2i32_v4i32:
1727; CHECK:       // %bb.0: // %entry
1728; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
1729; CHECK-NEXT:    mov v0.d[1], v1.d[0]
1730; CHECK-NEXT:    ret
1731entry:
1732  %vecext = extractelement <2 x i32> %x, i32 0
1733  %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
1734  %vecext1 = extractelement <2 x i32> %x, i32 1
1735  %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
1736  %vecinit6 = shufflevector <4 x i32> %vecinit2, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
1737  ret <4 x i32> %vecinit6
1738}
1739
; Builds a <4 x i32> entirely from scalar extract/insert pairs: lanes 0-1 come
; from lanes 0-1 of the <4 x i32> %x, lanes 2-3 come from both lanes of the
; <2 x i32> %y. No shufflevector is used in this variant.
; The expected output below widens $d1 to $q1 (register-liveness annotation)
; and then performs a single 64-bit lane insert of v1.d[0] into v0.d[1].
1740define <4 x i32> @test_concat_v4i32_v4i32_v2i32(<4 x i32> %x, <2 x i32> %y) #0 {
1741; CHECK-LABEL: test_concat_v4i32_v4i32_v2i32:
1742; CHECK:       // %bb.0: // %entry
1743; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
1744; CHECK-NEXT:    mov v0.d[1], v1.d[0]
1745; CHECK-NEXT:    ret
1746entry:
1747  %vecext = extractelement <4 x i32> %x, i32 0
1748  %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
1749  %vecext1 = extractelement <4 x i32> %x, i32 1
1750  %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
1751  %vecext3 = extractelement <2 x i32> %y, i32 0
1752  %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %vecext3, i32 2
1753  %vecext5 = extractelement <2 x i32> %y, i32 1
1754  %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %vecext5, i32 3
1755  ret <4 x i32> %vecinit6
1756}
1757
; Concatenates two <2 x i32> vectors into a <4 x i32> with a single widening
; shuffle: mask <0, 1, 2, 3> takes both lanes of %x followed by both lanes of %y.
; The expected output below widens both $d0 and $d1 to their $q registers
; (register-liveness annotations) and then performs a single 64-bit lane insert
; of v1.d[0] into v0.d[1].
1758define <4 x i32> @test_concat_v4i32_v2i32_v2i32(<2 x i32> %x, <2 x i32> %y) #0 {
1759; CHECK-LABEL: test_concat_v4i32_v2i32_v2i32:
1760; CHECK:       // %bb.0: // %entry
1761; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
1762; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
1763; CHECK-NEXT:    mov v0.d[1], v1.d[0]
1764; CHECK-NEXT:    ret
1765entry:
1766  %vecinit6 = shufflevector <2 x i32> %x, <2 x i32> %y, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1767  ret <4 x i32> %vecinit6
1768}
1769
; Combines lane 0 of two <2 x i64> vectors with a single shuffle: mask <0, 2>
; picks lane 0 of %x and lane 0 of %y.
; Unlike the narrower-element variants in this group, the expected output below
; is a zip1 on the .2d arrangement rather than a lane-insert mov.
1770define <2 x i64> @test_concat_v2i64_v2i64_v2i64(<2 x i64> %x, <2 x i64> %y) #0 {
1771; CHECK-LABEL: test_concat_v2i64_v2i64_v2i64:
1772; CHECK:       // %bb.0: // %entry
1773; CHECK-NEXT:    zip1 v0.2d, v0.2d, v1.2d
1774; CHECK-NEXT:    ret
1775entry:
1776  %vecinit2 = shufflevector <2 x i64> %x, <2 x i64> %y, <2 x i32> <i32 0, i32 2>
1777  ret <2 x i64> %vecinit2
1778}
1779
; Places the single lane of the <1 x i64> %x into lane 0 of an otherwise-undef
; <2 x i64>, then shuffles with mask <0, 2> so lane 1 becomes lane 0 of %y.
; The expected output below widens $d0 to $q0 (register-liveness annotation)
; and then emits a zip1 on the .2d arrangement.
1780define <2 x i64> @test_concat_v2i64_v1i64_v2i64(<1 x i64> %x, <2 x i64> %y) #0 {
1781; CHECK-LABEL: test_concat_v2i64_v1i64_v2i64:
1782; CHECK:       // %bb.0: // %entry
1783; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
1784; CHECK-NEXT:    zip1 v0.2d, v0.2d, v1.2d
1785; CHECK-NEXT:    ret
1786entry:
1787  %vecext = extractelement <1 x i64> %x, i32 0
1788  %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
1789  %vecinit2 = shufflevector <2 x i64> %vecinit, <2 x i64> %y, <2 x i32> <i32 0, i32 2>
1790  ret <2 x i64> %vecinit2
1791}
1792
; Builds a <2 x i64> from scalar extract/insert pairs: lane 0 is lane 0 of the
; <2 x i64> %x, lane 1 is the single lane of the <1 x i64> %y.
; The expected output below widens $d1 to $q1 (register-liveness annotation)
; and then emits a zip1 on the .2d arrangement.
1793define <2 x i64> @test_concat_v2i64_v2i64_v1i64(<2 x i64> %x, <1 x i64> %y) #0 {
1794; CHECK-LABEL: test_concat_v2i64_v2i64_v1i64:
1795; CHECK:       // %bb.0: // %entry
1796; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
1797; CHECK-NEXT:    zip1 v0.2d, v0.2d, v1.2d
1798; CHECK-NEXT:    ret
1799entry:
1800  %vecext = extractelement <2 x i64> %x, i32 0
1801  %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
1802  %vecext1 = extractelement <1 x i64> %y, i32 0
1803  %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1
1804  ret <2 x i64> %vecinit2
1805}
1806
; Concatenates two <1 x i64> vectors into a <2 x i64> using scalar
; extract/insert pairs: the single lane of %x becomes lane 0 and the single lane
; of %y becomes lane 1.
; The expected output below widens both $d0 and $d1 to their $q registers
; (register-liveness annotations) and then performs a 64-bit lane insert of
; v1.d[0] into v0.d[1].
1807define <2 x i64> @test_concat_v2i64_v1i64_v1i64(<1 x i64> %x, <1 x i64> %y) #0 {
1808; CHECK-LABEL: test_concat_v2i64_v1i64_v1i64:
1809; CHECK:       // %bb.0: // %entry
1810; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
1811; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
1812; CHECK-NEXT:    mov v0.d[1], v1.d[0]
1813; CHECK-NEXT:    ret
1814entry:
1815  %vecext = extractelement <1 x i64> %x, i32 0
1816  %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
1817  %vecext1 = extractelement <1 x i64> %y, i32 0
1818  %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1
1819  ret <2 x i64> %vecinit2
1820}
1821
1822
; Splats lane 0 of a constant-zero <1 x i16> across a <4 x i16> result (the
; shuffle mask is all zeros, so every output lane reads input lane 0).
; The expected output below materialises the result with a single all-zero movi.
1823define <4 x i16> @concat_vector_v4i16_const() {
1824; CHECK-LABEL: concat_vector_v4i16_const:
1825; CHECK:       // %bb.0:
1826; CHECK-NEXT:    movi v0.2d, #0000000000000000
1827; CHECK-NEXT:    ret
1828 %r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <4 x i32> zeroinitializer
1829 ret <4 x i16> %r
1830}
1831
; Splats lane 0 of the constant <1 x i16> <i16 1> across a <4 x i16> result
; (all-zero shuffle mask).
; The expected output below materialises the result with movi v0.4h, #1.
1832define <4 x i16> @concat_vector_v4i16_const_one() {
1833; CHECK-LABEL: concat_vector_v4i16_const_one:
1834; CHECK:       // %bb.0:
1835; CHECK-NEXT:    movi v0.4h, #1
1836; CHECK-NEXT:    ret
1837 %r = shufflevector <1 x i16> <i16 1>, <1 x i16> undef, <4 x i32> zeroinitializer
1838 ret <4 x i16> %r
1839}
1840
; Splats lane 0 of a constant-zero <1 x i32> across a <4 x i32> result
; (all-zero shuffle mask).
; The expected output below materialises the result with a single all-zero movi.
1841define <4 x i32> @concat_vector_v4i32_const() {
1842; CHECK-LABEL: concat_vector_v4i32_const:
1843; CHECK:       // %bb.0:
1844; CHECK-NEXT:    movi v0.2d, #0000000000000000
1845; CHECK-NEXT:    ret
1846 %r = shufflevector <1 x i32> zeroinitializer, <1 x i32> undef, <4 x i32> zeroinitializer
1847 ret <4 x i32> %r
1848}
1849
; Splats lane 0 of a constant-zero <1 x i8> across an <8 x i8> result
; (all-zero shuffle mask).
; The expected output below materialises the result with a single all-zero movi.
1850define <8 x i8> @concat_vector_v8i8_const() {
1851; CHECK-LABEL: concat_vector_v8i8_const:
1852; CHECK:       // %bb.0:
1853; CHECK-NEXT:    movi v0.2d, #0000000000000000
1854; CHECK-NEXT:    ret
1855 %r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <8 x i32> zeroinitializer
1856 ret <8 x i8> %r
1857}
1858
; Splats lane 0 of a constant-zero <1 x i16> across an <8 x i16> result
; (all-zero shuffle mask).
; The expected output below materialises the result with a single all-zero movi.
1859define <8 x i16> @concat_vector_v8i16_const() {
1860; CHECK-LABEL: concat_vector_v8i16_const:
1861; CHECK:       // %bb.0:
1862; CHECK-NEXT:    movi v0.2d, #0000000000000000
1863; CHECK-NEXT:    ret
1864 %r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <8 x i32> zeroinitializer
1865 ret <8 x i16> %r
1866}
1867
; Splats lane 0 of the constant <1 x i16> <i16 1> across an <8 x i16> result
; (all-zero shuffle mask).
; The expected output below materialises the result with movi v0.8h, #1.
1868define <8 x i16> @concat_vector_v8i16_const_one() {
1869; CHECK-LABEL: concat_vector_v8i16_const_one:
1870; CHECK:       // %bb.0:
1871; CHECK-NEXT:    movi v0.8h, #1
1872; CHECK-NEXT:    ret
1873 %r = shufflevector <1 x i16> <i16 1>, <1 x i16> undef, <8 x i32> zeroinitializer
1874 ret <8 x i16> %r
1875}
1876
; Splats lane 0 of a constant-zero <1 x i8> across a <16 x i8> result
; (all-zero shuffle mask).
; The expected output below materialises the result with a single all-zero movi.
1877define <16 x i8> @concat_vector_v16i8_const() {
1878; CHECK-LABEL: concat_vector_v16i8_const:
1879; CHECK:       // %bb.0:
1880; CHECK-NEXT:    movi v0.2d, #0000000000000000
1881; CHECK-NEXT:    ret
1882 %r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <16 x i32> zeroinitializer
1883 ret <16 x i8> %r
1884}
1885
; Splats the single lane of the <1 x i16> argument %a across a <4 x i16> result
; (all-zero shuffle mask, so every output lane reads lane 0 of %a).
; The expected output below widens $d0 to $q0 (register-liveness annotation)
; and then broadcasts with dup v0.4h, v0.h[0].
1886define <4 x i16> @concat_vector_v4i16(<1 x i16> %a) {
1887; CHECK-LABEL: concat_vector_v4i16:
1888; CHECK:       // %bb.0:
1889; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
1890; CHECK-NEXT:    dup v0.4h, v0.h[0]
1891; CHECK-NEXT:    ret
1892 %r = shufflevector <1 x i16> %a, <1 x i16> undef, <4 x i32> zeroinitializer
1893 ret <4 x i16> %r
1894}
1895
; Splats the single lane of the <1 x i32> argument %a across a <4 x i32> result
; (all-zero shuffle mask).
; The expected output below widens $d0 to $q0 (register-liveness annotation)
; and then broadcasts with dup v0.4s, v0.s[0].
1896define <4 x i32> @concat_vector_v4i32(<1 x i32> %a) {
1897; CHECK-LABEL: concat_vector_v4i32:
1898; CHECK:       // %bb.0:
1899; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
1900; CHECK-NEXT:    dup v0.4s, v0.s[0]
1901; CHECK-NEXT:    ret
1902 %r = shufflevector <1 x i32> %a, <1 x i32> undef, <4 x i32> zeroinitializer
1903 ret <4 x i32> %r
1904}
1905
; Splats the single lane of the <1 x i8> argument %a across an <8 x i8> result
; (all-zero shuffle mask).
; The expected output below widens $d0 to $q0 (register-liveness annotation)
; and then broadcasts with dup v0.8b, v0.b[0].
1906define <8 x i8> @concat_vector_v8i8(<1 x i8> %a) {
1907; CHECK-LABEL: concat_vector_v8i8:
1908; CHECK:       // %bb.0:
1909; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
1910; CHECK-NEXT:    dup v0.8b, v0.b[0]
1911; CHECK-NEXT:    ret
1912 %r = shufflevector <1 x i8> %a, <1 x i8> undef, <8 x i32> zeroinitializer
1913 ret <8 x i8> %r
1914}
1915
; Splats the single lane of the <1 x i16> argument %a across an <8 x i16> result
; (all-zero shuffle mask).
; The expected output below widens $d0 to $q0 (register-liveness annotation)
; and then broadcasts with dup v0.8h, v0.h[0].
1916define <8 x i16> @concat_vector_v8i16(<1 x i16> %a) {
1917; CHECK-LABEL: concat_vector_v8i16:
1918; CHECK:       // %bb.0:
1919; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
1920; CHECK-NEXT:    dup v0.8h, v0.h[0]
1921; CHECK-NEXT:    ret
1922 %r = shufflevector <1 x i16> %a, <1 x i16> undef, <8 x i32> zeroinitializer
1923 ret <8 x i16> %r
1924}
1925
; Splats the single lane of the <1 x i8> argument %a across a <16 x i8> result
; (all-zero shuffle mask).
; The expected output below widens $d0 to $q0 (register-liveness annotation)
; and then broadcasts with dup v0.16b, v0.b[0].
1926define <16 x i8> @concat_vector_v16i8(<1 x i8> %a) {
1927; CHECK-LABEL: concat_vector_v16i8:
1928; CHECK:       // %bb.0:
1929; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
1930; CHECK-NEXT:    dup v0.16b, v0.b[0]
1931; CHECK-NEXT:    ret
1932 %r = shufflevector <1 x i8> %a, <1 x i8> undef, <16 x i32> zeroinitializer
1933 ret <16 x i8> %r
1934}
1935