; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s --check-prefixes=CHECK
; RUN: FileCheck --check-prefix=WARN --allow-empty %s < %t
; If this check fails, please read test/CodeGen/AArch64/README for instructions on how to resolve it.
; WARN-NOT: warning

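; This file checks the lowering of @llvm.experimental.vector.insert when a
; fixed-width vector is inserted into a scalable SVE vector. As the CHECK
; lines show, the expected sequence spills the scalable operand to a stack
; slot sized with addvl, clamps the insertion index so the store stays inside
; the container, stores the fixed-width subvector at the scaled byte offset,
; and reloads the combined result.

; Insert <2 x i64> into nxv2i64 at index 0: the index is clamped to at most
; cntd-1 with cmp/csel and scaled to a byte offset with lsl #3.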
define <vscale x 2 x i64> @insert_v2i64_nxv2i64(<vscale x 2 x i64> %vec, <2 x i64> %subvec) nounwind {
; CHECK-LABEL: insert_v2i64_nxv2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    cntd x8
; CHECK-NEXT:    sub x8, x8, #1 // =1
; CHECK-NEXT:    cmp x8, #0 // =0
; CHECK-NEXT:    csel x8, x8, xzr, lo
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    lsl x8, x8, #3
; CHECK-NEXT:    mov x9, sp
; CHECK-NEXT:    st1d { z0.d }, p0, [sp]
; CHECK-NEXT:    str q1, [x9, x8]
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [sp]
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %retval = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> %vec, <2 x i64> %subvec, i64 0)
  ret <vscale x 2 x i64> %retval
}

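; As above, but at index 1: the clamp becomes min(cntd-1, 1), selected with
; cmp/csinc instead of cmp/csel.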
define <vscale x 2 x i64> @insert_v2i64_nxv2i64_idx1(<vscale x 2 x i64> %vec, <2 x i64> %subvec) nounwind {
; CHECK-LABEL: insert_v2i64_nxv2i64_idx1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    cntd x8
; CHECK-NEXT:    sub x8, x8, #1 // =1
; CHECK-NEXT:    cmp x8, #1 // =1
; CHECK-NEXT:    csinc x8, x8, xzr, lo
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    lsl x8, x8, #3
; CHECK-NEXT:    mov x9, sp
; CHECK-NEXT:    st1d { z0.d }, p0, [sp]
; CHECK-NEXT:    str q1, [x9, x8]
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [sp]
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %retval = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> %vec, <2 x i64> %subvec, i64 1)
  ret <vscale x 2 x i64> %retval
}

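; Insert <4 x i32> into nxv4i32 at index 0: the element count comes from cntw
; and the byte offset uses lsl #2.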
define <vscale x 4 x i32> @insert_v4i32_nxv4i32(<vscale x 4 x i32> %vec, <4 x i32> %subvec) nounwind {
; CHECK-LABEL: insert_v4i32_nxv4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    cntw x8
; CHECK-NEXT:    sub x8, x8, #1 // =1
; CHECK-NEXT:    cmp x8, #0 // =0
; CHECK-NEXT:    csel x8, x8, xzr, lo
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    lsl x8, x8, #2
; CHECK-NEXT:    mov x9, sp
; CHECK-NEXT:    st1w { z0.s }, p0, [sp]
; CHECK-NEXT:    str q1, [x9, x8]
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [sp]
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %retval = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> %vec, <4 x i32> %subvec, i64 0)
  ret <vscale x 4 x i32> %retval
}

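; The i32 variant at index 1, again clamping with cmp/csinc.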
define <vscale x 4 x i32> @insert_v4i32_nxv4i32_idx1(<vscale x 4 x i32> %vec, <4 x i32> %subvec) nounwind {
; CHECK-LABEL: insert_v4i32_nxv4i32_idx1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    cntw x8
; CHECK-NEXT:    sub x8, x8, #1 // =1
; CHECK-NEXT:    cmp x8, #1 // =1
; CHECK-NEXT:    csinc x8, x8, xzr, lo
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    lsl x8, x8, #2
; CHECK-NEXT:    mov x9, sp
; CHECK-NEXT:    st1w { z0.s }, p0, [sp]
; CHECK-NEXT:    str q1, [x9, x8]
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [sp]
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %retval = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> %vec, <4 x i32> %subvec, i64 1)
  ret <vscale x 4 x i32> %retval
}

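; Insert <8 x i16> into nxv8i16 at index 0: cnth supplies the element count
; and the byte offset uses lsl #1.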
define <vscale x 8 x i16> @insert_v8i16_nxv8i16(<vscale x 8 x i16> %vec, <8 x i16> %subvec) nounwind {
; CHECK-LABEL: insert_v8i16_nxv8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    cnth x8
; CHECK-NEXT:    sub x8, x8, #1 // =1
; CHECK-NEXT:    cmp x8, #0 // =0
; CHECK-NEXT:    csel x8, x8, xzr, lo
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    lsl x8, x8, #1
; CHECK-NEXT:    mov x9, sp
; CHECK-NEXT:    st1h { z0.h }, p0, [sp]
; CHECK-NEXT:    str q1, [x9, x8]
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [sp]
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %retval = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> %vec, <8 x i16> %subvec, i64 0)
  ret <vscale x 8 x i16> %retval
}

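; The i16 variant at index 1.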
define <vscale x 8 x i16> @insert_v8i16_nxv8i16_idx1(<vscale x 8 x i16> %vec, <8 x i16> %subvec) nounwind {
; CHECK-LABEL: insert_v8i16_nxv8i16_idx1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    cnth x8
; CHECK-NEXT:    sub x8, x8, #1 // =1
; CHECK-NEXT:    cmp x8, #1 // =1
; CHECK-NEXT:    csinc x8, x8, xzr, lo
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    lsl x8, x8, #1
; CHECK-NEXT:    mov x9, sp
; CHECK-NEXT:    st1h { z0.h }, p0, [sp]
; CHECK-NEXT:    str q1, [x9, x8]
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [sp]
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %retval = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> %vec, <8 x i16> %subvec, i64 1)
  ret <vscale x 8 x i16> %retval
}

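; Insert <16 x i8> into nxv16i8 at index 0: the byte count comes from rdvl #1
; and no shift is needed since the elements are single bytes.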
define <vscale x 16 x i8> @insert_v16i8_nxv16i8(<vscale x 16 x i8> %vec, <16 x i8> %subvec) nounwind {
; CHECK-LABEL: insert_v16i8_nxv16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    rdvl x8, #1
; CHECK-NEXT:    sub x8, x8, #1 // =1
; CHECK-NEXT:    cmp x8, #0 // =0
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    csel x8, x8, xzr, lo
; CHECK-NEXT:    mov x9, sp
; CHECK-NEXT:    st1b { z0.b }, p0, [sp]
; CHECK-NEXT:    str q1, [x9, x8]
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [sp]
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %retval = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> %vec, <16 x i8> %subvec, i64 0)
  ret <vscale x 16 x i8> %retval
}

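; The i8 variant at index 1.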
define <vscale x 16 x i8> @insert_v16i8_nxv16i8_idx1(<vscale x 16 x i8> %vec, <16 x i8> %subvec) nounwind {
; CHECK-LABEL: insert_v16i8_nxv16i8_idx1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    rdvl x8, #1
; CHECK-NEXT:    sub x8, x8, #1 // =1
; CHECK-NEXT:    cmp x8, #1 // =1
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    csinc x8, x8, xzr, lo
; CHECK-NEXT:    mov x9, sp
; CHECK-NEXT:    st1b { z0.b }, p0, [sp]
; CHECK-NEXT:    str q1, [x9, x8]
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [sp]
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %retval = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> %vec, <16 x i8> %subvec, i64 1)
  ret <vscale x 16 x i8> %retval
}

declare <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64>, <2 x i64>, i64)
declare <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32>, <4 x i32>, i64)
declare <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16>, <8 x i16>, i64)
declare <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8>, <16 x i8>, i64)