; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s 2>%t | FileCheck %s
; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t

; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
; WARN-NOT: warning

;
; LDNT1B, LDNT1W, LDNT1H, LDNT1D: base + 32-bit unscaled offsets, zero (uxtw)
; extended to 64 bits.
;   e.g. ldnt1h { z0.s }, p0/z, [z0.s, x0]
;

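; Note (annotation): in each test below, lane i of the gather loads from
; %base + zext(%b[i]), per the uxtw extension described above. Whether the
; narrow loaded value is then zext'd or sext'd in IR is what selects the
; zero- vs. sign-extending form of the instruction.
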
; LDNT1B
define <vscale x 4 x i32> @gldnt1b_s_uxtw(<vscale x 4 x i1> %pg, i8* %base, <vscale x 4 x i32> %b) {
; CHECK-LABEL: gldnt1b_s_uxtw:
; CHECK: ldnt1b { z0.s }, p0/z, [z0.s, x0]
; CHECK-NEXT: ret
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i8(<vscale x 4 x i1> %pg,
                                                                            i8* %base,
                                                                            <vscale x 4 x i32> %b)
  %res = zext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

; LDNT1H
define <vscale x 4 x i32> @gldnt1h_s_uxtw(<vscale x 4 x i1> %pg, i16* %base, <vscale x 4 x i32> %b) {
; CHECK-LABEL: gldnt1h_s_uxtw:
; CHECK: ldnt1h { z0.s }, p0/z, [z0.s, x0]
; CHECK-NEXT: ret
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i16(<vscale x 4 x i1> %pg,
                                                                              i16* %base,
                                                                              <vscale x 4 x i32> %b)
  %res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

; LDNT1W
define <vscale x 4 x i32> @gldnt1w_s_uxtw(<vscale x 4 x i1> %pg, i32* %base, <vscale x 4 x i32> %b) {
; CHECK-LABEL: gldnt1w_s_uxtw:
; CHECK: ldnt1w { z0.s }, p0/z, [z0.s, x0]
; CHECK-NEXT: ret
  %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i32(<vscale x 4 x i1> %pg,
                                                                              i32* %base,
                                                                              <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %load
}

define <vscale x 4 x float> @gldnt1w_s_uxtw_float(<vscale x 4 x i1> %pg, float* %base, <vscale x 4 x i32> %b) {
; CHECK-LABEL: gldnt1w_s_uxtw_float:
; CHECK: ldnt1w { z0.s }, p0/z, [z0.s, x0]
; CHECK-NEXT: ret
  %load = call <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4f32(<vscale x 4 x i1> %pg,
                                                                                float* %base,
                                                                                <vscale x 4 x i32> %b)
  ret <vscale x 4 x float> %load
}

;
; LDNT1SB, LDNT1SW, LDNT1SH: base + 32-bit unscaled offsets, zero (uxtw)
; extended to 64 bits.
;   e.g. ldnt1sh { z0.s }, p0/z, [z0.s, x0]
;

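; Note (annotation): there is no LDNT1SW test in this file because
; sign-extending a 32-bit load into a 32-bit element would be a no-op;
; LDNT1SW exists only for the 64-bit element (.d) forms.
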
; LDNT1SB
define <vscale x 4 x i32> @gldnt1sb_s_uxtw(<vscale x 4 x i1> %pg, i8* %base, <vscale x 4 x i32> %b) {
; CHECK-LABEL: gldnt1sb_s_uxtw:
; CHECK: ldnt1sb { z0.s }, p0/z, [z0.s, x0]
; CHECK-NEXT: ret
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i8(<vscale x 4 x i1> %pg,
                                                                            i8* %base,
                                                                            <vscale x 4 x i32> %b)
  %res = sext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

; LDNT1SH
define <vscale x 4 x i32> @gldnt1sh_s_uxtw(<vscale x 4 x i1> %pg, i16* %base, <vscale x 4 x i32> %b) {
; CHECK-LABEL: gldnt1sh_s_uxtw:
; CHECK: ldnt1sh { z0.s }, p0/z, [z0.s, x0]
; CHECK-NEXT: ret
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i16(<vscale x 4 x i1> %pg,
                                                                              i16* %base,
                                                                              <vscale x 4 x i32> %b)
  %res = sext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

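; Note (annotation): the sxtw (sign-extended offset) declarations below are
; the signed-offset counterparts of the intrinsics exercised above; only the
; uxtw forms are tested in this file.
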
; LDNT1B/LDNT1SB
declare <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i8(<vscale x 4 x i1>, i8*, <vscale x 4 x i32>)
declare <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.sxtw.nxv4i8(<vscale x 4 x i1>, i8*, <vscale x 4 x i32>)

; LDNT1H/LDNT1SH
declare <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.sxtw.nxv4i16(<vscale x 4 x i1>, i16*, <vscale x 4 x i32>)
declare <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i16(<vscale x 4 x i1>, i16*, <vscale x 4 x i32>)

; LDNT1W/LDNT1SW
declare <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.gather.sxtw.nxv4i32(<vscale x 4 x i1>, i32*, <vscale x 4 x i32>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i32(<vscale x 4 x i1>, i32*, <vscale x 4 x i32>)

declare <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.gather.sxtw.nxv4f32(<vscale x 4 x i1>, float*, <vscale x 4 x i32>)
declare <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4f32(<vscale x 4 x i1>, float*, <vscale x 4 x i32>)