; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s 2>%t | FileCheck %s
; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t

; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
; WARN-NOT: warning

;
; LDNT1B, LDNT1W, LDNT1H, LDNT1D: base + 32-bit unscaled offsets, zero (uxtw)
; extended to 64 bits.
; e.g. ldnt1h { z0.s }, p0/z, [z0.s, x0]
;

; LDNT1B
define <vscale x 4 x i32> @gldnt1b_s_uxtw(<vscale x 4 x i1> %pg, i8* %base, <vscale x 4 x i32> %b) {
; CHECK-LABEL: gldnt1b_s_uxtw:
; CHECK: ldnt1b { z0.s }, p0/z, [z0.s, x0]
; CHECK-NEXT: ret
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i8(<vscale x 4 x i1> %pg,
                                                                            i8* %base,
                                                                            <vscale x 4 x i32> %b)
  %res = zext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

; LDNT1H
define <vscale x 4 x i32> @gldnt1h_s_uxtw(<vscale x 4 x i1> %pg, i16* %base, <vscale x 4 x i32> %b) {
; CHECK-LABEL: gldnt1h_s_uxtw:
; CHECK: ldnt1h { z0.s }, p0/z, [z0.s, x0]
; CHECK-NEXT: ret
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i16(<vscale x 4 x i1> %pg,
                                                                              i16* %base,
                                                                              <vscale x 4 x i32> %b)
  %res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

; LDNT1W
define <vscale x 4 x i32> @gldnt1w_s_uxtw(<vscale x 4 x i1> %pg, i32* %base, <vscale x 4 x i32> %b) {
; CHECK-LABEL: gldnt1w_s_uxtw:
; CHECK: ldnt1w { z0.s }, p0/z, [z0.s, x0]
; CHECK-NEXT: ret
  %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i32(<vscale x 4 x i1> %pg,
                                                                              i32* %base,
                                                                              <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %load
}

define <vscale x 4 x float> @gldnt1w_s_uxtw_float(<vscale x 4 x i1> %pg, float* %base, <vscale x 4 x i32> %b) {
; CHECK-LABEL: gldnt1w_s_uxtw_float:
; CHECK: ldnt1w { z0.s }, p0/z, [z0.s, x0]
; CHECK-NEXT: ret
  %load = call <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4f32(<vscale x 4 x i1> %pg,
                                                                                float* %base,
                                                                                <vscale x 4 x i32> %b)
  ret <vscale x 4 x float> %load
}

;
; LDNT1SB, LDNT1SW, LDNT1SH: base + 32-bit unscaled offsets, zero (uxtw)
; extended to 64 bits.
; e.g. ldnt1sh { z0.s }, p0/z, [z0.s, x0]
;

; LDNT1SB
define <vscale x 4 x i32> @gldnt1sb_s_uxtw(<vscale x 4 x i1> %pg, i8* %base, <vscale x 4 x i32> %b) {
; CHECK-LABEL: gldnt1sb_s_uxtw:
; CHECK: ldnt1sb { z0.s }, p0/z, [z0.s, x0]
; CHECK-NEXT: ret
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i8(<vscale x 4 x i1> %pg,
                                                                            i8* %base,
                                                                            <vscale x 4 x i32> %b)
  %res = sext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

; LDNT1SH
define <vscale x 4 x i32> @gldnt1sh_s_uxtw(<vscale x 4 x i1> %pg, i16* %base, <vscale x 4 x i32> %b) {
; CHECK-LABEL: gldnt1sh_s_uxtw:
; CHECK: ldnt1sh { z0.s }, p0/z, [z0.s, x0]
; CHECK-NEXT: ret
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i16(<vscale x 4 x i1> %pg,
                                                                              i16* %base,
                                                                              <vscale x 4 x i32> %b)
  %res = sext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

; LDNT1B/LDNT1SB
declare <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i8(<vscale x 4 x i1>, i8*, <vscale x 4 x i32>)
declare <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.sxtw.nxv4i8(<vscale x 4 x i1>, i8*, <vscale x 4 x i32>)

; LDNT1H/LDNT1SH
declare <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.sxtw.nxv4i16(<vscale x 4 x i1>, i16*, <vscale x 4 x i32>)
declare <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i16(<vscale x 4 x i1>, i16*, <vscale x 4 x i32>)

; LDNT1W/LDNT1SW
declare <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.gather.sxtw.nxv4i32(<vscale x 4 x i1>, i32*, <vscale x 4 x i32>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i32(<vscale x 4 x i1>, i32*, <vscale x 4 x i32>)

declare <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.gather.sxtw.nxv4f32(<vscale x 4 x i1>, float*, <vscale x 4 x i32>)
declare <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4f32(<vscale x 4 x i1>, float*, <vscale x 4 x i32>)