; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s 2>%t | FileCheck %s
; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t

; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
; WARN-NOT: warning

;
; LDNT1B, LDNT1W, LDNT1H, LDNT1D: vector base + scalar offset
;   ldnt1b { z0.s }, p0/z, [z0.s, x0]
;

; LDNT1B
define <vscale x 4 x i32> @gldnt1b_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
; CHECK-LABEL: gldnt1b_s:
; CHECK: ldnt1b { z0.s }, p0/z, [z0.s, x0]
; CHECK-NEXT: ret
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                             <vscale x 4 x i32> %base,
                                                                                             i64 %offset)
  %res = zext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @gldnt1b_d(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
; CHECK-LABEL: gldnt1b_d:
; CHECK: ldnt1b { z0.d }, p0/z, [z0.d, x0]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                             <vscale x 2 x i64> %base,
                                                                                             i64 %offset)
  %res = zext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

; LDNT1H
define <vscale x 4 x i32> @gldnt1h_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
; CHECK-LABEL: gldnt1h_s:
; CHECK: ldnt1h { z0.s }, p0/z, [z0.s, x0]
; CHECK-NEXT: ret
; Intrinsic suffix fixed: was mis-mangled as ".nxv416" (missing 'i'), which the
; IR verifier rejects because it is not a valid type mangling for <vscale x 4 x i16>.
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                               <vscale x 4 x i32> %base,
                                                                                               i64 %offset)
  %res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @gldnt1h_d(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
; CHECK-LABEL: gldnt1h_d:
; CHECK: ldnt1h { z0.d }, p0/z, [z0.d, x0]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                               <vscale x 2 x i64> %base,
                                                                                               i64 %offset)
  %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

; LDNT1W
define <vscale x 4 x i32> @gldnt1w_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
; CHECK-LABEL: gldnt1w_s:
; CHECK: ldnt1w { z0.s }, p0/z, [z0.s, x0]
; CHECK-NEXT: ret
  %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                               <vscale x 4 x i32> %base,
                                                                                               i64 %offset)
  ret <vscale x 4 x i32> %load
}

define <vscale x 4 x float> @gldnt1w_s_float(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
; CHECK-LABEL: gldnt1w_s_float:
; CHECK: ldnt1w { z0.s }, p0/z, [z0.s, x0]
; CHECK-NEXT: ret
  %load = call <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                                 <vscale x 4 x i32> %base,
                                                                                                 i64 %offset)
  ret <vscale x 4 x float> %load
}

define <vscale x 2 x i64> @gldnt1w_d(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
; CHECK-LABEL: gldnt1w_d:
; CHECK: ldnt1w { z0.d }, p0/z, [z0.d, x0]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                               <vscale x 2 x i64> %base,
                                                                                               i64 %offset)
  %res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

; LDNT1D
define <vscale x 2 x i64> @gldnt1d_d(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
; CHECK-LABEL: gldnt1d_d:
; CHECK: ldnt1d { z0.d }, p0/z, [z0.d, x0]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                               <vscale x 2 x i64> %base,
                                                                                               i64 %offset)
  ret <vscale x 2 x i64> %load
}

; LDNT1D
define <vscale x 2 x double> @gldnt1d_d_double(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
; CHECK-LABEL: gldnt1d_d_double:
; CHECK: ldnt1d { z0.d }, p0/z, [z0.d, x0]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                                  <vscale x 2 x i64> %base,
                                                                                                  i64 %offset)
  ret <vscale x 2 x double> %load
}

;
; LDNT1SB, LDNT1SW, LDNT1SH, LDNT1SD: vector base + scalar offset
;   ldnt1sb { z0.s }, p0/z, [z0.s, x0]
;

; LDNT1SB
define <vscale x 4 x i32> @gldnt1sb_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
; CHECK-LABEL: gldnt1sb_s:
; CHECK: ldnt1sb { z0.s }, p0/z, [z0.s, x0]
; CHECK-NEXT: ret
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                             <vscale x 4 x i32> %base,
                                                                                             i64 %offset)
  %res = sext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @gldnt1sb_d(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
; CHECK-LABEL: gldnt1sb_d:
; CHECK: ldnt1sb { z0.d }, p0/z, [z0.d, x0]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                             <vscale x 2 x i64> %base,
                                                                                             i64 %offset)
  %res = sext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

; LDNT1SH
define <vscale x 4 x i32> @gldnt1sh_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
; CHECK-LABEL: gldnt1sh_s:
; CHECK: ldnt1sh { z0.s }, p0/z, [z0.s, x0]
; CHECK-NEXT: ret
; Intrinsic suffix fixed: was mis-mangled as ".nxv416" (missing 'i'), which the
; IR verifier rejects because it is not a valid type mangling for <vscale x 4 x i16>.
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                               <vscale x 4 x i32> %base,
                                                                                               i64 %offset)
  %res = sext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @gldnt1sh_d(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
; CHECK-LABEL: gldnt1sh_d:
; CHECK: ldnt1sh { z0.d }, p0/z, [z0.d, x0]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                               <vscale x 2 x i64> %base,
                                                                                               i64 %offset)
  %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

; LDNT1SW
define <vscale x 2 x i64> @gldnt1sw_d(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
; CHECK-LABEL: gldnt1sw_d:
; CHECK: ldnt1sw { z0.d }, p0/z, [z0.d, x0]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                               <vscale x 2 x i64> %base,
                                                                                               i64 %offset)
  %res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

; LDNT1B/LDNT1SB
declare <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64)
declare <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)

; LDNT1H/LDNT1SH
declare <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64)
declare <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)

; LDNT1W/LDNT1SW
declare <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64)
declare <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)

declare <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64)

; LDNT1D
declare <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)

declare <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)