; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s
; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t

; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
; WARN-NOT: warning

;
; LD1B
;

define <vscale x 16 x i8> @ld1b_i8(<vscale x 16 x i1> %pg, i8* %a, i64 %index) {
; CHECK-LABEL: ld1b_i8:
; CHECK: ld1b { z0.b }, p0/z, [x0, x1]
; CHECK-NEXT: ret
  %base = getelementptr i8, i8* %a, i64 %index
  %load = call <vscale x 16 x i8> @llvm.aarch64.sve.ld1.nxv16i8(<vscale x 16 x i1> %pg, i8* %base)
  ret <vscale x 16 x i8> %load
}

define <vscale x 8 x i16> @ld1b_h(<vscale x 8 x i1> %pred, i8* %a, i64 %index) {
; CHECK-LABEL: ld1b_h:
; CHECK: ld1b { z0.h }, p0/z, [x0, x1]
; CHECK-NEXT: ret
  %base = getelementptr i8, i8* %a, i64 %index
  %load = call <vscale x 8 x i8> @llvm.aarch64.sve.ld1.nxv8i8(<vscale x 8 x i1> %pred, i8* %base)
  %res = zext <vscale x 8 x i8> %load to <vscale x 8 x i16>
  ret <vscale x 8 x i16> %res
}

define <vscale x 8 x i16> @ld1sb_h(<vscale x 8 x i1> %pred, i8* %a, i64 %index) {
; CHECK-LABEL: ld1sb_h:
; CHECK: ld1sb { z0.h }, p0/z, [x0, x1]
; CHECK-NEXT: ret
  %base = getelementptr i8, i8* %a, i64 %index
  %load = call <vscale x 8 x i8> @llvm.aarch64.sve.ld1.nxv8i8(<vscale x 8 x i1> %pred, i8* %base)
  %res = sext <vscale x 8 x i8> %load to <vscale x 8 x i16>
  ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @ld1b_s(<vscale x 4 x i1> %pred, i8* %a, i64 %index) {
; CHECK-LABEL: ld1b_s:
; CHECK: ld1b { z0.s }, p0/z, [x0, x1]
; CHECK-NEXT: ret
  %base = getelementptr i8, i8* %a, i64 %index
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.nxv4i8(<vscale x 4 x i1> %pred, i8* %base)
  %res = zext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @ld1sb_s(<vscale x 4 x i1> %pred, i8* %a, i64 %index) {
; CHECK-LABEL: ld1sb_s:
; CHECK: ld1sb { z0.s }, p0/z, [x0, x1]
; CHECK-NEXT: ret
  %base = getelementptr i8, i8* %a, i64 %index
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.nxv4i8(<vscale x 4 x i1> %pred, i8* %base)
  %res = sext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @ld1b_d(<vscale x 2 x i1> %pred, i8* %a, i64 %index) {
; CHECK-LABEL: ld1b_d:
; CHECK: ld1b { z0.d }, p0/z, [x0, x1]
; CHECK-NEXT: ret
  %base = getelementptr i8, i8* %a, i64 %index
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.nxv2i8(<vscale x 2 x i1> %pred, i8* %base)
  %res = zext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ld1sb_d(<vscale x 2 x i1> %pred, i8* %a, i64 %index) {
; CHECK-LABEL: ld1sb_d:
; CHECK: ld1sb { z0.d }, p0/z, [x0, x1]
; CHECK-NEXT: ret
  %base = getelementptr i8, i8* %a, i64 %index
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.nxv2i8(<vscale x 2 x i1> %pred, i8* %base)
  %res = sext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

;
; LD1H
;

define <vscale x 8 x i16> @ld1h_i16(<vscale x 8 x i1> %pg, i16* %a, i64 %index) {
; CHECK-LABEL: ld1h_i16:
; CHECK: ld1h { z0.h }, p0/z, [x0, x1, lsl #1]
; CHECK-NEXT: ret
  %base = getelementptr i16, i16* %a, i64 %index
  %load = call <vscale x 8 x i16> @llvm.aarch64.sve.ld1.nxv8i16(<vscale x 8 x i1> %pg, i16* %base)
  ret <vscale x 8 x i16> %load
}

define <vscale x 8 x half> @ld1h_f16(<vscale x 8 x i1> %pg, half* %a, i64 %index) {
; CHECK-LABEL: ld1h_f16:
; CHECK: ld1h { z0.h }, p0/z, [x0, x1, lsl #1]
; CHECK-NEXT: ret
  %base = getelementptr half, half* %a, i64 %index
  %load = call <vscale x 8 x half> @llvm.aarch64.sve.ld1.nxv8f16(<vscale x 8 x i1> %pg, half* %base)
  ret <vscale x 8 x half> %load
}

define <vscale x 8 x bfloat> @ld1h_bf16(<vscale x 8 x i1> %pg, bfloat* %a, i64 %index) #0 {
; CHECK-LABEL: ld1h_bf16:
; CHECK: ld1h { z0.h }, p0/z, [x0, x1, lsl #1]
; CHECK-NEXT: ret
  %base = getelementptr bfloat, bfloat* %a, i64 %index
  %load = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1.nxv8bf16(<vscale x 8 x i1> %pg, bfloat* %base)
  ret <vscale x 8 x bfloat> %load
}

define <vscale x 4 x i32> @ld1h_s(<vscale x 4 x i1> %pred, i16* %a, i64 %index) {
; CHECK-LABEL: ld1h_s:
; CHECK: ld1h { z0.s }, p0/z, [x0, x1, lsl #1]
; CHECK-NEXT: ret
  %base = getelementptr i16, i16* %a, i64 %index
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.nxv4i16(<vscale x 4 x i1> %pred, i16* %base)
  %res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @ld1sh_s(<vscale x 4 x i1> %pred, i16* %a, i64 %index) {
; CHECK-LABEL: ld1sh_s:
; CHECK: ld1sh { z0.s }, p0/z, [x0, x1, lsl #1]
; CHECK-NEXT: ret
  %base = getelementptr i16, i16* %a, i64 %index
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.nxv4i16(<vscale x 4 x i1> %pred, i16* %base)
  %res = sext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @ld1h_d(<vscale x 2 x i1> %pred, i16* %a, i64 %index) {
; CHECK-LABEL: ld1h_d:
; CHECK: ld1h { z0.d }, p0/z, [x0, x1, lsl #1]
; CHECK-NEXT: ret
  %base = getelementptr i16, i16* %a, i64 %index
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.nxv2i16(<vscale x 2 x i1> %pred, i16* %base)
  %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ld1sh_d(<vscale x 2 x i1> %pred, i16* %a, i64 %index) {
; CHECK-LABEL: ld1sh_d:
; CHECK: ld1sh { z0.d }, p0/z, [x0, x1, lsl #1]
; CHECK-NEXT: ret
  %base = getelementptr i16, i16* %a, i64 %index
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.nxv2i16(<vscale x 2 x i1> %pred, i16* %base)
  %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

;
; LD1W
;

define <vscale x 4 x i32> @ld1w(<vscale x 4 x i1> %pg, i32* %a, i64 %index) {
; CHECK-LABEL: ld1w:
; CHECK: ld1w { z0.s }, p0/z, [x0, x1, lsl #2]
; CHECK-NEXT: ret
  %base = getelementptr i32, i32* %a, i64 %index
  %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.nxv4i32(<vscale x 4 x i1> %pg, i32* %base)
  ret <vscale x 4 x i32> %load
}

define <vscale x 4 x float> @ld1w_f32(<vscale x 4 x i1> %pg, float* %a, i64 %index) {
; CHECK-LABEL: ld1w_f32:
; CHECK: ld1w { z0.s }, p0/z, [x0, x1, lsl #2]
; CHECK-NEXT: ret
  %base = getelementptr float, float* %a, i64 %index
  %load = call <vscale x 4 x float> @llvm.aarch64.sve.ld1.nxv4f32(<vscale x 4 x i1> %pg, float* %base)
  ret <vscale x 4 x float> %load
}

define <vscale x 2 x i64> @ld1w_d(<vscale x 2 x i1> %pred, i32* %a, i64 %index) {
; CHECK-LABEL: ld1w_d:
; CHECK: ld1w { z0.d }, p0/z, [x0, x1, lsl #2]
; CHECK-NEXT: ret
  %base = getelementptr i32, i32* %a, i64 %index
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.nxv2i32(<vscale x 2 x i1> %pred, i32* %base)
  %res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ld1sw_d(<vscale x 2 x i1> %pred, i32* %a, i64 %index) {
; CHECK-LABEL: ld1sw_d:
; CHECK: ld1sw { z0.d }, p0/z, [x0, x1, lsl #2]
; CHECK-NEXT: ret
  %base = getelementptr i32, i32* %a, i64 %index
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.nxv2i32(<vscale x 2 x i1> %pred, i32* %base)
  %res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

;
; LD1D
;

define <vscale x 2 x i64> @ld1d(<vscale x 2 x i1> %pg, i64* %a, i64 %index) {
; CHECK-LABEL: ld1d:
; CHECK: ld1d { z0.d }, p0/z, [x0, x1, lsl #3]
; CHECK-NEXT: ret
  %base = getelementptr i64, i64* %a, i64 %index
  %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.nxv2i64(<vscale x 2 x i1> %pg, i64* %base)
  ret <vscale x 2 x i64> %load
}

define <vscale x 2 x double> @ld1d_f64(<vscale x 2 x i1> %pg, double* %a, i64 %index) {
; CHECK-LABEL: ld1d_f64:
; CHECK: ld1d { z0.d }, p0/z, [x0, x1, lsl #3]
; CHECK-NEXT: ret
  %base = getelementptr double, double* %a, i64 %index
  %load = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.nxv2f64(<vscale x 2 x i1> %pg, double* %base)
  ret <vscale x 2 x double> %load
}

declare <vscale x 16 x i8> @llvm.aarch64.sve.ld1.nxv16i8(<vscale x 16 x i1>, i8*)

declare <vscale x 8 x i8> @llvm.aarch64.sve.ld1.nxv8i8(<vscale x 8 x i1>, i8*)
declare <vscale x 8 x i16> @llvm.aarch64.sve.ld1.nxv8i16(<vscale x 8 x i1>, i16*)
declare <vscale x 8 x half> @llvm.aarch64.sve.ld1.nxv8f16(<vscale x 8 x i1>, half*)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1.nxv8bf16(<vscale x 8 x i1>, bfloat*)

declare <vscale x 4 x i8> @llvm.aarch64.sve.ld1.nxv4i8(<vscale x 4 x i1>, i8*)
declare <vscale x 4 x i16> @llvm.aarch64.sve.ld1.nxv4i16(<vscale x 4 x i1>, i16*)
declare <vscale x 4 x i32> @llvm.aarch64.sve.ld1.nxv4i32(<vscale x 4 x i1>, i32*)
declare <vscale x 4 x float> @llvm.aarch64.sve.ld1.nxv4f32(<vscale x 4 x i1>, float*)

declare <vscale x 2 x i8> @llvm.aarch64.sve.ld1.nxv2i8(<vscale x 2 x i1>, i8*)
declare <vscale x 2 x i16> @llvm.aarch64.sve.ld1.nxv2i16(<vscale x 2 x i1>, i16*)
declare <vscale x 2 x i32> @llvm.aarch64.sve.ld1.nxv2i32(<vscale x 2 x i1>, i32*)
declare <vscale x 2 x i64> @llvm.aarch64.sve.ld1.nxv2i64(<vscale x 2 x i1>, i64*)
declare <vscale x 2 x double> @llvm.aarch64.sve.ld1.nxv2f64(<vscale x 2 x i1>, double*)

; +bf16 is required for the bfloat version.
attributes #0 = { "target-features"="+sve,+bf16" }