; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -aarch64-enable-mgather-combine=0 < %s | FileCheck %s
; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -aarch64-enable-mgather-combine=1 < %s | FileCheck %s

; Masked gathers with 64-bit scaled offsets should lower to a single
; scaled-offset gather load (ld1h/ld1w/ld1d). A zero-extend of the loaded
; value is folded away, since the unsigned gather forms already zero-extend
; into the 64-bit container elements.

define <vscale x 2 x i64> @masked_gather_nxv2i16(i16* %base, <vscale x 2 x i64> %offsets, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0, z0.d, lsl #1]
; CHECK-NEXT:    ret
  %ptrs = getelementptr i16, i16* %base, <vscale x 2 x i64> %offsets
  %vals = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x i16> undef)
  %vals.zext = zext <vscale x 2 x i16> %vals to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %vals.zext
}

define <vscale x 2 x i64> @masked_gather_nxv2i32(i32* %base, <vscale x 2 x i64> %offsets, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0, z0.d, lsl #2]
; CHECK-NEXT:    ret
  %ptrs = getelementptr i32, i32* %base, <vscale x 2 x i64> %offsets
  %vals = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x i32> undef)
  %vals.zext = zext <vscale x 2 x i32> %vals to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %vals.zext
}

define <vscale x 2 x i64> @masked_gather_nxv2i64(i64* %base, <vscale x 2 x i64> %offsets, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d, lsl #3]
; CHECK-NEXT:    ret
  %ptrs = getelementptr i64, i64* %base, <vscale x 2 x i64> %offsets
  %vals = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64(<vscale x 2 x i64*> %ptrs, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x i64> undef)
  ret <vscale x 2 x i64> %vals
}

define <vscale x 2 x half> @masked_gather_nxv2f16(half* %base, <vscale x 2 x i64> %offsets, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0, z0.d, lsl #1]
; CHECK-NEXT:    ret
  %ptrs = getelementptr half, half* %base, <vscale x 2 x i64> %offsets
  %vals = call <vscale x 2 x half> @llvm.masked.gather.nxv2f16(<vscale x 2 x half*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x half> undef)
  ret <vscale x 2 x half> %vals
}

define <vscale x 2 x float> @masked_gather_nxv2f32(float* %base, <vscale x 2 x i64> %offsets, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0, z0.d, lsl #2]
; CHECK-NEXT:    ret
  %ptrs = getelementptr float, float* %base, <vscale x 2 x i64> %offsets
  %vals = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32(<vscale x 2 x float*> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x float> undef)
  ret <vscale x 2 x float> %vals
}

define <vscale x 2 x double> @masked_gather_nxv2f64(double* %base, <vscale x 2 x i64> %offsets, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d, lsl #3]
; CHECK-NEXT:    ret
  %ptrs = getelementptr double, double* %base, <vscale x 2 x i64> %offsets
  %vals = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64(<vscale x 2 x double*> %ptrs, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x double> undef)
  ret <vscale x 2 x double> %vals
}
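
; Sign-extending gathers should instead select the signed gather load
; forms (ld1sh/ld1sw), which sign-extend into the 64-bit container
; elements, so no separate sext instruction is needed.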

define <vscale x 2 x i64> @masked_sgather_nxv2i16(i16* %base, <vscale x 2 x i64> %offsets, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_sgather_nxv2i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sh { z0.d }, p0/z, [x0, z0.d, lsl #1]
; CHECK-NEXT:    ret
  %ptrs = getelementptr i16, i16* %base, <vscale x 2 x i64> %offsets
  %vals = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x i16> undef)
  %vals.sext = sext <vscale x 2 x i16> %vals to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %vals.sext
}

define <vscale x 2 x i64> @masked_sgather_nxv2i32(i32* %base, <vscale x 2 x i64> %offsets, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_sgather_nxv2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sw { z0.d }, p0/z, [x0, z0.d, lsl #2]
; CHECK-NEXT:    ret
  %ptrs = getelementptr i32, i32* %base, <vscale x 2 x i64> %offsets
  %vals = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x i32> undef)
  %vals.sext = sext <vscale x 2 x i32> %vals to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %vals.sext
}

declare <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x i16*>, i32, <vscale x 2 x i1>, <vscale x 2 x i16>)
declare <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*>, i32, <vscale x 2 x i1>, <vscale x 2 x i32>)
declare <vscale x 2 x i64> @llvm.masked.gather.nxv2i64(<vscale x 2 x i64*>, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
declare <vscale x 2 x half> @llvm.masked.gather.nxv2f16(<vscale x 2 x half*>, i32, <vscale x 2 x i1>, <vscale x 2 x half>)
declare <vscale x 2 x float> @llvm.masked.gather.nxv2f32(<vscale x 2 x float*>, i32, <vscale x 2 x i1>, <vscale x 2 x float>)
declare <vscale x 2 x double> @llvm.masked.gather.nxv2f64(<vscale x 2 x double*>, i32, <vscale x 2 x i1>, <vscale x 2 x double>)