; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 -asm-verbose=0 < %s 2>%t | FileCheck %s
; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t

; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
; WARN-NOT: warning

;
; WHILERW
;

define <vscale x 16 x i1> @whilerw_i8(i8* %a, i8* %b) {
; CHECK-LABEL: whilerw_i8:
; CHECK: whilerw p0.b, x0, x1
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilerw.b.nx16i1(i8* %a, i8* %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 8 x i1> @whilerw_i16(i16* %a, i16* %b) {
; CHECK-LABEL: whilerw_i16:
; CHECK: whilerw p0.h, x0, x1
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.whilerw.h.nx8i1(i16* %a, i16* %b)
  ret <vscale x 8 x i1> %out
}

define <vscale x 4 x i1> @whilerw_i32(i32* %a, i32* %b) {
; CHECK-LABEL: whilerw_i32:
; CHECK: whilerw p0.s, x0, x1
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i1> @llvm.aarch64.sve.whilerw.s.nx4i1(i32* %a, i32* %b)
  ret <vscale x 4 x i1> %out
}

define <vscale x 2 x i1> @whilerw_i64(i64* %a, i64* %b) {
; CHECK-LABEL: whilerw_i64:
; CHECK: whilerw p0.d, x0, x1
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilerw.d.nx2i1(i64* %a, i64* %b)
  ret <vscale x 2 x i1> %out
}

define <vscale x 8 x i1> @whilerw_bfloat(bfloat* %a, bfloat* %b) {
; CHECK-LABEL: whilerw_bfloat:
; CHECK: whilerw p0.h, x0, x1
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.whilerw.h.nx8i1.bf16.bf16(bfloat* %a, bfloat* %b)
  ret <vscale x 8 x i1> %out
}

define <vscale x 8 x i1> @whilerw_half(half* %a, half* %b) {
; CHECK-LABEL: whilerw_half:
; CHECK: whilerw p0.h, x0, x1
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.whilerw.h.nx8i1.f16.f16(half* %a, half* %b)
  ret <vscale x 8 x i1> %out
}

define <vscale x 4 x i1> @whilerw_float(float* %a, float* %b) {
; CHECK-LABEL: whilerw_float:
; CHECK: whilerw p0.s, x0, x1
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i1> @llvm.aarch64.sve.whilerw.s.nx4i1.f32.f32(float* %a, float* %b)
  ret <vscale x 4 x i1> %out
}

define <vscale x 2 x i1> @whilerw_double(double* %a, double* %b) {
; CHECK-LABEL: whilerw_double:
; CHECK: whilerw p0.d, x0, x1
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilerw.d.nx2i1.f64.f64(double* %a, double* %b)
  ret <vscale x 2 x i1> %out
}

;
; WHILEWR
;

define <vscale x 16 x i1> @whilewr_i8(i8* %a, i8* %b) {
; CHECK-LABEL: whilewr_i8:
; CHECK: whilewr p0.b, x0, x1
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilewr.b.nx16i1(i8* %a, i8* %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 8 x i1> @whilewr_i16(i16* %a, i16* %b) {
; CHECK-LABEL: whilewr_i16:
; CHECK: whilewr p0.h, x0, x1
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.whilewr.h.nx8i1(i16* %a, i16* %b)
  ret <vscale x 8 x i1> %out
}

define <vscale x 4 x i1> @whilewr_i32(i32* %a, i32* %b) {
; CHECK-LABEL: whilewr_i32:
; CHECK: whilewr p0.s, x0, x1
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i1> @llvm.aarch64.sve.whilewr.s.nx4i1(i32* %a, i32* %b)
  ret <vscale x 4 x i1> %out
}

define <vscale x 2 x i1> @whilewr_i64(i64* %a, i64* %b) {
; CHECK-LABEL: whilewr_i64:
; CHECK: whilewr p0.d, x0, x1
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilewr.d.nx2i1(i64* %a, i64* %b)
  ret <vscale x 2 x i1> %out
}

define <vscale x 8 x i1> @whilewr_bfloat(bfloat* %a, bfloat* %b) {
; CHECK-LABEL: whilewr_bfloat:
; CHECK: whilewr p0.h, x0, x1
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.whilewr.h.nx8i1.bf16.bf16(bfloat* %a, bfloat* %b)
  ret <vscale x 8 x i1> %out
}

define <vscale x 8 x i1> @whilewr_half(half* %a, half* %b) {
; CHECK-LABEL: whilewr_half:
; CHECK: whilewr p0.h, x0, x1
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.whilewr.h.nx8i1.f16.f16(half* %a, half* %b)
  ret <vscale x 8 x i1> %out
}

define <vscale x 4 x i1> @whilewr_float(float* %a, float* %b) {
; CHECK-LABEL: whilewr_float:
; CHECK: whilewr p0.s, x0, x1
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i1> @llvm.aarch64.sve.whilewr.s.nx4i1.f32.f32(float* %a, float* %b)
  ret <vscale x 4 x i1> %out
}

define <vscale x 2 x i1> @whilewr_double(double* %a, double* %b) {
; CHECK-LABEL: whilewr_double:
; CHECK: whilewr p0.d, x0, x1
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilewr.d.nx2i1.f64.f64(double* %a, double* %b)
  ret <vscale x 2 x i1> %out
}

declare <vscale x 16 x i1> @llvm.aarch64.sve.whilerw.b.nx16i1(i8* %a, i8* %b)
declare <vscale x 8 x i1> @llvm.aarch64.sve.whilerw.h.nx8i1(i16* %a, i16* %b)
declare <vscale x 4 x i1> @llvm.aarch64.sve.whilerw.s.nx4i1(i32* %a, i32* %b)
declare <vscale x 2 x i1> @llvm.aarch64.sve.whilerw.d.nx2i1(i64* %a, i64* %b)

declare <vscale x 8 x i1> @llvm.aarch64.sve.whilerw.h.nx8i1.bf16.bf16(bfloat* %a, bfloat* %b)
declare <vscale x 8 x i1> @llvm.aarch64.sve.whilerw.h.nx8i1.f16.f16(half* %a, half* %b)
declare <vscale x 4 x i1> @llvm.aarch64.sve.whilerw.s.nx4i1.f32.f32(float* %a, float* %b)
declare <vscale x 2 x i1> @llvm.aarch64.sve.whilerw.d.nx2i1.f64.f64(double* %a, double* %b)

declare <vscale x 16 x i1> @llvm.aarch64.sve.whilewr.b.nx16i1(i8* %a, i8* %b)
declare <vscale x 8 x i1> @llvm.aarch64.sve.whilewr.h.nx8i1(i16* %a, i16* %b)
declare <vscale x 4 x i1> @llvm.aarch64.sve.whilewr.s.nx4i1(i32* %a, i32* %b)
declare <vscale x 2 x i1> @llvm.aarch64.sve.whilewr.d.nx2i1(i64* %a, i64* %b)

declare <vscale x 8 x i1> @llvm.aarch64.sve.whilewr.h.nx8i1.bf16.bf16(bfloat* %a, bfloat* %b)
declare <vscale x 8 x i1> @llvm.aarch64.sve.whilewr.h.nx8i1.f16.f16(half* %a, half* %b)
declare <vscale x 4 x i1> @llvm.aarch64.sve.whilewr.s.nx4i1.f32.f32(float* %a, float* %b)
declare <vscale x 2 x i1> @llvm.aarch64.sve.whilewr.d.nx2i1.f64.f64(double* %a, double* %b)