; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s
; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t

; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
; WARN-NOT: warning

;
; DUP
; Predicated broadcast of a scalar into the active lanes of an SVE vector,
; via the llvm.aarch64.sve.dup.* intrinsics (inactive lanes take %a).
;

define <vscale x 16 x i8> @dup_i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, i8 %b) {
; CHECK-LABEL: dup_i8:
; CHECK: mov z0.b, p0/m, w0
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.nxv16i8(<vscale x 16 x i8> %a,
                                                               <vscale x 16 x i1> %pg,
                                                               i8 %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @dup_i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, i16 %b) {
; CHECK-LABEL: dup_i16:
; CHECK: mov z0.h, p0/m, w0
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.nxv8i16(<vscale x 8 x i16> %a,
                                                               <vscale x 8 x i1> %pg,
                                                               i16 %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @dup_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, i32 %b) {
; CHECK-LABEL: dup_i32:
; CHECK: mov z0.s, p0/m, w0
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.nxv4i32(<vscale x 4 x i32> %a,
                                                               <vscale x 4 x i1> %pg,
                                                               i32 %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @dup_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, i64 %b) {
; CHECK-LABEL: dup_i64:
; CHECK: mov z0.d, p0/m, x0
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.nxv2i64(<vscale x 2 x i64> %a,
                                                               <vscale x 2 x i1> %pg,
                                                               i64 %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 8 x half> @dup_f16(<vscale x 8 x half> %a, <vscale x 8 x i1> %pg, half %b) {
; CHECK-LABEL: dup_f16:
; CHECK: mov z0.h, p0/m, h1
; CHECK-NEXT: ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.dup.nxv8f16(<vscale x 8 x half> %a,
                                                                <vscale x 8 x i1> %pg,
                                                                half %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 8 x bfloat> @dup_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x i1> %pg, bfloat %b) #0 {
; CHECK-LABEL: dup_bf16:
; CHECK: mov z0.h, p0/m, h1
; CHECK-NEXT: ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.nxv8bf16(<vscale x 8 x bfloat> %a,
                                                                   <vscale x 8 x i1> %pg,
                                                                   bfloat %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 4 x float> @dup_f32(<vscale x 4 x float> %a, <vscale x 4 x i1> %pg, float %b) {
; CHECK-LABEL: dup_f32:
; CHECK: mov z0.s, p0/m, s1
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.dup.nxv4f32(<vscale x 4 x float> %a,
                                                                 <vscale x 4 x i1> %pg,
                                                                 float %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @dup_f64(<vscale x 2 x double> %a, <vscale x 2 x i1> %pg, double %b) {
; CHECK-LABEL: dup_f64:
; CHECK: mov z0.d, p0/m, d1
; CHECK-NEXT: ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.dup.nxv2f64(<vscale x 2 x double> %a,
                                                                  <vscale x 2 x i1> %pg,
                                                                  double %b)
  ret <vscale x 2 x double> %out
}

; Zeroing form: inactive lanes come from a zeroinitializer merge operand.
define <vscale x 8 x bfloat> @test_svdup_n_bf16_z(<vscale x 8 x i1> %pg, bfloat %op) #0 {
; CHECK-LABEL: test_svdup_n_bf16_z:
; CHECK: mov z1.h, #0
; CHECK: mov z1.h, p0/m, h0
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.nxv8bf16(<vscale x 8 x bfloat> zeroinitializer, <vscale x 8 x i1> %pg, bfloat %op)
  ret <vscale x 8 x bfloat> %out
}

; Merging form: inactive lanes come from %inactive.
define <vscale x 8 x bfloat> @test_svdup_n_bf16_m(<vscale x 8 x bfloat> %inactive, <vscale x 8 x i1> %pg, bfloat %op) #0 {
; CHECK-LABEL: test_svdup_n_bf16_m:
; CHECK: mov z0.h, p0/m, h1
; CHECK-NEXT: ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.nxv8bf16(<vscale x 8 x bfloat> %inactive, <vscale x 8 x i1> %pg, bfloat %op)
  ret <vscale x 8 x bfloat> %out
}

; Don't-care form: merge operand is undef, so any register may serve as the
; destination without a separate copy.
define <vscale x 8 x bfloat> @test_svdup_n_bf16_x(<vscale x 8 x i1> %pg, bfloat %op) #0 {
; CHECK-LABEL: test_svdup_n_bf16_x:
; CHECK: mov z0.h, p0/m, h0
; CHECK-NEXT: ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> %pg, bfloat %op)
  ret <vscale x 8 x bfloat> %out
}

declare <vscale x 16 x i8> @llvm.aarch64.sve.dup.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, i8)
declare <vscale x 8 x i16> @llvm.aarch64.sve.dup.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i16)
declare <vscale x 4 x i32> @llvm.aarch64.sve.dup.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.dup.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i64)
declare <vscale x 8 x half> @llvm.aarch64.sve.dup.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, half)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i1>, bfloat)
declare <vscale x 4 x float> @llvm.aarch64.sve.dup.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, float)
declare <vscale x 2 x double> @llvm.aarch64.sve.dup.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, double)

; +bf16 is required for the bfloat version.
attributes #0 = { "target-features"="+sve,+bf16" }