; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -o - | FileCheck %s

declare i64 @llvm.vector.reduce.add.i64.v2i64(<2 x i64>)
declare i32 @llvm.vector.reduce.add.i32.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.add.i32.v8i32(<8 x i32>)
declare i16 @llvm.vector.reduce.add.i16.v8i16(<8 x i16>)
declare i16 @llvm.vector.reduce.add.i16.v16i16(<16 x i16>)
declare i8 @llvm.vector.reduce.add.i8.v16i8(<16 x i8>)
declare i8 @llvm.vector.reduce.add.i8.v32i8(<32 x i8>)

; i64 reduction has no single MVE instruction: lanes are moved to GPRs and
; added with adds/adcs.
define arm_aapcs_vfpcc i64 @vaddv_v2i64_i64(<2 x i64> %s1) {
; CHECK-LABEL: vaddv_v2i64_i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s2
; CHECK-NEXT:    vmov r3, s0
; CHECK-NEXT:    vmov r1, s3
; CHECK-NEXT:    vmov r2, s1
; CHECK-NEXT:    adds r0, r0, r3
; CHECK-NEXT:    adcs r1, r2
; CHECK-NEXT:    bx lr
entry:
  %r = call i64 @llvm.vector.reduce.add.i64.v2i64(<2 x i64> %s1)
  ret i64 %r
}

define arm_aapcs_vfpcc i32 @vaddv_v4i32_i32(<4 x i32> %s1) {
; CHECK-LABEL: vaddv_v4i32_i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vaddv.u32 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %r = call i32 @llvm.vector.reduce.add.i32.v4i32(<4 x i32> %s1)
  ret i32 %r
}

; Double-width reductions are expected to vadd the two halves first, then use
; a single vaddv.
define arm_aapcs_vfpcc i32 @vaddv_v8i32_i32(<8 x i32> %s1) {
; CHECK-LABEL: vaddv_v8i32_i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vadd.i32 q0, q0, q1
; CHECK-NEXT:    vaddv.u32 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %r = call i32 @llvm.vector.reduce.add.i32.v8i32(<8 x i32> %s1)
  ret i32 %r
}

define arm_aapcs_vfpcc i16 @vaddv_v8i16_i16(<8 x i16> %s1) {
; CHECK-LABEL: vaddv_v8i16_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vaddv.u16 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %r = call i16 @llvm.vector.reduce.add.i16.v8i16(<8 x i16> %s1)
  ret i16 %r
}

define arm_aapcs_vfpcc i16 @vaddv_v16i16_i16(<16 x i16> %s1) {
; CHECK-LABEL: vaddv_v16i16_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vadd.i16 q0, q0, q1
; CHECK-NEXT:    vaddv.u16 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %r = call i16 @llvm.vector.reduce.add.i16.v16i16(<16 x i16> %s1)
  ret i16 %r
}

define arm_aapcs_vfpcc i8 @vaddv_v16i8_i8(<16 x i8> %s1) {
; CHECK-LABEL: vaddv_v16i8_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vaddv.u8 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %r = call i8 @llvm.vector.reduce.add.i8.v16i8(<16 x i8> %s1)
  ret i8 %r
}

define arm_aapcs_vfpcc i8 @vaddv_v32i8_i8(<32 x i8> %s1) {
; CHECK-LABEL: vaddv_v32i8_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vadd.i8 q0, q0, q1
; CHECK-NEXT:    vaddv.u8 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %r = call i8 @llvm.vector.reduce.add.i8.v32i8(<32 x i8> %s1)
  ret i8 %r
}

; The vaddva_* variants additionally accumulate a scalar %x into the
; reduction; non-i64 cases should select the accumulating vaddva form.
define arm_aapcs_vfpcc i64 @vaddva_v2i64_i64(<2 x i64> %s1, i64 %x) {
; CHECK-LABEL: vaddva_v2i64_i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    vmov r2, s2
; CHECK-NEXT:    vmov r3, s0
; CHECK-NEXT:    vmov r12, s3
; CHECK-NEXT:    vmov lr, s1
; CHECK-NEXT:    adds r2, r2, r3
; CHECK-NEXT:    adc.w r3, lr, r12
; CHECK-NEXT:    adds r0, r0, r2
; CHECK-NEXT:    adcs r1, r3
; CHECK-NEXT:    pop {r7, pc}
entry:
  %t = call i64 @llvm.vector.reduce.add.i64.v2i64(<2 x i64> %s1)
  %r = add i64 %t, %x
  ret i64 %r
}

define arm_aapcs_vfpcc i32 @vaddva_v4i32_i32(<4 x i32> %s1, i32 %x) {
; CHECK-LABEL: vaddva_v4i32_i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vaddva.u32 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %t = call i32 @llvm.vector.reduce.add.i32.v4i32(<4 x i32> %s1)
  %r = add i32 %t, %x
  ret i32 %r
}

define arm_aapcs_vfpcc i32 @vaddva_v8i32_i32(<8 x i32> %s1, i32 %x) {
; CHECK-LABEL: vaddva_v8i32_i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vadd.i32 q0, q0, q1
; CHECK-NEXT:    vaddva.u32 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %t = call i32 @llvm.vector.reduce.add.i32.v8i32(<8 x i32> %s1)
  %r = add i32 %t, %x
  ret i32 %r
}

define arm_aapcs_vfpcc i16 @vaddva_v8i16_i16(<8 x i16> %s1, i16 %x) {
; CHECK-LABEL: vaddva_v8i16_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vaddva.u16 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %t = call i16 @llvm.vector.reduce.add.i16.v8i16(<8 x i16> %s1)
  %r = add i16 %t, %x
  ret i16 %r
}

define arm_aapcs_vfpcc i16 @vaddva_v16i16_i16(<16 x i16> %s1, i16 %x) {
; CHECK-LABEL: vaddva_v16i16_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vadd.i16 q0, q0, q1
; CHECK-NEXT:    vaddva.u16 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %t = call i16 @llvm.vector.reduce.add.i16.v16i16(<16 x i16> %s1)
  %r = add i16 %t, %x
  ret i16 %r
}

define arm_aapcs_vfpcc i8 @vaddva_v16i8_i8(<16 x i8> %s1, i8 %x) {
; CHECK-LABEL: vaddva_v16i8_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vaddva.u8 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %t = call i8 @llvm.vector.reduce.add.i8.v16i8(<16 x i8> %s1)
  %r = add i8 %t, %x
  ret i8 %r
}

define arm_aapcs_vfpcc i8 @vaddva_v32i8_i8(<32 x i8> %s1, i8 %x) {
; CHECK-LABEL: vaddva_v32i8_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vadd.i8 q0, q0, q1
; CHECK-NEXT:    vaddva.u8 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %t = call i8 @llvm.vector.reduce.add.i8.v32i8(<32 x i8> %s1)
  %r = add i8 %t, %x
  ret i8 %r
}