; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+neon | FileCheck %s --check-prefix=CHECK

; Codegen expectations for @llvm.vector.reduce.fadd calls that carry no
; fast-math flags, over the vector types declared below. Each vector type is
; tested twice: once with a start value passed as the argument %s, and once
; (the "_neutral" variant) with a constant -0.0 start value.

declare half @llvm.vector.reduce.fadd.f16.v1f16(half, <1 x half>)
declare float @llvm.vector.reduce.fadd.f32.v1f32(float, <1 x float>)
declare double @llvm.vector.reduce.fadd.f64.v1f64(double, <1 x double>)
declare fp128 @llvm.vector.reduce.fadd.f128.v1f128(fp128, <1 x fp128>)

declare float @llvm.vector.reduce.fadd.f32.v3f32(float, <3 x float>)
declare float @llvm.vector.reduce.fadd.f32.v5f32(float, <5 x float>)
declare fp128 @llvm.vector.reduce.fadd.f128.v2f128(fp128, <2 x fp128>)
declare float @llvm.vector.reduce.fadd.f32.v16f32(float, <16 x float>)

; <1 x half> reduction with start value %s. The expected code converts through
; the __aeabi_h2f / __aeabi_f2h helper calls and performs the addition itself
; as a single-precision vadd.f32 before converting the result back.
define half @test_v1f16(<1 x half> %a, half %s) nounwind {
; CHECK-LABEL: test_v1f16:
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r4, r5, r11, lr}
; CHECK-NEXT: push {r4, r5, r11, lr}
; CHECK-NEXT: .vsave {d8}
; CHECK-NEXT: vpush {d8}
; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bl __aeabi_h2f
; CHECK-NEXT: mov r5, r0
; CHECK-NEXT: mov r0, r4
; CHECK-NEXT: bl __aeabi_f2h
; CHECK-NEXT: vmov s16, r5
; CHECK-NEXT: bl __aeabi_h2f
; CHECK-NEXT: vmov s0, r0
; CHECK-NEXT: vadd.f32 s0, s16, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bl __aeabi_f2h
; CHECK-NEXT: vpop {d8}
; CHECK-NEXT: pop {r4, r5, r11, lr}
; CHECK-NEXT: mov pc, lr
  %b = call half @llvm.vector.reduce.fadd.f16.v1f16(half %s, <1 x half> %a)
  ret half %b
}

; <1 x half> reduction with a -0.0 start value. No vadd is expected here: the
; expected code is one __aeabi_f2h call whose result is masked with 0xffff
; (built as 255 | 65280).
define half @test_v1f16_neutral(<1 x half> %a) nounwind {
; CHECK-LABEL: test_v1f16_neutral:
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r11, lr}
; CHECK-NEXT: push {r11, lr}
; CHECK-NEXT: bl __aeabi_f2h
; CHECK-NEXT: mov r1, #255
; CHECK-NEXT: orr r1, r1, #65280
; CHECK-NEXT: and r0, r0, r1
; CHECK-NEXT: pop {r11, lr}
; CHECK-NEXT: mov pc, lr
  %b = call half @llvm.vector.reduce.fadd.f16.v1f16(half -0.0, <1 x half> %a)
  ret half %b
}

; <1 x float> reduction with start value %s: expected as a single vadd.f32 of
; the two values moved in from r0 and r1.
define float @test_v1f32(<1 x float> %a, float %s) nounwind {
; CHECK-LABEL: test_v1f32:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov s0, r0
; CHECK-NEXT: vmov s2, r1
; CHECK-NEXT: vadd.f32 s0, s2, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: mov pc, lr
  %b = call float @llvm.vector.reduce.fadd.f32.v1f32(float %s, <1 x float> %a)
  ret float %b
}

; <1 x float> reduction with a -0.0 start value: the expected function body is
; only the return instruction.
define float @test_v1f32_neutral(<1 x float> %a) nounwind {
; CHECK-LABEL: test_v1f32_neutral:
; CHECK: @ %bb.0:
; CHECK-NEXT: mov pc, lr
  %b = call float @llvm.vector.reduce.fadd.f32.v1f32(float -0.0, <1 x float> %a)
  ret float %b
}

; <1 x double> reduction with start value %s: expected as a single vadd.f64 of
; the two values moved in from r0:r1 and r2:r3.
define double @test_v1f64(<1 x double> %a, double %s) nounwind {
; CHECK-LABEL: test_v1f64:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov d16, r0, r1
; CHECK-NEXT: vmov d17, r2, r3
; CHECK-NEXT: vadd.f64 d16, d17, d16
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: mov pc, lr
  %b = call double @llvm.vector.reduce.fadd.f64.v1f64(double %s, <1 x double> %a)
  ret double %b
}

; <1 x double> reduction with a -0.0 start value: the expected function body is
; only the return instruction.
define double @test_v1f64_neutral(<1 x double> %a) nounwind {
; CHECK-LABEL: test_v1f64_neutral:
; CHECK: @ %bb.0:
; CHECK-NEXT: mov pc, lr
  %b = call double @llvm.vector.reduce.fadd.f64.v1f64(double -0.0, <1 x double> %a)
  ret double %b
}

; <1 x fp128> reduction with start value %s: expected as one call to the
; __addtf3 helper, with four words loaded from the incoming stack area moved
; into r0-r3 and the incoming r0-r3 stored to the outgoing stack area.
define fp128 @test_v1f128(<1 x fp128> %a, fp128 %s) nounwind {
; CHECK-LABEL: test_v1f128:
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r4, r5, r11, lr}
; CHECK-NEXT: push {r4, r5, r11, lr}
; CHECK-NEXT: .pad #16
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: ldr r12, [sp, #32]
; CHECK-NEXT: ldr lr, [sp, #36]
; CHECK-NEXT: ldr r4, [sp, #40]
; CHECK-NEXT: ldr r5, [sp, #44]
; CHECK-NEXT: stm sp, {r0, r1, r2, r3}
; CHECK-NEXT: mov r0, r12
; CHECK-NEXT: mov r1, lr
; CHECK-NEXT: mov r2, r4
; CHECK-NEXT: mov r3, r5
; CHECK-NEXT: bl __addtf3
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: pop {r4, r5, r11, lr}
; CHECK-NEXT: mov pc, lr
  %b = call fp128 @llvm.vector.reduce.fadd.f128.v1f128(fp128 %s, <1 x fp128> %a)
  ret fp128 %b
}

; <1 x fp128> reduction whose start value is the fp128 hex literal below
; (presumably the encoding of -0.0, matching the other _neutral tests - TODO
; confirm). The expected function body is only the return instruction.
define fp128 @test_v1f128_neutral(<1 x fp128> %a) nounwind {
; CHECK-LABEL: test_v1f128_neutral:
; CHECK: @ %bb.0:
; CHECK-NEXT: mov pc, lr
  %b = call fp128 @llvm.vector.reduce.fadd.f128.v1f128(fp128 0xL00000000000000008000000000000000, <1 x fp128> %a)
  ret fp128 %b
}

; <3 x float> reduction with start value %s: expected as three successive
; scalar vadd.f32 instructions accumulating into s0, which is first loaded
; from [sp].
define float @test_v3f32(<3 x float> %a, float %s) nounwind {
; CHECK-LABEL: test_v3f32:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov d3, r2, r3
; CHECK-NEXT: vldr s0, [sp]
; CHECK-NEXT: vmov d2, r0, r1
; CHECK-NEXT: vadd.f32 s0, s0, s4
; CHECK-NEXT: vadd.f32 s0, s0, s5
; CHECK-NEXT: vadd.f32 s0, s0, s6
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: mov pc, lr
  %b = call float @llvm.vector.reduce.fadd.f32.v3f32(float %s, <3 x float> %a)
  ret float %b
}

; <3 x float> reduction with a -0.0 start value: expected as two scalar
; vadd.f32 instructions over s0, s1 and s2, with no add for the start value.
define float @test_v3f32_neutral(<3 x float> %a) nounwind {
; CHECK-LABEL: test_v3f32_neutral:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov d1, r2, r3
; CHECK-NEXT: vmov d0, r0, r1
; CHECK-NEXT: vadd.f32 s4, s0, s1
; CHECK-NEXT: vadd.f32 s0, s4, s2
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: mov pc, lr
  %b = call float @llvm.vector.reduce.fadd.f32.v3f32(float -0.0, <3 x float> %a)
  ret float %b
}

; <5 x float> reduction with start value %s: expected as five scalar vadd.f32
; instructions accumulating into s0 (loaded from [sp, #4]); the addends come
; from r0-r3 and then [sp].
define float @test_v5f32(<5 x float> %a, float %s) nounwind {
; CHECK-LABEL: test_v5f32:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr s0, [sp, #4]
; CHECK-NEXT: vmov s2, r0
; CHECK-NEXT: vadd.f32 s0, s0, s2
; CHECK-NEXT: vmov s2, r1
; CHECK-NEXT: vadd.f32 s0, s0, s2
; CHECK-NEXT: vmov s2, r2
; CHECK-NEXT: vadd.f32 s0, s0, s2
; CHECK-NEXT: vmov s2, r3
; CHECK-NEXT: vadd.f32 s0, s0, s2
; CHECK-NEXT: vldr s2, [sp]
; CHECK-NEXT: vadd.f32 s0, s0, s2
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: mov pc, lr
  %b = call float @llvm.vector.reduce.fadd.f32.v5f32(float %s, <5 x float> %a)
  ret float %b
}

; <5 x float> reduction with a -0.0 start value: expected as four scalar
; vadd.f32 instructions over the values from r0-r3 and [sp].
define float @test_v5f32_neutral(<5 x float> %a) nounwind {
; CHECK-LABEL: test_v5f32_neutral:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov s0, r1
; CHECK-NEXT: vmov s2, r0
; CHECK-NEXT: vadd.f32 s0, s2, s0
; CHECK-NEXT: vmov s2, r2
; CHECK-NEXT: vadd.f32 s0, s0, s2
; CHECK-NEXT: vmov s2, r3
; CHECK-NEXT: vadd.f32 s0, s0, s2
; CHECK-NEXT: vldr s2, [sp]
; CHECK-NEXT: vadd.f32 s0, s0, s2
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: mov pc, lr
  %b = call float @llvm.vector.reduce.fadd.f32.v5f32(float -0.0, <5 x float> %a)
  ret float %b
}

; <2 x fp128> reduction with start value %s: expected as two consecutive calls
; to the __addtf3 helper. Before the second call, four more words are copied
; from the incoming stack area into the outgoing one.
define fp128 @test_v2f128(<2 x fp128> %a, fp128 %s) nounwind {
; CHECK-LABEL: test_v2f128:
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r4, r5, r11, lr}
; CHECK-NEXT: push {r4, r5, r11, lr}
; CHECK-NEXT: .pad #16
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: ldr r12, [sp, #48]
; CHECK-NEXT: ldr lr, [sp, #52]
; CHECK-NEXT: ldr r4, [sp, #56]
; CHECK-NEXT: ldr r5, [sp, #60]
; CHECK-NEXT: stm sp, {r0, r1, r2, r3}
; CHECK-NEXT: mov r0, r12
; CHECK-NEXT: mov r1, lr
; CHECK-NEXT: mov r2, r4
; CHECK-NEXT: mov r3, r5
; CHECK-NEXT: bl __addtf3
; CHECK-NEXT: ldr r4, [sp, #32]
; CHECK-NEXT: ldr r5, [sp, #40]
; CHECK-NEXT: ldr lr, [sp, #44]
; CHECK-NEXT: ldr r12, [sp, #36]
; CHECK-NEXT: stm sp, {r4, r12}
; CHECK-NEXT: str r5, [sp, #8]
; CHECK-NEXT: str lr, [sp, #12]
; CHECK-NEXT: bl __addtf3
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: pop {r4, r5, r11, lr}
; CHECK-NEXT: mov pc, lr
  %b = call fp128 @llvm.vector.reduce.fadd.f128.v2f128(fp128 %s, <2 x fp128> %a)
  ret fp128 %b
}

; <2 x fp128> reduction whose start value is the fp128 hex literal below
; (presumably the encoding of -0.0 - TODO confirm): expected as a single call
; to __addtf3, i.e. one fewer than test_v2f128.
define fp128 @test_v2f128_neutral(<2 x fp128> %a) nounwind {
; CHECK-LABEL: test_v2f128_neutral:
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r4, r5, r11, lr}
; CHECK-NEXT: push {r4, r5, r11, lr}
; CHECK-NEXT: .pad #16
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: ldr r12, [sp, #36]
; CHECK-NEXT: ldr lr, [sp, #32]
; CHECK-NEXT: ldr r4, [sp, #40]
; CHECK-NEXT: ldr r5, [sp, #44]
; CHECK-NEXT: str lr, [sp]
; CHECK-NEXT: str r12, [sp, #4]
; CHECK-NEXT: str r4, [sp, #8]
; CHECK-NEXT: str r5, [sp, #12]
; CHECK-NEXT: bl __addtf3
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: pop {r4, r5, r11, lr}
; CHECK-NEXT: mov pc, lr
  %b = call fp128 @llvm.vector.reduce.fadd.f128.v2f128(fp128 0xL00000000000000008000000000000000, <2 x fp128> %a)
  ret fp128 %b
}

; <16 x float> reduction with start value %s: expected as sixteen scalar
; vadd.f32 instructions accumulating into s0 (loaded from [sp, #48]). The first
; four addends are moved in from r0-r3; the remaining twelve are loaded four at
; a time with vld1.64 {d2, d3} from sp, sp+16 and sp+32.
define float @test_v16f32(<16 x float> %a, float %s) nounwind {
; CHECK-LABEL: test_v16f32:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov d3, r2, r3
; CHECK-NEXT: vldr s0, [sp, #48]
; CHECK-NEXT: vmov d2, r0, r1
; CHECK-NEXT: mov r0, sp
; CHECK-NEXT: vadd.f32 s0, s0, s4
; CHECK-NEXT: vadd.f32 s0, s0, s5
; CHECK-NEXT: vadd.f32 s0, s0, s6
; CHECK-NEXT: vadd.f32 s0, s0, s7
; CHECK-NEXT: vld1.64 {d2, d3}, [r0]
; CHECK-NEXT: add r0, sp, #16
; CHECK-NEXT: vadd.f32 s0, s0, s4
; CHECK-NEXT: vadd.f32 s0, s0, s5
; CHECK-NEXT: vadd.f32 s0, s0, s6
; CHECK-NEXT: vadd.f32 s0, s0, s7
; CHECK-NEXT: vld1.64 {d2, d3}, [r0]
; CHECK-NEXT: add r0, sp, #32
; CHECK-NEXT: vadd.f32 s0, s0, s4
; CHECK-NEXT: vadd.f32 s0, s0, s5
; CHECK-NEXT: vadd.f32 s0, s0, s6
; CHECK-NEXT: vadd.f32 s0, s0, s7
; CHECK-NEXT: vld1.64 {d2, d3}, [r0]
; CHECK-NEXT: vadd.f32 s0, s0, s4
; CHECK-NEXT: vadd.f32 s0, s0, s5
; CHECK-NEXT: vadd.f32 s0, s0, s6
; CHECK-NEXT: vadd.f32 s0, s0, s7
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: mov pc, lr
  %b = call float @llvm.vector.reduce.fadd.f32.v16f32(float %s, <16 x float> %a)
  ret float %b
}

; <16 x float> reduction with a -0.0 start value: expected as fifteen scalar
; vadd.f32 instructions. The first three combine the four values moved in from
; r0-r3; the remaining twelve addends are loaded four at a time with
; vld1.64 {d2, d3} from sp, sp+16 and sp+32.
define float @test_v16f32_neutral(<16 x float> %a) nounwind {
; CHECK-LABEL: test_v16f32_neutral:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov d1, r2, r3
; CHECK-NEXT: vmov d0, r0, r1
; CHECK-NEXT: mov r0, sp
; CHECK-NEXT: vadd.f32 s4, s0, s1
; CHECK-NEXT: vadd.f32 s4, s4, s2
; CHECK-NEXT: vadd.f32 s0, s4, s3
; CHECK-NEXT: vld1.64 {d2, d3}, [r0]
; CHECK-NEXT: add r0, sp, #16
; CHECK-NEXT: vadd.f32 s0, s0, s4
; CHECK-NEXT: vadd.f32 s0, s0, s5
; CHECK-NEXT: vadd.f32 s0, s0, s6
; CHECK-NEXT: vadd.f32 s0, s0, s7
; CHECK-NEXT: vld1.64 {d2, d3}, [r0]
; CHECK-NEXT: add r0, sp, #32
; CHECK-NEXT: vadd.f32 s0, s0, s4
; CHECK-NEXT: vadd.f32 s0, s0, s5
; CHECK-NEXT: vadd.f32 s0, s0, s6
; CHECK-NEXT: vadd.f32 s0, s0, s7
; CHECK-NEXT: vld1.64 {d2, d3}, [r0]
; CHECK-NEXT: vadd.f32 s0, s0, s4
; CHECK-NEXT: vadd.f32 s0, s0, s5
; CHECK-NEXT: vadd.f32 s0, s0, s6
; CHECK-NEXT: vadd.f32 s0, s0, s7
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: mov pc, lr
  %b = call float @llvm.vector.reduce.fadd.f32.v16f32(float -0.0, <16 x float> %a)
  ret float %b
}