; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve -o - %s | FileCheck %s
; RUN: llc -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp -o - %s | FileCheck --check-prefix=CHECK --check-prefix=CHECK-FP %s

; Every function below adds two 128-bit vector arguments and returns the sum.
; The first llc invocation (+mve) is matched only against the default prefix;
; the second (+mve.fp) is matched against both the default and the FP-only
; prefix. Functions whose assertions use the FP-only prefix are therefore
; verified by the second invocation alone.

; Integer lanes of 8, 16 and 32 bits: the expected code moves the first operand
; in from r0-r3, loads the second operand from the stack, issues one vector add
; and moves the result back out to r0-r3.
define <16 x i8> @vector_add_i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK-LABEL: vector_add_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov d1, r2, r3
; CHECK-NEXT:    vmov d0, r0, r1
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vadd.i8 q0, q0, q1
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
entry:
  %sum = add <16 x i8> %lhs, %rhs
  ret <16 x i8> %sum
}

define <8 x i16> @vector_add_i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK-LABEL: vector_add_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov d1, r2, r3
; CHECK-NEXT:    vmov d0, r0, r1
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vadd.i16 q0, q0, q1
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
entry:
  %sum = add <8 x i16> %lhs, %rhs
  ret <8 x i16> %sum
}

define <4 x i32> @vector_add_i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK-LABEL: vector_add_i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov d1, r2, r3
; CHECK-NEXT:    vmov d0, r0, r1
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vadd.i32 q0, q0, q1
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
entry:
  %sum = add <4 x i32> %lhs, %rhs
  ret <4 x i32> %sum
}

; 64-bit integer lanes: the expected code contains no vector add; each lane is
; summed with a scalar adds/adc pair and the four result words are inserted
; back into q0 one at a time.
define <2 x i64> @vector_add_i64(<2 x i64> %lhs, <2 x i64> %rhs) {
; CHECK-FP-LABEL: vector_add_i64:
; CHECK-FP:       @ %bb.0: @ %entry
; CHECK-FP-NEXT:    .save {r7, lr}
; CHECK-FP-NEXT:    push {r7, lr}
; CHECK-FP-NEXT:    vmov d1, r2, r3
; CHECK-FP-NEXT:    vmov d0, r0, r1
; CHECK-FP-NEXT:    add r0, sp, #8
; CHECK-FP-NEXT:    vldrw.u32 q1, [r0]
; CHECK-FP-NEXT:    vmov r1, s2
; CHECK-FP-NEXT:    vmov r0, s3
; CHECK-FP-NEXT:    vmov r3, s6
; CHECK-FP-NEXT:    vmov r2, s7
; CHECK-FP-NEXT:    adds.w lr, r1, r3
; CHECK-FP-NEXT:    vmov r3, s0
; CHECK-FP-NEXT:    vmov r1, s4
; CHECK-FP-NEXT:    adc.w r12, r0, r2
; CHECK-FP-NEXT:    vmov r2, s1
; CHECK-FP-NEXT:    vmov r0, s5
; CHECK-FP-NEXT:    adds r1, r1, r3
; CHECK-FP-NEXT:    vmov.32 q0[0], r1
; CHECK-FP-NEXT:    adcs r0, r2
; CHECK-FP-NEXT:    vmov.32 q0[1], r0
; CHECK-FP-NEXT:    vmov.32 q0[2], lr
; CHECK-FP-NEXT:    vmov.32 q0[3], r12
; CHECK-FP-NEXT:    vmov r0, r1, d0
; CHECK-FP-NEXT:    vmov r2, r3, d1
; CHECK-FP-NEXT:    pop {r7, pc}
entry:
  %sum = add <2 x i64> %lhs, %rhs
  ret <2 x i64> %sum
}

; Half- and single-precision lanes: same operand shuffling as the integer
; cases, with a single vector floating-point add.
define <8 x half> @vector_add_f16(<8 x half> %lhs, <8 x half> %rhs) {
; CHECK-FP-LABEL: vector_add_f16:
; CHECK-FP:       @ %bb.0: @ %entry
; CHECK-FP-NEXT:    vmov d1, r2, r3
; CHECK-FP-NEXT:    vmov d0, r0, r1
; CHECK-FP-NEXT:    mov r0, sp
; CHECK-FP-NEXT:    vldrw.u32 q1, [r0]
; CHECK-FP-NEXT:    vadd.f16 q0, q0, q1
; CHECK-FP-NEXT:    vmov r0, r1, d0
; CHECK-FP-NEXT:    vmov r2, r3, d1
; CHECK-FP-NEXT:    bx lr
entry:
  %sum = fadd <8 x half> %lhs, %rhs
  ret <8 x half> %sum
}

define <4 x float> @vector_add_f32(<4 x float> %lhs, <4 x float> %rhs) {
; CHECK-FP-LABEL: vector_add_f32:
; CHECK-FP:       @ %bb.0: @ %entry
; CHECK-FP-NEXT:    vmov d1, r2, r3
; CHECK-FP-NEXT:    vmov d0, r0, r1
; CHECK-FP-NEXT:    mov r0, sp
; CHECK-FP-NEXT:    vldrw.u32 q1, [r0]
; CHECK-FP-NEXT:    vadd.f32 q0, q0, q1
; CHECK-FP-NEXT:    vmov r0, r1, d0
; CHECK-FP-NEXT:    vmov r2, r3, d1
; CHECK-FP-NEXT:    bx lr
entry:
  %sum = fadd <4 x float> %lhs, %rhs
  ret <4 x float> %sum
}

; Double-precision lanes: the expected code calls __aeabi_dadd once per lane,
; holding the second operand and the partial result in d8/d9 across the calls.
define <2 x double> @vector_add_f64(<2 x double> %lhs, <2 x double> %rhs) {
; CHECK-FP-LABEL: vector_add_f64:
; CHECK-FP:       @ %bb.0: @ %entry
; CHECK-FP-NEXT:    .save {r4, r5, r6, r7, lr}
; CHECK-FP-NEXT:    push {r4, r5, r6, r7, lr}
; CHECK-FP-NEXT:    .pad #4
; CHECK-FP-NEXT:    sub sp, #4
; CHECK-FP-NEXT:    .vsave {d8, d9}
; CHECK-FP-NEXT:    vpush {d8, d9}
; CHECK-FP-NEXT:    mov r5, r0
; CHECK-FP-NEXT:    add r0, sp, #40
; CHECK-FP-NEXT:    vldrw.u32 q4, [r0]
; CHECK-FP-NEXT:    mov r4, r2
; CHECK-FP-NEXT:    mov r6, r3
; CHECK-FP-NEXT:    mov r7, r1
; CHECK-FP-NEXT:    vmov r2, r3, d9
; CHECK-FP-NEXT:    mov r0, r4
; CHECK-FP-NEXT:    mov r1, r6
; CHECK-FP-NEXT:    bl __aeabi_dadd
; CHECK-FP-NEXT:    vmov r2, r3, d8
; CHECK-FP-NEXT:    vmov d9, r0, r1
; CHECK-FP-NEXT:    mov r0, r5
; CHECK-FP-NEXT:    mov r1, r7
; CHECK-FP-NEXT:    bl __aeabi_dadd
; CHECK-FP-NEXT:    vmov d8, r0, r1
; CHECK-FP-NEXT:    vmov r2, r3, d9
; CHECK-FP-NEXT:    vmov r0, r1, d8
; CHECK-FP-NEXT:    vpop {d8, d9}
; CHECK-FP-NEXT:    add sp, #4
; CHECK-FP-NEXT:    pop {r4, r5, r6, r7, pc}
entry:
  %sum = fadd <2 x double> %lhs, %rhs
  ret <2 x double> %sum
}