; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -verify-machineinstrs -mattr=+mve.fp %s -o - | FileCheck %s

; Every function in this file builds a splat of a scalar (insertelement into
; lane 0 followed by a shufflevector with an all-zero mask) and adds that
; splat to %src. The expected output for each function contains a single
; vector-by-scalar vadd that reads the scalar from r0.
;
; The trailing %a and %b parameters are not referenced by any function body.

; --- Integer add, splat is the right-hand operand ---------------------------

define arm_aapcs_vfpcc <4 x i32> @vaddqr_v4i32(<4 x i32> %src, i32 %src2, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: vaddqr_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %i = insertelement <4 x i32> undef, i32 %src2, i32 0
  %sp = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
  %c = add <4 x i32> %src, %sp
  ret <4 x i32> %c
}

define arm_aapcs_vfpcc <8 x i16> @vaddqr_v8i16(<8 x i16> %src, i16 %src2, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: vaddqr_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vadd.i16 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %i = insertelement <8 x i16> undef, i16 %src2, i32 0
  %sp = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
  %c = add <8 x i16> %src, %sp
  ret <8 x i16> %c
}

define arm_aapcs_vfpcc <16 x i8> @vaddqr_v16i8(<16 x i8> %src, i8 %src2, <16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: vaddqr_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vadd.i8 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %i = insertelement <16 x i8> undef, i8 %src2, i32 0
  %sp = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
  %c = add <16 x i8> %src, %sp
  ret <16 x i8> %c
}

; --- Integer add, splat is the left-hand operand (commuted form) ------------

define arm_aapcs_vfpcc <4 x i32> @vaddqr_v4i32_2(<4 x i32> %src, i32 %src2, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: vaddqr_v4i32_2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %i = insertelement <4 x i32> undef, i32 %src2, i32 0
  %sp = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
  %c = add <4 x i32> %sp, %src
  ret <4 x i32> %c
}

define arm_aapcs_vfpcc <8 x i16> @vaddqr_v8i16_2(<8 x i16> %src, i16 %src2, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: vaddqr_v8i16_2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vadd.i16 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %i = insertelement <8 x i16> undef, i16 %src2, i32 0
  %sp = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
  %c = add <8 x i16> %sp, %src
  ret <8 x i16> %c
}

define arm_aapcs_vfpcc <16 x i8> @vaddqr_v16i8_2(<16 x i8> %src, i8 %src2, <16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: vaddqr_v16i8_2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vadd.i8 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %i = insertelement <16 x i8> undef, i8 %src2, i32 0
  %sp = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
  %c = add <16 x i8> %sp, %src
  ret <16 x i8> %c
}

; --- Floating-point add, splat is the right-hand operand --------------------
; For the float scalar the expected output first moves s4 into r0; for the
; half scalar the value is loaded through %src2p and the expected output
; fetches it with ldrh.

define arm_aapcs_vfpcc <4 x float> @vaddqr_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: vaddqr_v4f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s4
; CHECK-NEXT:    vadd.f32 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %i = insertelement <4 x float> undef, float %src2, i32 0
  %sp = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
  %c = fadd <4 x float> %src, %sp
  ret <4 x float> %c
}

define arm_aapcs_vfpcc <8 x half> @vaddqr_v8f16(<8 x half> %src, half *%src2p, <8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: vaddqr_v8f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    ldrh r0, [r0]
; CHECK-NEXT:    vadd.f16 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %src2 = load half, half *%src2p, align 2
  %i = insertelement <8 x half> undef, half %src2, i32 0
  %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
  %c = fadd <8 x half> %src, %sp
  ret <8 x half> %c
}

; --- Floating-point add, splat is the left-hand operand (commuted form) -----

define arm_aapcs_vfpcc <4 x float> @vaddqr_v4f32_2(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: vaddqr_v4f32_2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s4
; CHECK-NEXT:    vadd.f32 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %i = insertelement <4 x float> undef, float %src2, i32 0
  %sp = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
  %c = fadd <4 x float> %sp, %src
  ret <4 x float> %c
}

define arm_aapcs_vfpcc <8 x half> @vaddqr_v8f16_2(<8 x half> %src, half *%src2p, <8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: vaddqr_v8f16_2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    ldrh r0, [r0]
; CHECK-NEXT:    vadd.f16 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %src2 = load half, half *%src2p, align 2
  %i = insertelement <8 x half> undef, half %src2, i32 0
  %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
  %c = fadd <8 x half> %sp, %src
  ret <8 x half> %c
}

; --- Floating-point add, splat built in the integer domain ------------------
; The scalar is bitcast to an integer of the same width, splatted as an
; integer vector, and the splat is bitcast back to the floating-point vector
; type before the fadd. Splat is the right-hand operand here.

define arm_aapcs_vfpcc <4 x float> @vaddqr_v4f32_3(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: vaddqr_v4f32_3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s4
; CHECK-NEXT:    vadd.f32 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %src2bc = bitcast float %src2 to i32
  %i = insertelement <4 x i32> undef, i32 %src2bc, i32 0
  %spbc = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
  %sp = bitcast <4 x i32> %spbc to <4 x float>
  %c = fadd <4 x float> %src, %sp
  ret <4 x float> %c
}

define arm_aapcs_vfpcc <8 x half> @vaddqr_v8f16_3(<8 x half> %src, half *%src2p, <8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: vaddqr_v8f16_3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    ldrh r0, [r0]
; CHECK-NEXT:    vadd.f16 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %src2 = load half, half *%src2p, align 2
  %src2bc = bitcast half %src2 to i16
  %i = insertelement <8 x i16> undef, i16 %src2bc, i32 0
  %spbc = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
  %sp = bitcast <8 x i16> %spbc to <8 x half>
  %c = fadd <8 x half> %src, %sp
  ret <8 x half> %c
}

; --- Integer-domain splat, splat is the left-hand operand (commuted form) ---

define arm_aapcs_vfpcc <4 x float> @vaddqr_v4f32_4(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: vaddqr_v4f32_4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s4
; CHECK-NEXT:    vadd.f32 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %src2bc = bitcast float %src2 to i32
  %i = insertelement <4 x i32> undef, i32 %src2bc, i32 0
  %spbc = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
  %sp = bitcast <4 x i32> %spbc to <4 x float>
  %c = fadd <4 x float> %sp, %src
  ret <4 x float> %c
}

define arm_aapcs_vfpcc <8 x half> @vaddqr_v8f16_4(<8 x half> %src, half *%src2p, <8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: vaddqr_v8f16_4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    ldrh r0, [r0]
; CHECK-NEXT:    vadd.f16 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %src2 = load half, half *%src2p, align 2
  %src2bc = bitcast half %src2 to i16
  %i = insertelement <8 x i16> undef, i16 %src2bc, i32 0
  %spbc = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
  %sp = bitcast <8 x i16> %spbc to <8 x half>
  %c = fadd <8 x half> %sp, %src
  ret <8 x half> %c
}