; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-eabi -mattr=+v8.2a,+fullfp16 | FileCheck %s

; Half-precision (f16) lane tests for AArch64 with +v8.2a,+fullfp16.
;
; Each function builds a multiply, fused multiply-add or multiply-extended
; whose second multiplicand is a single element of a vector (extracted or
; splatted), and the expected output is the instruction form that takes a
; lane-indexed operand (e.g. v2.h[0]) instead of a separate dup/extract.
;
; Several functions carry an `i32 %lane` parameter that the body never reads;
; the lane is a constant in the IR.
;
; NOTE(review): attribute groups #1 and #4 are referenced below, but no
; `attributes #N = { ... }` definition is visible in this portion of the file.
; Confirm whether the references are intentional.

declare half @llvm.aarch64.neon.fmulx.f16(half, half)
declare <4 x half> @llvm.aarch64.neon.fmulx.v4f16(<4 x half>, <4 x half>)
declare <8 x half> @llvm.aarch64.neon.fmulx.v8f16(<8 x half>, <8 x half>)
declare <4 x half> @llvm.fma.v4f16(<4 x half>, <4 x half>, <4 x half>)
declare <8 x half> @llvm.fma.v8f16(<8 x half>, <8 x half>, <8 x half>)
declare half @llvm.fma.f16(half, half, half) #1

; ---------------------------------------------------------------------------
; Fused multiply-add: a + b * c[0], with c[0] splatted from a 64-bit vector
; (lane), a 128-bit vector (laneq) or a scalar (n).
; ---------------------------------------------------------------------------

; <4 x half> accumulate, lane 0 splatted from a <4 x half> source.
define dso_local <4 x half> @t_vfma_lane_f16(<4 x half> %a, <4 x half> %b, <4 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfma_lane_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fmla v0.4h, v1.4h, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %lane1 = shufflevector <4 x half> %c, <4 x half> undef, <4 x i32> zeroinitializer
  %fmla3 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %b, <4 x half> %lane1, <4 x half> %a)
  ret <4 x half> %fmla3
}

; <8 x half> accumulate, lane 0 of a <4 x half> source widened to 8 lanes.
define dso_local <8 x half> @t_vfmaq_lane_f16(<8 x half> %a, <8 x half> %b, <4 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmaq_lane_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fmla v0.8h, v1.8h, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %lane1 = shufflevector <4 x half> %c, <4 x half> undef, <8 x i32> zeroinitializer
  %fmla3 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %b, <8 x half> %lane1, <8 x half> %a)
  ret <8 x half> %fmla3
}

; <4 x half> accumulate, lane 0 of an <8 x half> source. The splat is the
; first fma multiplicand here (operands commuted relative to the lane test).
define dso_local <4 x half> @t_vfma_laneq_f16(<4 x half> %a, <4 x half> %b, <8 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfma_laneq_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    fmla v0.4h, v1.4h, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %lane1 = shufflevector <8 x half> %c, <8 x half> undef, <4 x i32> zeroinitializer
  %0 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %lane1, <4 x half> %b, <4 x half> %a)
  ret <4 x half> %0
}

; <8 x half> accumulate, lane 0 of an <8 x half> source; splat is the first
; fma multiplicand.
define dso_local <8 x half> @t_vfmaq_laneq_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmaq_laneq_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    fmla v0.8h, v1.8h, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %lane1 = shufflevector <8 x half> %c, <8 x half> undef, <8 x i32> zeroinitializer
  %0 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %lane1, <8 x half> %b, <8 x half> %a)
  ret <8 x half> %0
}

; <4 x half> accumulate with a scalar half splatted via insertelement+shuffle.
define dso_local <4 x half> @t_vfma_n_f16(<4 x half> %a, <4 x half> %b, half %c) {
; CHECK-LABEL: t_vfma_n_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $h2 killed $h2 def $q2
; CHECK-NEXT:    fmla v0.4h, v1.4h, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %vecinit = insertelement <4 x half> undef, half %c, i32 0
  %vecinit3 = shufflevector <4 x half> %vecinit, <4 x half> undef, <4 x i32> zeroinitializer
  %0 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %b, <4 x half> %vecinit3, <4 x half> %a) #4
  ret <4 x half> %0
}

; <8 x half> accumulate with a scalar half splatted via insertelement+shuffle.
define dso_local <8 x half> @t_vfmaq_n_f16(<8 x half> %a, <8 x half> %b, half %c) {
; CHECK-LABEL: t_vfmaq_n_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $h2 killed $h2 def $q2
; CHECK-NEXT:    fmla v0.8h, v1.8h, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %vecinit = insertelement <8 x half> undef, half %c, i32 0
  %vecinit7 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer
  %0 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %b, <8 x half> %vecinit7, <8 x half> %a) #4
  ret <8 x half> %0
}

; Scalar fma with the multiplicand extracted from lane 0 of a <4 x half>.
define dso_local half @t_vfmah_lane_f16(half %a, half %b, <4 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmah_lane_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fmla h0, h1, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %extract = extractelement <4 x half> %c, i32 0
  %0 = tail call half @llvm.fma.f16(half %b, half %extract, half %a)
  ret half %0
}

; Scalar fma with the multiplicand extracted from lane 0 of an <8 x half>.
define dso_local half @t_vfmah_laneq_f16(half %a, half %b, <8 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmah_laneq_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    fmla h0, h1, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %extract = extractelement <8 x half> %c, i32 0
  %0 = tail call half @llvm.fma.f16(half %b, half %extract, half %a)
  ret half %0
}

; ---------------------------------------------------------------------------
; Fused multiply-subtract: same shapes as above, but %b is first negated by
; subtracting it from 0xH8000 (sign bit set, all other bits zero, i.e. -0.0)
; and the expected instruction is fmls.
; ---------------------------------------------------------------------------

; <4 x half>, lane 0 splatted from a <4 x half> source.
define dso_local <4 x half> @t_vfms_lane_f16(<4 x half> %a, <4 x half> %b, <4 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfms_lane_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fmls v0.4h, v1.4h, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %sub = fsub <4 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
  %lane1 = shufflevector <4 x half> %c, <4 x half> undef, <4 x i32> zeroinitializer
  %fmla3 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %sub, <4 x half> %lane1, <4 x half> %a)
  ret <4 x half> %fmla3
}

; <8 x half>, lane 0 of a <4 x half> source widened to 8 lanes.
define dso_local <8 x half> @t_vfmsq_lane_f16(<8 x half> %a, <8 x half> %b, <4 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmsq_lane_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fmls v0.8h, v1.8h, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %sub = fsub <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
  %lane1 = shufflevector <4 x half> %c, <4 x half> undef, <8 x i32> zeroinitializer
  %fmla3 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %sub, <8 x half> %lane1, <8 x half> %a)
  ret <8 x half> %fmla3
}

; <4 x half>, lane 0 of an <8 x half> source; splat is the first multiplicand.
define dso_local <4 x half> @t_vfms_laneq_f16(<4 x half> %a, <4 x half> %b, <8 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfms_laneq_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    fmls v0.4h, v1.4h, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %sub = fsub <4 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
  %lane1 = shufflevector <8 x half> %c, <8 x half> undef, <4 x i32> zeroinitializer
  %0 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %lane1, <4 x half> %sub, <4 x half> %a)
  ret <4 x half> %0
}

; <8 x half>, lane 0 of an <8 x half> source; splat is the first multiplicand.
define dso_local <8 x half> @t_vfmsq_laneq_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmsq_laneq_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    fmls v0.8h, v1.8h, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %sub = fsub <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
  %lane1 = shufflevector <8 x half> %c, <8 x half> undef, <8 x i32> zeroinitializer
  %0 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %lane1, <8 x half> %sub, <8 x half> %a)
  ret <8 x half> %0
}

; <4 x half> with a scalar half splatted via insertelement+shuffle.
define dso_local <4 x half> @t_vfms_n_f16(<4 x half> %a, <4 x half> %b, half %c) {
; CHECK-LABEL: t_vfms_n_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $h2 killed $h2 def $q2
; CHECK-NEXT:    fmls v0.4h, v1.4h, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %sub = fsub <4 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
  %vecinit = insertelement <4 x half> undef, half %c, i32 0
  %vecinit3 = shufflevector <4 x half> %vecinit, <4 x half> undef, <4 x i32> zeroinitializer
  %0 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %sub, <4 x half> %vecinit3, <4 x half> %a) #4
  ret <4 x half> %0
}

; <8 x half> with a scalar half splatted via insertelement+shuffle.
define dso_local <8 x half> @t_vfmsq_n_f16(<8 x half> %a, <8 x half> %b, half %c) {
; CHECK-LABEL: t_vfmsq_n_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $h2 killed $h2 def $q2
; CHECK-NEXT:    fmls v0.8h, v1.8h, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %sub = fsub <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
  %vecinit = insertelement <8 x half> undef, half %c, i32 0
  %vecinit7 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer
  %0 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %sub, <8 x half> %vecinit7, <8 x half> %a) #4
  ret <8 x half> %0
}

; Scalar multiply-subtract, multiplicand from lane 0 of a <4 x half>.
define dso_local half @t_vfmsh_lane_f16(half %a, half %b, <4 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmsh_lane_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fmls h0, h1, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %0 = fsub half 0xH8000, %b
  %extract = extractelement <4 x half> %c, i32 0
  %1 = tail call half @llvm.fma.f16(half %0, half %extract, half %a)
  ret half %1
}

; Scalar multiply-subtract, multiplicand from lane 0 of an <8 x half>.
define dso_local half @t_vfmsh_laneq_f16(half %a, half %b, <8 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmsh_laneq_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    fmls h0, h1, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %0 = fsub half 0xH8000, %b
  %extract = extractelement <8 x half> %c, i32 0
  %1 = tail call half @llvm.fma.f16(half %0, half %extract, half %a)
  ret half %1
}

; ---------------------------------------------------------------------------
; Plain fmul by a lane. The lane value is the first fmul operand in the IR.
; ---------------------------------------------------------------------------

; <4 x half> times lane 0 of an <8 x half>.
define dso_local <4 x half> @t_vmul_laneq_f16(<4 x half> %a, <8 x half> %b, i32 %lane) {
; CHECK-LABEL: t_vmul_laneq_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    fmul v0.4h, v0.4h, v1.h[0]
; CHECK-NEXT:    ret
entry:
  %shuffle = shufflevector <8 x half> %b, <8 x half> undef, <4 x i32> zeroinitializer
  %mul = fmul <4 x half> %shuffle, %a
  ret <4 x half> %mul
}

; <8 x half> times lane 0 of an <8 x half>.
define dso_local <8 x half> @t_vmulq_laneq_f16(<8 x half> %a, <8 x half> %b, i32 %lane) {
; CHECK-LABEL: t_vmulq_laneq_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    fmul v0.8h, v0.8h, v1.h[0]
; CHECK-NEXT:    ret
entry:
  %shuffle = shufflevector <8 x half> %b, <8 x half> undef, <8 x i32> zeroinitializer
  %mul = fmul <8 x half> %shuffle, %a
  ret <8 x half> %mul
}

; Scalar half times lane 0 of a <4 x half>.
define dso_local half @t_vmulh_lane_f16(half %a, <4 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vmulh_lane_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    fmul h0, h0, v1.h[0]
; CHECK-NEXT:    ret
entry:
  %0 = extractelement <4 x half> %c, i32 0
  %1 = fmul half %0, %a
  ret half %1
}

; Scalar half times lane 0 of an <8 x half>.
define dso_local half @t_vmulh_laneq_f16(half %a, <8 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vmulh_laneq_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    fmul h0, h0, v1.h[0]
; CHECK-NEXT:    ret
entry:
  %0 = extractelement <8 x half> %c, i32 0
  %1 = fmul half %0, %a
  ret half %1
}

; ---------------------------------------------------------------------------
; fmulx via the llvm.aarch64.neon.fmulx intrinsics.
; ---------------------------------------------------------------------------

; Scalar-by-scalar fmulx; no lane involved.
define dso_local half @t_vmulx_f16(half %a, half %b) {
; CHECK-LABEL: t_vmulx_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    fmulx h0, h0, h1
; CHECK-NEXT:    ret
entry:
  %fmulx.i = tail call half @llvm.aarch64.neon.fmulx.f16(half %a, half %b)
  ret half %fmulx.i
}

; Scalar fmulx by lane 3 (the last lane) of a <4 x half>.
define dso_local half @t_vmulxh_lane_f16(half %a, <4 x half> %b, i32 %lane) {
; CHECK-LABEL: t_vmulxh_lane_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    fmulx h0, h0, v1.h[3]
; CHECK-NEXT:    ret
entry:
  %extract = extractelement <4 x half> %b, i32 3
  %fmulx.i = tail call half @llvm.aarch64.neon.fmulx.f16(half %a, half %extract)
  ret half %fmulx.i
}

; <4 x half> fmulx by lane 0 splatted from a <4 x half>.
define dso_local <4 x half> @t_vmulx_lane_f16(<4 x half> %a, <4 x half> %b, i32 %lane) {
; CHECK-LABEL: t_vmulx_lane_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    fmulx v0.4h, v0.4h, v1.h[0]
; CHECK-NEXT:    ret
entry:
  %shuffle = shufflevector <4 x half> %b, <4 x half> undef, <4 x i32> zeroinitializer
  %vmulx2.i = tail call <4 x half> @llvm.aarch64.neon.fmulx.v4f16(<4 x half> %a, <4 x half> %shuffle) #4
  ret <4 x half> %vmulx2.i
}

; <8 x half> fmulx by lane 0 of a <4 x half> widened to 8 lanes.
define dso_local <8 x half> @t_vmulxq_lane_f16(<8 x half> %a, <4 x half> %b, i32 %lane) {
; CHECK-LABEL: t_vmulxq_lane_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    fmulx v0.8h, v0.8h, v1.h[0]
; CHECK-NEXT:    ret
entry:
  %shuffle = shufflevector <4 x half> %b, <4 x half> undef, <8 x i32> zeroinitializer
  %vmulx2.i = tail call <8 x half> @llvm.aarch64.neon.fmulx.v8f16(<8 x half> %a, <8 x half> %shuffle) #4
  ret <8 x half> %vmulx2.i
}

; <4 x half> fmulx by lane 0 of an <8 x half>.
define dso_local <4 x half> @t_vmulx_laneq_f16(<4 x half> %a, <8 x half> %b, i32 %lane) {
; CHECK-LABEL: t_vmulx_laneq_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    fmulx v0.4h, v0.4h, v1.h[0]
; CHECK-NEXT:    ret
entry:
  %shuffle = shufflevector <8 x half> %b, <8 x half> undef, <4 x i32> zeroinitializer
  %vmulx2.i = tail call <4 x half> @llvm.aarch64.neon.fmulx.v4f16(<4 x half> %a, <4 x half> %shuffle) #4
  ret <4 x half> %vmulx2.i
}

; <8 x half> fmulx by lane 0 of an <8 x half>.
define dso_local <8 x half> @t_vmulxq_laneq_f16(<8 x half> %a, <8 x half> %b, i32 %lane) {
; CHECK-LABEL: t_vmulxq_laneq_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    fmulx v0.8h, v0.8h, v1.h[0]
; CHECK-NEXT:    ret
entry:
  %shuffle = shufflevector <8 x half> %b, <8 x half> undef, <8 x i32> zeroinitializer
  %vmulx2.i = tail call <8 x half> @llvm.aarch64.neon.fmulx.v8f16(<8 x half> %a, <8 x half> %shuffle) #4
  ret <8 x half> %vmulx2.i
}

; Scalar fmulx by lane 7 (the last lane) of an <8 x half>.
define dso_local half @t_vmulxh_laneq_f16(half %a, <8 x half> %b, i32 %lane) {
; CHECK-LABEL: t_vmulxh_laneq_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    fmulx h0, h0, v1.h[7]
; CHECK-NEXT:    ret
entry:
  %extract = extractelement <8 x half> %b, i32 7
  %fmulx.i = tail call half @llvm.aarch64.neon.fmulx.f16(half %a, half %extract)
  ret half %fmulx.i
}

; <4 x half> fmulx by a splatted scalar. Unlike the fma/fms "_n" tests, the
; expected output here is a separate dup followed by a vector-by-vector fmulx.
define dso_local <4 x half> @t_vmulx_n_f16(<4 x half> %a, half %c) {
; CHECK-LABEL: t_vmulx_n_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $h1 killed $h1 def $q1
; CHECK-NEXT:    dup v1.4h, v1.h[0]
; CHECK-NEXT:    fmulx v0.4h, v0.4h, v1.4h
; CHECK-NEXT:    ret
entry:
  %vecinit = insertelement <4 x half> undef, half %c, i32 0
  %vecinit3 = shufflevector <4 x half> %vecinit, <4 x half> undef, <4 x i32> zeroinitializer
  %vmulx2.i = tail call <4 x half> @llvm.aarch64.neon.fmulx.v4f16(<4 x half> %a, <4 x half> %vecinit3) #4
  ret <4 x half> %vmulx2.i
}

; <8 x half> fmulx by a splatted scalar; expected as dup + vector fmulx.
define dso_local <8 x half> @t_vmulxq_n_f16(<8 x half> %a, half %c) {
; CHECK-LABEL: t_vmulxq_n_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $h1 killed $h1 def $q1
; CHECK-NEXT:    dup v1.8h, v1.h[0]
; CHECK-NEXT:    fmulx v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
entry:
  %vecinit = insertelement <8 x half> undef, half %c, i32 0
  %vecinit7 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer
  %vmulx2.i = tail call <8 x half> @llvm.aarch64.neon.fmulx.v8f16(<8 x half> %a, <8 x half> %vecinit7) #4
  ret <8 x half> %vmulx2.i
}

; ---------------------------------------------------------------------------
; Scalar fma/fms using the last lane of the source vector (3 of 4, 7 of 8)
; rather than lane 0.
; ---------------------------------------------------------------------------

define dso_local half @t_vfmah_lane3_f16(half %a, half %b, <4 x half> %c) {
; CHECK-LABEL: t_vfmah_lane3_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fmla h0, h1, v2.h[3]
; CHECK-NEXT:    ret
entry:
  %extract = extractelement <4 x half> %c, i32 3
  %0 = tail call half @llvm.fma.f16(half %b, half %extract, half %a)
  ret half %0
}

define dso_local half @t_vfmah_laneq7_f16(half %a, half %b, <8 x half> %c) {
; CHECK-LABEL: t_vfmah_laneq7_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    fmla h0, h1, v2.h[7]
; CHECK-NEXT:    ret
entry:
  %extract = extractelement <8 x half> %c, i32 7
  %0 = tail call half @llvm.fma.f16(half %b, half %extract, half %a)
  ret half %0
}

define dso_local half @t_vfmsh_lane3_f16(half %a, half %b, <4 x half> %c) {
; CHECK-LABEL: t_vfmsh_lane3_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fmls h0, h1, v2.h[3]
; CHECK-NEXT:    ret
entry:
  %0 = fsub half 0xH8000, %b
  %extract = extractelement <4 x half> %c, i32 3
  %1 = tail call half @llvm.fma.f16(half %0, half %extract, half %a)
  ret half %1
}

define dso_local half @t_vfmsh_laneq7_f16(half %a, half %b, <8 x half> %c) {
; CHECK-LABEL: t_vfmsh_laneq7_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    fmls h0, h1, v2.h[7]
; CHECK-NEXT:    ret
entry:
  %0 = fsub half 0xH8000, %b
  %extract = extractelement <8 x half> %c, i32 7
  %1 = tail call half @llvm.fma.f16(half %0, half %extract, half %a)
  ret half %1
}

; The lane source is itself the result of a vector fadd, so the indexed fmla
; reads lane 3 of a value computed in this function rather than an argument.
define dso_local half @t_fadd_vfmah_f16(half %a, half %b, <4 x half> %c, <4 x half> %d) {
; CHECK-LABEL: t_fadd_vfmah_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    fadd v2.4h, v2.4h, v3.4h
; CHECK-NEXT:    fmla h0, h1, v2.h[3]
; CHECK-NEXT:    ret
entry:
  %0 = fadd <4 x half> %c, %d
  %extract = extractelement <4 x half> %0, i32 3
  %1 = tail call half @llvm.fma.f16(half %b, half %extract, half %a)
  ret half %1
}