; RUN: llc < %s -mtriple=x86_64-pc-win32 -mcpu=core-avx2 | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-pc-win32 -mattr=+fma | FileCheck %s
; RUN: llc < %s -mcpu=bdver2 -mtriple=x86_64-pc-win32 -mattr=-fma4 | FileCheck %s

; Every function below hands its two vector arguments to one x86 FMA intrinsic
; in one of three operand orders, encoded in the function name:
;   baa -> (%b, %a, %a)    aba -> (%a, %b, %a)    bba -> (%b, %b, %a)
; The expected assembly reads %a through %rcx and %b through %rdx, and pins
; which 132/213/231 instruction form is selected for each operand order.

attributes #0 = { nounwind }

; ---- llvm.x86.fma.vfmadd.ss ----
declare <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone

; Operands: (%b, %a, %a). The two loads may be emitted in either order.
define <4 x float> @test_x86_fmadd_baa_ss(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fmadd_baa_ss:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovaps {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
; CHECK-NEXT:  vmovaps {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
; CHECK-NEXT:  vfmadd213ss %xmm1, %xmm1, %xmm0
; CHECK-NEXT:  retq
  %fma = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
  ret <4 x float> %fma
}

; Operands: (%a, %b, %a).
define <4 x float> @test_x86_fmadd_aba_ss(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fmadd_aba_ss:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovaps (%rcx), %xmm0
; CHECK-NEXT:  vfmadd132ss (%rdx), %xmm0, %xmm0
; CHECK-NEXT:  retq
  %fma = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %fma
}

; Operands: (%b, %b, %a).
define <4 x float> @test_x86_fmadd_bba_ss(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fmadd_bba_ss:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovaps (%rdx), %xmm0
; CHECK-NEXT:  vfmadd213ss (%rcx), %xmm0, %xmm0
; CHECK-NEXT:  retq
  %fma = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %fma
}

; ---- llvm.x86.fma.vfmadd.ps ----
declare <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone

; Operands: (%b, %a, %a).
define <4 x float> @test_x86_fmadd_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fmadd_baa_ps:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovaps (%rcx), %xmm0
; CHECK-NEXT:  vfmadd132ps (%rdx), %xmm0, %xmm0
; CHECK-NEXT:  retq
  %fma = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
  ret <4 x float> %fma
}

; Operands: (%a, %b, %a).
define <4 x float> @test_x86_fmadd_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fmadd_aba_ps:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovaps (%rcx), %xmm0
; CHECK-NEXT:  vfmadd231ps (%rdx), %xmm0, %xmm0
; CHECK-NEXT:  retq
  %fma = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %fma
}

; Operands: (%b, %b, %a).
define <4 x float> @test_x86_fmadd_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fmadd_bba_ps:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovaps (%rdx), %xmm0
; CHECK-NEXT:  vfmadd213ps (%rcx), %xmm0, %xmm0
; CHECK-NEXT:  retq
  %fma = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %fma
}

; ---- llvm.x86.fma.vfmadd.ps.256 ----
declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone

; Operands: (%b, %a, %a).
define <8 x float> @test_x86_fmadd_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; CHECK-LABEL: test_x86_fmadd_baa_ps_y:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovaps (%rcx), %ymm0
; CHECK-NEXT:  vfmadd132ps (%rdx), %ymm0, %ymm0
; CHECK-NEXT:  retq
  %fma = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
  ret <8 x float> %fma
}

; Operands: (%a, %b, %a).
define <8 x float> @test_x86_fmadd_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; CHECK-LABEL: test_x86_fmadd_aba_ps_y:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovaps (%rcx), %ymm0
; CHECK-NEXT:  vfmadd231ps (%rdx), %ymm0, %ymm0
; CHECK-NEXT:  retq
  %fma = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
  ret <8 x float> %fma
}

; Operands: (%b, %b, %a).
define <8 x float> @test_x86_fmadd_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; CHECK-LABEL: test_x86_fmadd_bba_ps_y:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovaps (%rdx), %ymm0
; CHECK-NEXT:  vfmadd213ps (%rcx), %ymm0, %ymm0
; CHECK-NEXT:  retq
  %fma = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
  ret <8 x float> %fma
}

; ---- llvm.x86.fma.vfmadd.sd ----
declare <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone

; Operands: (%b, %a, %a). The two loads may be emitted in either order.
define <2 x double> @test_x86_fmadd_baa_sd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fmadd_baa_sd:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovapd {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
; CHECK-NEXT:  vmovapd {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
; CHECK-NEXT:  vfmadd213sd %xmm1, %xmm1, %xmm0
; CHECK-NEXT:  retq
  %fma = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
  ret <2 x double> %fma
}

; Operands: (%a, %b, %a).
define <2 x double> @test_x86_fmadd_aba_sd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fmadd_aba_sd:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovapd (%rcx), %xmm0
; CHECK-NEXT:  vfmadd132sd (%rdx), %xmm0, %xmm0
; CHECK-NEXT:  retq
  %fma = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %fma
}

; Operands: (%b, %b, %a).
define <2 x double> @test_x86_fmadd_bba_sd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fmadd_bba_sd:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovapd (%rdx), %xmm0
; CHECK-NEXT:  vfmadd213sd (%rcx), %xmm0, %xmm0
; CHECK-NEXT:  retq
  %fma = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %fma
}

; ---- llvm.x86.fma.vfmadd.pd ----
declare <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone

; Operands: (%b, %a, %a).
define <2 x double> @test_x86_fmadd_baa_pd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fmadd_baa_pd:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovapd (%rcx), %xmm0
; CHECK-NEXT:  vfmadd132pd (%rdx), %xmm0, %xmm0
; CHECK-NEXT:  retq
  %fma = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
  ret <2 x double> %fma
}

; Operands: (%a, %b, %a).
define <2 x double> @test_x86_fmadd_aba_pd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fmadd_aba_pd:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovapd (%rcx), %xmm0
; CHECK-NEXT:  vfmadd231pd (%rdx), %xmm0, %xmm0
; CHECK-NEXT:  retq
  %fma = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %fma
}

; Operands: (%b, %b, %a).
define <2 x double> @test_x86_fmadd_bba_pd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fmadd_bba_pd:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovapd (%rdx), %xmm0
; CHECK-NEXT:  vfmadd213pd (%rcx), %xmm0, %xmm0
; CHECK-NEXT:  retq
  %fma = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %fma
}

; ---- llvm.x86.fma.vfmadd.pd.256 ----
declare <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone

; Operands: (%b, %a, %a).
define <4 x double> @test_x86_fmadd_baa_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; CHECK-LABEL: test_x86_fmadd_baa_pd_y:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovapd (%rcx), %ymm0
; CHECK-NEXT:  vfmadd132pd (%rdx), %ymm0, %ymm0
; CHECK-NEXT:  retq
  %fma = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
  ret <4 x double> %fma
}

; Operands: (%a, %b, %a).
define <4 x double> @test_x86_fmadd_aba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; CHECK-LABEL: test_x86_fmadd_aba_pd_y:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovapd (%rcx), %ymm0
; CHECK-NEXT:  vfmadd231pd (%rdx), %ymm0, %ymm0
; CHECK-NEXT:  retq
  %fma = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
  ret <4 x double> %fma
}

; Operands: (%b, %b, %a).
define <4 x double> @test_x86_fmadd_bba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; CHECK-LABEL: test_x86_fmadd_bba_pd_y:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovapd (%rdx), %ymm0
; CHECK-NEXT:  vfmadd213pd (%rcx), %ymm0, %ymm0
; CHECK-NEXT:  retq
  %fma = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
  ret <4 x double> %fma
}


; ---- llvm.x86.fma.vfnmadd.ss ----
declare <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone

; Operands: (%b, %a, %a). The two loads may be emitted in either order.
define <4 x float> @test_x86_fnmadd_baa_ss(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fnmadd_baa_ss:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovaps {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
; CHECK-NEXT:  vmovaps {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
; CHECK-NEXT:  vfnmadd213ss %xmm1, %xmm1, %xmm0
; CHECK-NEXT:  retq
  %fma = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
  ret <4 x float> %fma
}

; Operands: (%a, %b, %a).
define <4 x float> @test_x86_fnmadd_aba_ss(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fnmadd_aba_ss:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovaps (%rcx), %xmm0
; CHECK-NEXT:  vfnmadd132ss (%rdx), %xmm0, %xmm0
; CHECK-NEXT:  retq
  %fma = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %fma
}

; Operands: (%b, %b, %a).
define <4 x float> @test_x86_fnmadd_bba_ss(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fnmadd_bba_ss:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovaps (%rdx), %xmm0
; CHECK-NEXT:  vfnmadd213ss (%rcx), %xmm0, %xmm0
; CHECK-NEXT:  retq
  %fma = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %fma
}

; ---- llvm.x86.fma.vfnmadd.ps ----
declare <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone

; Operands: (%b, %a, %a).
define <4 x float> @test_x86_fnmadd_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fnmadd_baa_ps:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovaps (%rcx), %xmm0
; CHECK-NEXT:  vfnmadd132ps (%rdx), %xmm0, %xmm0
; CHECK-NEXT:  retq
  %fma = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
  ret <4 x float> %fma
}

; Operands: (%a, %b, %a).
define <4 x float> @test_x86_fnmadd_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fnmadd_aba_ps:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovaps (%rcx), %xmm0
; CHECK-NEXT:  vfnmadd231ps (%rdx), %xmm0, %xmm0
; CHECK-NEXT:  retq
  %fma = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %fma
}

; Operands: (%b, %b, %a).
define <4 x float> @test_x86_fnmadd_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fnmadd_bba_ps:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovaps (%rdx), %xmm0
; CHECK-NEXT:  vfnmadd213ps (%rcx), %xmm0, %xmm0
; CHECK-NEXT:  retq
  %fma = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %fma
}

; ---- llvm.x86.fma.vfnmadd.ps.256 ----
declare <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone

; Operands: (%b, %a, %a).
define <8 x float> @test_x86_fnmadd_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; CHECK-LABEL: test_x86_fnmadd_baa_ps_y:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovaps (%rcx), %ymm0
; CHECK-NEXT:  vfnmadd132ps (%rdx), %ymm0, %ymm0
; CHECK-NEXT:  retq
  %fma = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
  ret <8 x float> %fma
}

; Operands: (%a, %b, %a).
define <8 x float> @test_x86_fnmadd_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; CHECK-LABEL: test_x86_fnmadd_aba_ps_y:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovaps (%rcx), %ymm0
; CHECK-NEXT:  vfnmadd231ps (%rdx), %ymm0, %ymm0
; CHECK-NEXT:  retq
  %fma = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
  ret <8 x float> %fma
}

; Operands: (%b, %b, %a).
define <8 x float> @test_x86_fnmadd_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; CHECK-LABEL: test_x86_fnmadd_bba_ps_y:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovaps (%rdx), %ymm0
; CHECK-NEXT:  vfnmadd213ps (%rcx), %ymm0, %ymm0
; CHECK-NEXT:  retq
  %fma = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
  ret <8 x float> %fma
}

; ---- llvm.x86.fma.vfnmadd.sd ----
declare <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone

; Operands: (%b, %a, %a). The two loads may be emitted in either order.
define <2 x double> @test_x86_fnmadd_baa_sd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fnmadd_baa_sd:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovapd {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
; CHECK-NEXT:  vmovapd {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
; CHECK-NEXT:  vfnmadd213sd %xmm1, %xmm1, %xmm0
; CHECK-NEXT:  retq
  %fma = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
  ret <2 x double> %fma
}

; Operands: (%a, %b, %a).
define <2 x double> @test_x86_fnmadd_aba_sd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fnmadd_aba_sd:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovapd (%rcx), %xmm0
; CHECK-NEXT:  vfnmadd132sd (%rdx), %xmm0, %xmm0
; CHECK-NEXT:  retq
  %fma = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %fma
}

; Operands: (%b, %b, %a).
define <2 x double> @test_x86_fnmadd_bba_sd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fnmadd_bba_sd:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovapd (%rdx), %xmm0
; CHECK-NEXT:  vfnmadd213sd (%rcx), %xmm0, %xmm0
; CHECK-NEXT:  retq
  %fma = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %fma
}

; ---- llvm.x86.fma.vfnmadd.pd ----
declare <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone

; Operands: (%b, %a, %a).
define <2 x double> @test_x86_fnmadd_baa_pd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fnmadd_baa_pd:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovapd (%rcx), %xmm0
; CHECK-NEXT:  vfnmadd132pd (%rdx), %xmm0, %xmm0
; CHECK-NEXT:  retq
  %fma = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
  ret <2 x double> %fma
}

; Operands: (%a, %b, %a).
define <2 x double> @test_x86_fnmadd_aba_pd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fnmadd_aba_pd:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovapd (%rcx), %xmm0
; CHECK-NEXT:  vfnmadd231pd (%rdx), %xmm0, %xmm0
; CHECK-NEXT:  retq
  %fma = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %fma
}

; Operands: (%b, %b, %a).
define <2 x double> @test_x86_fnmadd_bba_pd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fnmadd_bba_pd:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovapd (%rdx), %xmm0
; CHECK-NEXT:  vfnmadd213pd (%rcx), %xmm0, %xmm0
; CHECK-NEXT:  retq
  %fma = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %fma
}

; ---- llvm.x86.fma.vfnmadd.pd.256 ----
declare <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone

; Operands: (%b, %a, %a).
define <4 x double> @test_x86_fnmadd_baa_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; CHECK-LABEL: test_x86_fnmadd_baa_pd_y:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovapd (%rcx), %ymm0
; CHECK-NEXT:  vfnmadd132pd (%rdx), %ymm0, %ymm0
; CHECK-NEXT:  retq
  %fma = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
  ret <4 x double> %fma
}

; Operands: (%a, %b, %a).
define <4 x double> @test_x86_fnmadd_aba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; CHECK-LABEL: test_x86_fnmadd_aba_pd_y:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovapd (%rcx), %ymm0
; CHECK-NEXT:  vfnmadd231pd (%rdx), %ymm0, %ymm0
; CHECK-NEXT:  retq
  %fma = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
  ret <4 x double> %fma
}

; Operands: (%b, %b, %a).
define <4 x double> @test_x86_fnmadd_bba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; CHECK-LABEL: test_x86_fnmadd_bba_pd_y:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovapd (%rdx), %ymm0
; CHECK-NEXT:  vfnmadd213pd (%rcx), %ymm0, %ymm0
; CHECK-NEXT:  retq
  %fma = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
  ret <4 x double> %fma
}


; ---- llvm.x86.fma.vfmsub.ss ----
declare <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone

; Operands: (%b, %a, %a). The two loads may be emitted in either order.
define <4 x float> @test_x86_fmsub_baa_ss(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fmsub_baa_ss:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovaps {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
; CHECK-NEXT:  vmovaps {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
; CHECK-NEXT:  vfmsub213ss %xmm1, %xmm1, %xmm0
; CHECK-NEXT:  retq
  %fma = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
  ret <4 x float> %fma
}

; Operands: (%a, %b, %a).
define <4 x float> @test_x86_fmsub_aba_ss(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fmsub_aba_ss:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovaps (%rcx), %xmm0
; CHECK-NEXT:  vfmsub132ss (%rdx), %xmm0, %xmm0
; CHECK-NEXT:  retq
  %fma = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %fma
}

; Operands: (%b, %b, %a).
define <4 x float> @test_x86_fmsub_bba_ss(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fmsub_bba_ss:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovaps (%rdx), %xmm0
; CHECK-NEXT:  vfmsub213ss (%rcx), %xmm0, %xmm0
; CHECK-NEXT:  retq
  %fma = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %fma
}

; ---- llvm.x86.fma.vfmsub.ps ----
declare <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone

; Operands: (%b, %a, %a).
define <4 x float> @test_x86_fmsub_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fmsub_baa_ps:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovaps (%rcx), %xmm0
; CHECK-NEXT:  vfmsub132ps (%rdx), %xmm0, %xmm0
; CHECK-NEXT:  retq
  %fma = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
  ret <4 x float> %fma
}

; Operands: (%a, %b, %a).
define <4 x float> @test_x86_fmsub_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fmsub_aba_ps:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovaps (%rcx), %xmm0
; CHECK-NEXT:  vfmsub231ps (%rdx), %xmm0, %xmm0
; CHECK-NEXT:  retq
  %fma = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %fma
}

; Operands: (%b, %b, %a).
define <4 x float> @test_x86_fmsub_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fmsub_bba_ps:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovaps (%rdx), %xmm0
; CHECK-NEXT:  vfmsub213ps (%rcx), %xmm0, %xmm0
; CHECK-NEXT:  retq
  %fma = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %fma
}

; ---- llvm.x86.fma.vfmsub.ps.256 ----
declare <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone

; Operands: (%b, %a, %a).
define <8 x float> @test_x86_fmsub_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; CHECK-LABEL: test_x86_fmsub_baa_ps_y:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovaps (%rcx), %ymm0
; CHECK-NEXT:  vfmsub132ps (%rdx), %ymm0, %ymm0
; CHECK-NEXT:  retq
  %fma = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
  ret <8 x float> %fma
}

; Operands: (%a, %b, %a).
define <8 x float> @test_x86_fmsub_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; CHECK-LABEL: test_x86_fmsub_aba_ps_y:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovaps (%rcx), %ymm0
; CHECK-NEXT:  vfmsub231ps (%rdx), %ymm0, %ymm0
; CHECK-NEXT:  retq
  %fma = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
  ret <8 x float> %fma
}

; Operands: (%b, %b, %a).
define <8 x float> @test_x86_fmsub_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; CHECK-LABEL: test_x86_fmsub_bba_ps_y:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovaps (%rdx), %ymm0
; CHECK-NEXT:  vfmsub213ps (%rcx), %ymm0, %ymm0
; CHECK-NEXT:  retq
  %fma = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
  ret <8 x float> %fma
}

; ---- llvm.x86.fma.vfmsub.sd ----
declare <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone

; Operands: (%b, %a, %a). The two loads may be emitted in either order.
define <2 x double> @test_x86_fmsub_baa_sd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fmsub_baa_sd:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovapd {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
; CHECK-NEXT:  vmovapd {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
; CHECK-NEXT:  vfmsub213sd %xmm1, %xmm1, %xmm0
; CHECK-NEXT:  retq
  %fma = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
  ret <2 x double> %fma
}

; Operands: (%a, %b, %a).
define <2 x double> @test_x86_fmsub_aba_sd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fmsub_aba_sd:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovapd (%rcx), %xmm0
; CHECK-NEXT:  vfmsub132sd (%rdx), %xmm0, %xmm0
; CHECK-NEXT:  retq
  %fma = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %fma
}

; Operands: (%b, %b, %a).
define <2 x double> @test_x86_fmsub_bba_sd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fmsub_bba_sd:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovapd (%rdx), %xmm0
; CHECK-NEXT:  vfmsub213sd (%rcx), %xmm0, %xmm0
; CHECK-NEXT:  retq
  %fma = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %fma
}

; ---- llvm.x86.fma.vfmsub.pd ----
declare <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone

; Operands: (%b, %a, %a).
define <2 x double> @test_x86_fmsub_baa_pd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fmsub_baa_pd:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovapd (%rcx), %xmm0
; CHECK-NEXT:  vfmsub132pd (%rdx), %xmm0, %xmm0
; CHECK-NEXT:  retq
  %fma = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
  ret <2 x double> %fma
}

; Operands: (%a, %b, %a).
define <2 x double> @test_x86_fmsub_aba_pd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fmsub_aba_pd:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovapd (%rcx), %xmm0
; CHECK-NEXT:  vfmsub231pd (%rdx), %xmm0, %xmm0
; CHECK-NEXT:  retq
  %fma = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %fma
}

; Operands: (%b, %b, %a).
define <2 x double> @test_x86_fmsub_bba_pd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fmsub_bba_pd:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovapd (%rdx), %xmm0
; CHECK-NEXT:  vfmsub213pd (%rcx), %xmm0, %xmm0
; CHECK-NEXT:  retq
  %fma = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %fma
}

; ---- llvm.x86.fma.vfmsub.pd.256 ----
declare <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone

; Operands: (%b, %a, %a).
define <4 x double> @test_x86_fmsub_baa_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; CHECK-LABEL: test_x86_fmsub_baa_pd_y:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovapd (%rcx), %ymm0
; CHECK-NEXT:  vfmsub132pd (%rdx), %ymm0, %ymm0
; CHECK-NEXT:  retq
  %fma = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
  ret <4 x double> %fma
}

; Operands: (%a, %b, %a).
define <4 x double> @test_x86_fmsub_aba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; CHECK-LABEL: test_x86_fmsub_aba_pd_y:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovapd (%rcx), %ymm0
; CHECK-NEXT:  vfmsub231pd (%rdx), %ymm0, %ymm0
; CHECK-NEXT:  retq
  %fma = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
  ret <4 x double> %fma
}

; Operands: (%b, %b, %a).
define <4 x double> @test_x86_fmsub_bba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; CHECK-LABEL: test_x86_fmsub_bba_pd_y:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovapd (%rdx), %ymm0
; CHECK-NEXT:  vfmsub213pd (%rcx), %ymm0, %ymm0
; CHECK-NEXT:  retq
  %fma = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
  ret <4 x double> %fma
}


; ---- llvm.x86.fma.vfnmsub.ss ----
declare <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone

; Operands: (%b, %a, %a). The two loads may be emitted in either order.
define <4 x float> @test_x86_fnmsub_baa_ss(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fnmsub_baa_ss:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovaps {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
; CHECK-NEXT:  vmovaps {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
; CHECK-NEXT:  vfnmsub213ss %xmm1, %xmm1, %xmm0
; CHECK-NEXT:  retq
  %fma = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
  ret <4 x float> %fma
}

; Operands: (%a, %b, %a).
define <4 x float> @test_x86_fnmsub_aba_ss(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fnmsub_aba_ss:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovaps (%rcx), %xmm0
; CHECK-NEXT:  vfnmsub132ss (%rdx), %xmm0, %xmm0
; CHECK-NEXT:  retq
  %fma = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %fma
}

; Operands: (%b, %b, %a).
define <4 x float> @test_x86_fnmsub_bba_ss(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fnmsub_bba_ss:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovaps (%rdx), %xmm0
; CHECK-NEXT:  vfnmsub213ss (%rcx), %xmm0, %xmm0
; CHECK-NEXT:  retq
  %fma = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %fma
}

; ---- llvm.x86.fma.vfnmsub.ps ----
declare <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone

; Operands: (%b, %a, %a).
define <4 x float> @test_x86_fnmsub_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fnmsub_baa_ps:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovaps (%rcx), %xmm0
; CHECK-NEXT:  vfnmsub132ps (%rdx), %xmm0, %xmm0
; CHECK-NEXT:  retq
  %fma = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
  ret <4 x float> %fma
}

; Operands: (%a, %b, %a).
define <4 x float> @test_x86_fnmsub_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fnmsub_aba_ps:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovaps (%rcx), %xmm0
; CHECK-NEXT:  vfnmsub231ps (%rdx), %xmm0, %xmm0
; CHECK-NEXT:  retq
  %fma = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %fma
}

; Operands: (%b, %b, %a).
define <4 x float> @test_x86_fnmsub_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fnmsub_bba_ps:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovaps (%rdx), %xmm0
; CHECK-NEXT:  vfnmsub213ps (%rcx), %xmm0, %xmm0
; CHECK-NEXT:  retq
  %fma = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %fma
}

; ---- llvm.x86.fma.vfnmsub.ps.256 ----
declare <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone

; Operands: (%b, %a, %a).
define <8 x float> @test_x86_fnmsub_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; CHECK-LABEL: test_x86_fnmsub_baa_ps_y:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovaps (%rcx), %ymm0
; CHECK-NEXT:  vfnmsub132ps (%rdx), %ymm0, %ymm0
; CHECK-NEXT:  retq
  %fma = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
  ret <8 x float> %fma
}

; Operands: (%a, %b, %a).
define <8 x float> @test_x86_fnmsub_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; CHECK-LABEL: test_x86_fnmsub_aba_ps_y:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovaps (%rcx), %ymm0
; CHECK-NEXT:  vfnmsub231ps (%rdx), %ymm0, %ymm0
; CHECK-NEXT:  retq
  %fma = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
  ret <8 x float> %fma
}

; Operands: (%b, %b, %a).
define <8 x float> @test_x86_fnmsub_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; CHECK-LABEL: test_x86_fnmsub_bba_ps_y:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovaps (%rdx), %ymm0
; CHECK-NEXT:  vfnmsub213ps (%rcx), %ymm0, %ymm0
; CHECK-NEXT:  retq
  %fma = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
  ret <8 x float> %fma
}

; ---- llvm.x86.fma.vfnmsub.sd ----
declare <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone

; Operands: (%b, %a, %a). The two loads may be emitted in either order.
define <2 x double> @test_x86_fnmsub_baa_sd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fnmsub_baa_sd:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovapd {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
; CHECK-NEXT:  vmovapd {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
; CHECK-NEXT:  vfnmsub213sd %xmm1, %xmm1, %xmm0
; CHECK-NEXT:  retq
  %fma = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
  ret <2 x double> %fma
}

; Operands: (%a, %b, %a).
define <2 x double> @test_x86_fnmsub_aba_sd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fnmsub_aba_sd:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovapd (%rcx), %xmm0
; CHECK-NEXT:  vfnmsub132sd (%rdx), %xmm0, %xmm0
; CHECK-NEXT:  retq
  %fma = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %fma
}

; Operands: (%b, %b, %a).
define <2 x double> @test_x86_fnmsub_bba_sd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fnmsub_bba_sd:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovapd (%rdx), %xmm0
; CHECK-NEXT:  vfnmsub213sd (%rcx), %xmm0, %xmm0
; CHECK-NEXT:  retq
  %fma = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %fma
}

; Packed double-precision vfnmsub cases. Each function name encodes the order
; in which the two arguments are passed to the intrinsic:
;   baa -> (%b, %a, %a)    aba -> (%a, %b, %a)    bba -> (%b, %b, %a)
; The expected assembly reads %a through %rcx and %b through %rdx.

; ---- llvm.x86.fma.vfnmsub.pd ----
declare <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone

; Operands: (%b, %a, %a).
define <2 x double> @test_x86_fnmsub_baa_pd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fnmsub_baa_pd:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovapd (%rcx), %xmm0
; CHECK-NEXT:  vfnmsub132pd (%rdx), %xmm0, %xmm0
; CHECK-NEXT:  retq
  %fma = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
  ret <2 x double> %fma
}

; Operands: (%a, %b, %a).
define <2 x double> @test_x86_fnmsub_aba_pd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fnmsub_aba_pd:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovapd (%rcx), %xmm0
; CHECK-NEXT:  vfnmsub231pd (%rdx), %xmm0, %xmm0
; CHECK-NEXT:  retq
  %fma = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %fma
}

; Operands: (%b, %b, %a).
define <2 x double> @test_x86_fnmsub_bba_pd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fnmsub_bba_pd:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovapd (%rdx), %xmm0
; CHECK-NEXT:  vfnmsub213pd (%rcx), %xmm0, %xmm0
; CHECK-NEXT:  retq
  %fma = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %fma
}

; ---- llvm.x86.fma.vfnmsub.pd.256 ----
declare <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone

; Operands: (%b, %a, %a).
define <4 x double> @test_x86_fnmsub_baa_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; CHECK-LABEL: test_x86_fnmsub_baa_pd_y:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovapd (%rcx), %ymm0
; CHECK-NEXT:  vfnmsub132pd (%rdx), %ymm0, %ymm0
; CHECK-NEXT:  retq
  %fma = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
  ret <4 x double> %fma
}

; Operands: (%a, %b, %a).
define <4 x double> @test_x86_fnmsub_aba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; CHECK-LABEL: test_x86_fnmsub_aba_pd_y:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovapd (%rcx), %ymm0
; CHECK-NEXT:  vfnmsub231pd (%rdx), %ymm0, %ymm0
; CHECK-NEXT:  retq
  %fma = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
  ret <4 x double> %fma
}

; Operands: (%b, %b, %a).
define <4 x double> @test_x86_fnmsub_bba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; CHECK-LABEL: test_x86_fnmsub_bba_pd_y:
; CHECK:       # BB#0:
; CHECK-NEXT:  vmovapd (%rdx), %ymm0
; CHECK-NEXT:  vfnmsub213pd (%rcx), %ymm0, %ymm0
; CHECK-NEXT:  retq
  %fma = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
  ret <4 x double> %fma
}
