; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mcpu=corei7-avx -mattr=+fma | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mcpu=core-avx2 -mattr=+fma,+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA
; RUN: llc < %s -mtriple=x86_64-pc-windows -march=x86-64 -mcpu=core-avx2 -mattr=+fma,+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA-WIN
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mcpu=corei7-avx -mattr=+fma4 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA4
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -mattr=+avx,-fma | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA4
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -mattr=-fma4 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA

; Verifies instruction selection for the x86 FMA intrinsics (llvm.x86.fma.*)
; under three sets of expectations, selected by the FileCheck prefixes passed
; on the invocations above:
;   * CHECK-FMA      - three-operand forms (e.g. vfmadd213ss) on the
;                      x86_64-unknown-unknown triple, including bdver2 with
;                      FMA4 disabled.
;   * CHECK-FMA-WIN  - x86_64-pc-windows triple; the operands are expected to
;                      be loaded from memory through rcx/rdx/r8.
;   * CHECK-FMA4     - four-operand forms (e.g. vfmaddss), including bdver2
;                      with FMA disabled.
; Every test takes (%a0, %a1, %a2) and returns the intrinsic result. The
; *_bac_* variants pass %a1 and %a0 in swapped order to the same intrinsic.

; VFMADD
define <4 x float> @test_x86_fma_vfmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmadd_ss:
; CHECK-NEXT: # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vfmadd132ss (%rdx), %xmm1, %xmm0
;
; CHECK-FMA-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfmaddss %xmm2, %xmm1, %xmm0, %xmm0
;
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}

; Operands 0 and 1 swapped in the call: (%a1, %a0, %a2).
define <4 x float> @test_x86_fma_vfmadd_bac_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmadd_bac_ss:
; CHECK-NEXT: # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vfmadd132ss (%rcx), %xmm1, %xmm0
;
; CHECK-FMA-NEXT: vfmadd213ss %xmm2, %xmm0, %xmm1
; CHECK-FMA-NEXT: vmovaps %xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfmaddss %xmm2, %xmm0, %xmm1, %xmm0
;
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>)

define <2 x double> @test_x86_fma_vfmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmadd_sd:
; CHECK-NEXT: # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vfmadd132sd (%rdx), %xmm1, %xmm0
;
; CHECK-FMA-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0
;
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}

; Operands 0 and 1 swapped in the call: (%a1, %a0, %a2).
define <2 x double> @test_x86_fma_vfmadd_bac_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmadd_bac_sd:
; CHECK-NEXT: # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vfmadd132sd (%rcx), %xmm1, %xmm0
;
; CHECK-FMA-NEXT: vfmadd213sd %xmm2, %xmm0, %xmm1
; CHECK-FMA-NEXT: vmovapd %xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfmaddsd %xmm2, %xmm0, %xmm1, %xmm0
;
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a1, <2 x double> %a0, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>)

define <4 x float> @test_x86_fma_vfmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmadd_ps:
; CHECK-NEXT: # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfmadd213ps (%r8), %xmm1, %xmm0
;
; CHECK-FMA-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfmaddps %xmm2, %xmm1, %xmm0, %xmm0
;
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float>, <4 x float>, <4 x float>)

define <2 x double> @test_x86_fma_vfmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmadd_pd:
; CHECK-NEXT: # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfmadd213pd (%r8), %xmm1, %xmm0
;
; CHECK-FMA-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0
;
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double>, <2 x double>, <2 x double>)

define <8 x float> @test_x86_fma_vfmadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmadd_ps_256:
; CHECK-NEXT: # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfmadd213ps (%r8), %ymm1, %ymm0
;
; CHECK-FMA-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0
;
; CHECK-FMA4-NEXT: vfmaddps %ymm2, %ymm1, %ymm0, %ymm0
;
; CHECK-NEXT: retq
  %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>)

define <4 x double> @test_x86_fma_vfmadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmadd_pd_256:
; CHECK-NEXT: # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfmadd213pd (%r8), %ymm1, %ymm0
;
; CHECK-FMA-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0
;
; CHECK-FMA4-NEXT: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0
;
; CHECK-NEXT: retq
  %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>)

; VFMSUB
define <4 x float> @test_x86_fma_vfmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsub_ss:
; CHECK-NEXT: # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vfmsub132ss (%rdx), %xmm1, %xmm0
;
; CHECK-FMA-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfmsubss %xmm2, %xmm1, %xmm0, %xmm0
;
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}

; Operands 0 and 1 swapped in the call: (%a1, %a0, %a2).
define <4 x float> @test_x86_fma_vfmsub_bac_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsub_bac_ss:
; CHECK-NEXT: # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vfmsub132ss (%rcx), %xmm1, %xmm0
;
; CHECK-FMA-NEXT: vfmsub213ss %xmm2, %xmm0, %xmm1
; CHECK-FMA-NEXT: vmovaps %xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfmsubss %xmm2, %xmm0, %xmm1, %xmm0
;
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float>, <4 x float>, <4 x float>)

define <2 x double> @test_x86_fma_vfmsub_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsub_sd:
; CHECK-NEXT: # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vfmsub132sd (%rdx), %xmm1, %xmm0
;
; CHECK-FMA-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0
;
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}

; Operands 0 and 1 swapped in the call: (%a1, %a0, %a2).
define <2 x double> @test_x86_fma_vfmsub_bac_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsub_bac_sd:
; CHECK-NEXT: # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vfmsub132sd (%rcx), %xmm1, %xmm0
;
; CHECK-FMA-NEXT: vfmsub213sd %xmm2, %xmm0, %xmm1
; CHECK-FMA-NEXT: vmovapd %xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfmsubsd %xmm2, %xmm0, %xmm1, %xmm0
;
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a1, <2 x double> %a0, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double>, <2 x double>, <2 x double>)

define <4 x float> @test_x86_fma_vfmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsub_ps:
; CHECK-NEXT: # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfmsub213ps (%r8), %xmm1, %xmm0
;
; CHECK-FMA-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
;
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float>, <4 x float>, <4 x float>)

define <2 x double> @test_x86_fma_vfmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsub_pd:
; CHECK-NEXT: # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfmsub213pd (%r8), %xmm1, %xmm0
;
; CHECK-FMA-NEXT: vfmsub213pd %xmm2, %xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0
;
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double>, <2 x double>, <2 x double>)

define <8 x float> @test_x86_fma_vfmsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsub_ps_256:
; CHECK-NEXT: # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfmsub213ps (%r8), %ymm1, %ymm0
;
; CHECK-FMA-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0
;
; CHECK-FMA4-NEXT: vfmsubps %ymm2, %ymm1, %ymm0, %ymm0
;
; CHECK-NEXT: retq
  %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>)

define <4 x double> @test_x86_fma_vfmsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsub_pd_256:
; CHECK-NEXT: # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfmsub213pd (%r8), %ymm1, %ymm0
;
; CHECK-FMA-NEXT: vfmsub213pd %ymm2, %ymm1, %ymm0
;
; CHECK-FMA4-NEXT: vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0
;
; CHECK-NEXT: retq
  %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>)

; VFNMADD
define <4 x float> @test_x86_fma_vfnmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmadd_ss:
; CHECK-NEXT: # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vfnmadd132ss (%rdx), %xmm1, %xmm0
;
; CHECK-FMA-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0
;
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}

; Operands 0 and 1 swapped in the call: (%a1, %a0, %a2).
define <4 x float> @test_x86_fma_vfnmadd_bac_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmadd_bac_ss:
; CHECK-NEXT: # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vfnmadd132ss (%rcx), %xmm1, %xmm0
;
; CHECK-FMA-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm1
; CHECK-FMA-NEXT: vmovaps %xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfnmaddss %xmm2, %xmm0, %xmm1, %xmm0
;
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float>, <4 x float>, <4 x float>)

define <2 x double> @test_x86_fma_vfnmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmadd_sd:
; CHECK-NEXT: # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vfnmadd132sd (%rdx), %xmm1, %xmm0
;
; CHECK-FMA-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0
;
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}

; Operands 0 and 1 swapped in the call: (%a1, %a0, %a2).
define <2 x double> @test_x86_fma_vfnmadd_bac_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmadd_bac_sd:
; CHECK-NEXT: # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vfnmadd132sd (%rcx), %xmm1, %xmm0
;
; CHECK-FMA-NEXT: vfnmadd213sd %xmm2, %xmm0, %xmm1
; CHECK-FMA-NEXT: vmovapd %xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfnmaddsd %xmm2, %xmm0, %xmm1, %xmm0
;
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a1, <2 x double> %a0, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double>, <2 x double>, <2 x double>)

define <4 x float> @test_x86_fma_vfnmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmadd_ps:
; CHECK-NEXT: # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfnmadd213ps (%r8), %xmm1, %xmm0
;
; CHECK-FMA-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0
;
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float>, <4 x float>, <4 x float>)

define <2 x double> @test_x86_fma_vfnmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmadd_pd:
; CHECK-NEXT: # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfnmadd213pd (%r8), %xmm1, %xmm0
;
; CHECK-FMA-NEXT: vfnmadd213pd %xmm2, %xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0
;
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double>, <2 x double>, <2 x double>)

define <8 x float> @test_x86_fma_vfnmadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmadd_ps_256:
; CHECK-NEXT: # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfnmadd213ps (%r8), %ymm1, %ymm0
;
; CHECK-FMA-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
;
; CHECK-FMA4-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0
;
; CHECK-NEXT: retq
  %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>)

define <4 x double> @test_x86_fma_vfnmadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmadd_pd_256:
; CHECK-NEXT: # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfnmadd213pd (%r8), %ymm1, %ymm0
;
; CHECK-FMA-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0
;
; CHECK-FMA4-NEXT: vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0
;
; CHECK-NEXT: retq
  %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double>, <4 x double>, <4 x double>)

; VFNMSUB
define <4 x float> @test_x86_fma_vfnmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmsub_ss:
; CHECK-NEXT: # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vfnmsub132ss (%rdx), %xmm1, %xmm0
;
; CHECK-FMA-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0
;
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}

; Operands 0 and 1 swapped in the call: (%a1, %a0, %a2).
define <4 x float> @test_x86_fma_vfnmsub_bac_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmsub_bac_ss:
; CHECK-NEXT: # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vfnmsub132ss (%rcx), %xmm1, %xmm0
;
; CHECK-FMA-NEXT: vfnmsub213ss %xmm2, %xmm0, %xmm1
; CHECK-FMA-NEXT: vmovaps %xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfnmsubss %xmm2, %xmm0, %xmm1, %xmm0
;
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float>, <4 x float>, <4 x float>)

define <2 x double> @test_x86_fma_vfnmsub_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmsub_sd:
; CHECK-NEXT: # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vfnmsub132sd (%rdx), %xmm1, %xmm0
;
; CHECK-FMA-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0
;
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}

; Operands 0 and 1 swapped in the call: (%a1, %a0, %a2).
define <2 x double> @test_x86_fma_vfnmsub_bac_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmsub_bac_sd:
; CHECK-NEXT: # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vfnmsub132sd (%rcx), %xmm1, %xmm0
;
; CHECK-FMA-NEXT: vfnmsub213sd %xmm2, %xmm0, %xmm1
; CHECK-FMA-NEXT: vmovapd %xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfnmsubsd %xmm2, %xmm0, %xmm1, %xmm0
;
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a1, <2 x double> %a0, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double>, <2 x double>, <2 x double>)

define <4 x float> @test_x86_fma_vfnmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmsub_ps:
; CHECK-NEXT: # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfnmsub213ps (%r8), %xmm1, %xmm0
;
; CHECK-FMA-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
;
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float>, <4 x float>, <4 x float>)

define <2 x double> @test_x86_fma_vfnmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmsub_pd:
; CHECK-NEXT: # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfnmsub213pd (%r8), %xmm1, %xmm0
;
; CHECK-FMA-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0
;
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double>, <2 x double>, <2 x double>)

define <8 x float> @test_x86_fma_vfnmsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmsub_ps_256:
; CHECK-NEXT: # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfnmsub213ps (%r8), %ymm1, %ymm0
;
; CHECK-FMA-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0
;
; CHECK-FMA4-NEXT: vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0
;
; CHECK-NEXT: retq
  %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>)

define <4 x double> @test_x86_fma_vfnmsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmsub_pd_256:
; CHECK-NEXT: # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfnmsub213pd (%r8), %ymm1, %ymm0
;
; CHECK-FMA-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0
;
; CHECK-FMA4-NEXT: vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0
;
; CHECK-NEXT: retq
  %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>)

; VFMADDSUB
define <4 x float> @test_x86_fma_vfmaddsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmaddsub_ps:
; CHECK-NEXT: # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfmaddsub213ps (%r8), %xmm1, %xmm0
;
; CHECK-FMA-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfmaddsubps %xmm2, %xmm1, %xmm0, %xmm0
;
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float>, <4 x float>, <4 x float>)

define <2 x double> @test_x86_fma_vfmaddsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmaddsub_pd:
; CHECK-NEXT: # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfmaddsub213pd (%r8), %xmm1, %xmm0
;
; CHECK-FMA-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfmaddsubpd %xmm2, %xmm1, %xmm0, %xmm0
;
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double>, <2 x double>, <2 x double>)

; 256-bit VFMADDSUB: same expectations as the 128-bit forms, on ymm registers.
define <8 x float> @test_x86_fma_vfmaddsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmaddsub_ps_256:
; CHECK-NEXT: # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfmaddsub213ps (%r8), %ymm1, %ymm0
;
; CHECK-FMA-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0
;
; CHECK-FMA4-NEXT: vfmaddsubps %ymm2, %ymm1, %ymm0, %ymm0
;
; CHECK-NEXT: retq
  %res = call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>)

define <4 x double> @test_x86_fma_vfmaddsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmaddsub_pd_256:
; CHECK-NEXT: # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfmaddsub213pd (%r8), %ymm1, %ymm0
;
; CHECK-FMA-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0
;
; CHECK-FMA4-NEXT: vfmaddsubpd %ymm2, %ymm1, %ymm0, %ymm0
;
; CHECK-NEXT: retq
  %res = call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>)

; VFMSUBADD
define <4 x float> @test_x86_fma_vfmsubadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsubadd_ps:
; CHECK-NEXT: # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfmsubadd213ps (%r8), %xmm1, %xmm0
;
; CHECK-FMA-NEXT: vfmsubadd213ps %xmm2, %xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfmsubaddps %xmm2, %xmm1, %xmm0, %xmm0
;
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.fma.vfmsubadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma.vfmsubadd.ps(<4 x float>, <4 x float>, <4 x float>)

define <2 x double> @test_x86_fma_vfmsubadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsubadd_pd:
; CHECK-NEXT: # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfmsubadd213pd (%r8), %xmm1, %xmm0
;
; CHECK-FMA-NEXT: vfmsubadd213pd %xmm2, %xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfmsubaddpd %xmm2, %xmm1, %xmm0, %xmm0
;
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.fma.vfmsubadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma.vfmsubadd.pd(<2 x double>, <2 x double>, <2 x double>)

define <8 x float> @test_x86_fma_vfmsubadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsubadd_ps_256:
; CHECK-NEXT: # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfmsubadd213ps (%r8), %ymm1, %ymm0
;
; CHECK-FMA-NEXT: vfmsubadd213ps %ymm2, %ymm1, %ymm0
;
; CHECK-FMA4-NEXT: vfmsubaddps %ymm2, %ymm1, %ymm0, %ymm0
;
; CHECK-NEXT: retq
  %res = call <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float>, <8 x float>, <8 x float>)

define <4 x double> @test_x86_fma_vfmsubadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsubadd_pd_256:
; CHECK-NEXT: # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfmsubadd213pd (%r8), %ymm1, %ymm0
;
; CHECK-FMA-NEXT: vfmsubadd213pd %ymm2, %ymm1, %ymm0
;
; CHECK-FMA4-NEXT: vfmsubaddpd %ymm2, %ymm1, %ymm0, %ymm0
;
; CHECK-NEXT: retq
  %res = call <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double>, <4 x double>, <4 x double>)

; Attribute group referenced as #0 by every test function in this file.
attributes #0 = { nounwind }