; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=FMA
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=FMA4
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=FMA4
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=AVX512

; Checks that, with -fp-contract=fast, separate fmul/fadd/fsub operations
; (including negation written as an fsub from -0.0) are selected as fused
; multiply-add instructions for the feature sets exercised above:
; FMA3 (+fma), FMA4 (+fma4) and AVX-512 (+avx512dq,+avx512vl).
; The check lines are generated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.

;
; Pattern: (fadd (fmul x, y), z) -> (fmadd x,y,z)
;

define float @test_f32_fmadd(float %a0, float %a1, float %a2) {
; FMA-LABEL: test_f32_fmadd:
; FMA:       # BB#0:
; FMA-NEXT:    vfmadd213ss %xmm2, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_f32_fmadd:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmaddss %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_f32_fmadd:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmadd213ss %xmm2, %xmm0, %xmm1
; AVX512-NEXT:    vmovaps %zmm1, %zmm0
; AVX512-NEXT:    retq
  %x = fmul float %a0, %a1
  %res = fadd float %x, %a2
  ret float %res
}

define <4 x float> @test_4f32_fmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
; FMA-LABEL: test_4f32_fmadd:
; FMA:       # BB#0:
; FMA-NEXT:    vfmadd213ps %xmm2, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_4f32_fmadd:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmaddps %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_4f32_fmadd:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmadd213ps %xmm2, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %x = fmul <4 x float> %a0, %a1
  %res = fadd <4 x float> %x, %a2
  ret <4 x float> %res
}

define <8 x float> @test_8f32_fmadd(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
; FMA-LABEL: test_8f32_fmadd:
; FMA:       # BB#0:
; FMA-NEXT:    vfmadd213ps %ymm2, %ymm1, %ymm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_8f32_fmadd:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmaddps %ymm2, %ymm1, %ymm0, %ymm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_8f32_fmadd:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmadd213ps %ymm2, %ymm1, %ymm0
; AVX512-NEXT:    retq
  %x = fmul <8 x float> %a0, %a1
  %res = fadd <8 x float> %x, %a2
  ret <8 x float> %res
}

define double @test_f64_fmadd(double %a0, double %a1, double %a2) {
; FMA-LABEL: test_f64_fmadd:
; FMA:       # BB#0:
; FMA-NEXT:    vfmadd213sd %xmm2, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_f64_fmadd:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_f64_fmadd:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmadd213sd %xmm2, %xmm0, %xmm1
; AVX512-NEXT:    vmovaps %zmm1, %zmm0
; AVX512-NEXT:    retq
  %x = fmul double %a0, %a1
  %res = fadd double %x, %a2
  ret double %res
}

define <2 x double> @test_2f64_fmadd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
; FMA-LABEL: test_2f64_fmadd:
; FMA:       # BB#0:
; FMA-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_2f64_fmadd:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_2f64_fmadd:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %x = fmul <2 x double> %a0, %a1
  %res = fadd <2 x double> %x, %a2
  ret <2 x double> %res
}

define <4 x double> @test_4f64_fmadd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
; FMA-LABEL: test_4f64_fmadd:
; FMA:       # BB#0:
; FMA-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_4f64_fmadd:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_4f64_fmadd:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0
; AVX512-NEXT:    retq
  %x = fmul <4 x double> %a0, %a1
  %res = fadd <4 x double> %x, %a2
  ret <4 x double> %res
}

;
; Pattern: (fsub (fmul x, y), z) -> (fmsub x, y, z)
;

define float @test_f32_fmsub(float %a0, float %a1, float %a2) {
; FMA-LABEL: test_f32_fmsub:
; FMA:       # BB#0:
; FMA-NEXT:    vfmsub213ss %xmm2, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_f32_fmsub:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmsubss %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_f32_fmsub:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmsub213ss %xmm2, %xmm0, %xmm1
; AVX512-NEXT:    vmovaps %zmm1, %zmm0
; AVX512-NEXT:    retq
  %x = fmul float %a0, %a1
  %res = fsub float %x, %a2
  ret float %res
}

define <4 x float> @test_4f32_fmsub(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
; FMA-LABEL: test_4f32_fmsub:
; FMA:       # BB#0:
; FMA-NEXT:    vfmsub213ps %xmm2, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_4f32_fmsub:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_4f32_fmsub:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmsub213ps %xmm2, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %x = fmul <4 x float> %a0, %a1
  %res = fsub <4 x float> %x, %a2
  ret <4 x float> %res
}

define <8 x float> @test_8f32_fmsub(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
; FMA-LABEL: test_8f32_fmsub:
; FMA:       # BB#0:
; FMA-NEXT:    vfmsub213ps %ymm2, %ymm1, %ymm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_8f32_fmsub:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmsubps %ymm2, %ymm1, %ymm0, %ymm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_8f32_fmsub:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmsub213ps %ymm2, %ymm1, %ymm0
; AVX512-NEXT:    retq
  %x = fmul <8 x float> %a0, %a1
  %res = fsub <8 x float> %x, %a2
  ret <8 x float> %res
}

define double @test_f64_fmsub(double %a0, double %a1, double %a2) {
; FMA-LABEL: test_f64_fmsub:
; FMA:       # BB#0:
; FMA-NEXT:    vfmsub213sd %xmm2, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_f64_fmsub:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_f64_fmsub:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmsub213sd %xmm2, %xmm0, %xmm1
; AVX512-NEXT:    vmovaps %zmm1, %zmm0
; AVX512-NEXT:    retq
  %x = fmul double %a0, %a1
  %res = fsub double %x, %a2
  ret double %res
}

define <2 x double> @test_2f64_fmsub(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
; FMA-LABEL: test_2f64_fmsub:
; FMA:       # BB#0:
; FMA-NEXT:    vfmsub213pd %xmm2, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_2f64_fmsub:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_2f64_fmsub:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmsub213pd %xmm2, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %x = fmul <2 x double> %a0, %a1
  %res = fsub <2 x double> %x, %a2
  ret <2 x double> %res
}

define <4 x double> @test_4f64_fmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
; FMA-LABEL: test_4f64_fmsub:
; FMA:       # BB#0:
; FMA-NEXT:    vfmsub213pd %ymm2, %ymm1, %ymm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_4f64_fmsub:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_4f64_fmsub:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmsub213pd %ymm2, %ymm1, %ymm0
; AVX512-NEXT:    retq
  %x = fmul <4 x double> %a0, %a1
  %res = fsub <4 x double> %x, %a2
  ret <4 x double> %res
}

;
; Pattern: (fsub z, (fmul x, y)) -> (fnmadd x, y, z)
;

define float @test_f32_fnmadd(float %a0, float %a1, float %a2) {
; FMA-LABEL: test_f32_fnmadd:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmadd213ss %xmm2, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_f32_fnmadd:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_f32_fnmadd:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmadd213ss %xmm2, %xmm0, %xmm1
; AVX512-NEXT:    vmovaps %zmm1, %zmm0
; AVX512-NEXT:    retq
  %x = fmul float %a0, %a1
  %res = fsub float %a2, %x
  ret float %res
}

define <4 x float> @test_4f32_fnmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
; FMA-LABEL: test_4f32_fnmadd:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmadd213ps %xmm2, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_4f32_fnmadd:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_4f32_fnmadd:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmadd213ps %xmm2, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %x = fmul <4 x float> %a0, %a1
  %res = fsub <4 x float> %a2, %x
  ret <4 x float> %res
}

define <8 x float> @test_8f32_fnmadd(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
; FMA-LABEL: test_8f32_fnmadd:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmadd213ps %ymm2, %ymm1, %ymm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_8f32_fnmadd:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_8f32_fnmadd:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmadd213ps %ymm2, %ymm1, %ymm0
; AVX512-NEXT:    retq
  %x = fmul <8 x float> %a0, %a1
  %res = fsub <8 x float> %a2, %x
  ret <8 x float> %res
}

define double @test_f64_fnmadd(double %a0, double %a1, double %a2) {
; FMA-LABEL: test_f64_fnmadd:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmadd213sd %xmm2, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_f64_fnmadd:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_f64_fnmadd:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmadd213sd %xmm2, %xmm0, %xmm1
; AVX512-NEXT:    vmovaps %zmm1, %zmm0
; AVX512-NEXT:    retq
  %x = fmul double %a0, %a1
  %res = fsub double %a2, %x
  ret double %res
}

define <2 x double> @test_2f64_fnmadd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
; FMA-LABEL: test_2f64_fnmadd:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmadd213pd %xmm2, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_2f64_fnmadd:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_2f64_fnmadd:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmadd213pd %xmm2, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %x = fmul <2 x double> %a0, %a1
  %res = fsub <2 x double> %a2, %x
  ret <2 x double> %res
}

define <4 x double> @test_4f64_fnmadd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
; FMA-LABEL: test_4f64_fnmadd:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmadd213pd %ymm2, %ymm1, %ymm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_4f64_fnmadd:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_4f64_fnmadd:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmadd213pd %ymm2, %ymm1, %ymm0
; AVX512-NEXT:    retq
  %x = fmul <4 x double> %a0, %a1
  %res = fsub <4 x double> %a2, %x
  ret <4 x double> %res
}

;
; Pattern: (fsub (fneg (fmul x, y)), z) -> (fnmsub x, y, z)
;

define float @test_f32_fnmsub(float %a0, float %a1, float %a2) {
; FMA-LABEL: test_f32_fnmsub:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmsub213ss %xmm2, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_f32_fnmsub:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_f32_fnmsub:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmsub213ss %xmm2, %xmm0, %xmm1
; AVX512-NEXT:    vmovaps %zmm1, %zmm0
; AVX512-NEXT:    retq
  %x = fmul float %a0, %a1
  %y = fsub float -0.000000e+00, %x
  %res = fsub float %y, %a2
  ret float %res
}

define <4 x float> @test_4f32_fnmsub(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
; FMA-LABEL: test_4f32_fnmsub:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmsub213ps %xmm2, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_4f32_fnmsub:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_4f32_fnmsub:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmsub213ps %xmm2, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %x = fmul <4 x float> %a0, %a1
  %y = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x
  %res = fsub <4 x float> %y, %a2
  ret <4 x float> %res
}

define <8 x float> @test_8f32_fnmsub(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
; FMA-LABEL: test_8f32_fnmsub:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmsub213ps %ymm2, %ymm1, %ymm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_8f32_fnmsub:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_8f32_fnmsub:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmsub213ps %ymm2, %ymm1, %ymm0
; AVX512-NEXT:    retq
  %x = fmul <8 x float> %a0, %a1
  %y = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x
  %res = fsub <8 x float> %y, %a2
  ret <8 x float> %res
}

define double @test_f64_fnmsub(double %a0, double %a1, double %a2) {
; FMA-LABEL: test_f64_fnmsub:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmsub213sd %xmm2, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_f64_fnmsub:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_f64_fnmsub:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmsub213sd %xmm2, %xmm0, %xmm1
; AVX512-NEXT:    vmovaps %zmm1, %zmm0
; AVX512-NEXT:    retq
  %x = fmul double %a0, %a1
  %y = fsub double -0.000000e+00, %x
  %res = fsub double %y, %a2
  ret double %res
}

define <2 x double> @test_2f64_fnmsub(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
; FMA-LABEL: test_2f64_fnmsub:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmsub213pd %xmm2, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_2f64_fnmsub:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_2f64_fnmsub:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmsub213pd %xmm2, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %x = fmul <2 x double> %a0, %a1
  %y = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %x
  %res = fsub <2 x double> %y, %a2
  ret <2 x double> %res
}

define <4 x double> @test_4f64_fnmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
; FMA-LABEL: test_4f64_fnmsub:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmsub213pd %ymm2, %ymm1, %ymm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_4f64_fnmsub:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_4f64_fnmsub:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmsub213pd %ymm2, %ymm1, %ymm0
; AVX512-NEXT:    retq
  %x = fmul <4 x double> %a0, %a1
  %y = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x
  %res = fsub <4 x double> %y, %a2
  ret <4 x double> %res
}

;
; Load Folding Patterns
;

define <4 x float> @test_4f32_fmadd_load(<4 x float>* %a0, <4 x float> %a1, <4 x float> %a2) {
; FMA-LABEL: test_4f32_fmadd_load:
; FMA:       # BB#0:
; FMA-NEXT:    vfmadd132ps (%rdi), %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_4f32_fmadd_load:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmaddps %xmm1, (%rdi), %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_4f32_fmadd_load:
; AVX512:       # BB#0:
; AVX512-NEXT:    vmovaps (%rdi), %xmm2
; AVX512-NEXT:    vfmadd213ps %xmm1, %xmm0, %xmm2
; AVX512-NEXT:    vmovaps %zmm2, %zmm0
; AVX512-NEXT:    retq
  %x = load <4 x float>, <4 x float>* %a0
  %y = fmul <4 x float> %x, %a1
  %res = fadd <4 x float> %y, %a2
  ret <4 x float> %res
}

define <2 x double> @test_2f64_fmsub_load(<2 x double>* %a0, <2 x double> %a1, <2 x double> %a2) {
; FMA-LABEL: test_2f64_fmsub_load:
; FMA:       # BB#0:
; FMA-NEXT:    vfmsub132pd (%rdi), %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_2f64_fmsub_load:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmsubpd %xmm1, (%rdi), %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_2f64_fmsub_load:
; AVX512:       # BB#0:
; AVX512-NEXT:    vmovapd (%rdi), %xmm2
; AVX512-NEXT:    vfmsub213pd %xmm1, %xmm0, %xmm2
; AVX512-NEXT:    vmovaps %zmm2, %zmm0
; AVX512-NEXT:    retq
  %x = load <2 x double>, <2 x double>* %a0
  %y = fmul <2 x double> %x, %a1
  %res = fsub <2 x double> %y, %a2
  ret <2 x double> %res
}

;
; Patterns (+ fneg variants): mul(add(1.0,x),y), mul(sub(1.0,x),y), mul(sub(x,1.0),y)
;

define <4 x float> @test_v4f32_mul_add_x_one_y(<4 x float> %x, <4 x float> %y) {
; FMA-LABEL: test_v4f32_mul_add_x_one_y:
; FMA:       # BB#0:
; FMA-NEXT:    vfmadd213ps %xmm1, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v4f32_mul_add_x_one_y:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v4f32_mul_add_x_one_y:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmadd213ps %xmm1, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %a = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
  %m = fmul <4 x float> %a, %y
  ret <4 x float> %m
}

define <4 x float> @test_v4f32_mul_y_add_x_one(<4 x float> %x, <4 x float> %y) {
; FMA-LABEL: test_v4f32_mul_y_add_x_one:
; FMA:       # BB#0:
; FMA-NEXT:    vfmadd213ps %xmm1, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v4f32_mul_y_add_x_one:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v4f32_mul_y_add_x_one:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmadd213ps %xmm1, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %a = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
  %m = fmul <4 x float> %y, %a
  ret <4 x float> %m
}

define <4 x float> @test_v4f32_mul_add_x_negone_y(<4 x float> %x, <4 x float> %y) {
; FMA-LABEL: test_v4f32_mul_add_x_negone_y:
; FMA:       # BB#0:
; FMA-NEXT:    vfmsub213ps %xmm1, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v4f32_mul_add_x_negone_y:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v4f32_mul_add_x_negone_y:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmsub213ps %xmm1, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %a = fadd <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0>
  %m = fmul <4 x float> %a, %y
  ret <4 x float> %m
}

define <4 x float> @test_v4f32_mul_y_add_x_negone(<4 x float> %x, <4 x float> %y) {
; FMA-LABEL: test_v4f32_mul_y_add_x_negone:
; FMA:       # BB#0:
; FMA-NEXT:    vfmsub213ps %xmm1, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v4f32_mul_y_add_x_negone:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v4f32_mul_y_add_x_negone:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmsub213ps %xmm1, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %a = fadd <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0>
  %m = fmul <4 x float> %y, %a
  ret <4 x float> %m
}

define <4 x float> @test_v4f32_mul_sub_one_x_y(<4 x float> %x, <4 x float> %y) {
; FMA-LABEL: test_v4f32_mul_sub_one_x_y:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmadd213ps %xmm1, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v4f32_mul_sub_one_x_y:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmaddps %xmm1, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v4f32_mul_sub_one_x_y:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmadd213ps %xmm1, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %s = fsub <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
  %m = fmul <4 x float> %s, %y
  ret <4 x float> %m
}

define <4 x float> @test_v4f32_mul_y_sub_one_x(<4 x float> %x, <4 x float> %y) {
; FMA-LABEL: test_v4f32_mul_y_sub_one_x:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmadd213ps %xmm1, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v4f32_mul_y_sub_one_x:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmaddps %xmm1, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v4f32_mul_y_sub_one_x:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmadd213ps %xmm1, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %s = fsub <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
  %m = fmul <4 x float> %y, %s
  ret <4 x float> %m
}

define <4 x float> @test_v4f32_mul_sub_negone_x_y(<4 x float> %x, <4 x float> %y) {
; FMA-LABEL: test_v4f32_mul_sub_negone_x_y:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmsub213ps %xmm1, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v4f32_mul_sub_negone_x_y:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmsubps %xmm1, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v4f32_mul_sub_negone_x_y:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmsub213ps %xmm1, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %s = fsub <4 x float> <float -1.0, float -1.0, float -1.0, float -1.0>, %x
  %m = fmul <4 x float> %s, %y
  ret <4 x float> %m
}

define <4 x float> @test_v4f32_mul_y_sub_negone_x(<4 x float> %x, <4 x float> %y) {
; FMA-LABEL: test_v4f32_mul_y_sub_negone_x:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmsub213ps %xmm1, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v4f32_mul_y_sub_negone_x:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmsubps %xmm1, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v4f32_mul_y_sub_negone_x:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmsub213ps %xmm1, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %s = fsub <4 x float> <float -1.0, float -1.0, float -1.0, float -1.0>, %x
  %m = fmul <4 x float> %y, %s
  ret <4 x float> %m
}

define <4 x float> @test_v4f32_mul_sub_x_one_y(<4 x float> %x, <4 x float> %y) {
; FMA-LABEL: test_v4f32_mul_sub_x_one_y:
; FMA:       # BB#0:
; FMA-NEXT:    vfmsub213ps %xmm1, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v4f32_mul_sub_x_one_y:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v4f32_mul_sub_x_one_y:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmsub213ps %xmm1, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %s = fsub <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
  %m = fmul <4 x float> %s, %y
  ret <4 x float> %m
}

define <4 x float> @test_v4f32_mul_y_sub_x_one(<4 x float> %x, <4 x float> %y) {
; FMA-LABEL: test_v4f32_mul_y_sub_x_one:
; FMA:       # BB#0:
; FMA-NEXT:    vfmsub213ps %xmm1, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v4f32_mul_y_sub_x_one:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v4f32_mul_y_sub_x_one:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmsub213ps %xmm1, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %s = fsub <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
  %m = fmul <4 x float> %y, %s
  ret <4 x float> %m
}

define <4 x float> @test_v4f32_mul_sub_x_negone_y(<4 x float> %x, <4 x float> %y) {
; FMA-LABEL: test_v4f32_mul_sub_x_negone_y:
; FMA:       # BB#0:
; FMA-NEXT:    vfmadd213ps %xmm1, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v4f32_mul_sub_x_negone_y:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v4f32_mul_sub_x_negone_y:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmadd213ps %xmm1, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %s = fsub <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0>
  %m = fmul <4 x float> %s, %y
  ret <4 x float> %m
}

define <4 x float> @test_v4f32_mul_y_sub_x_negone(<4 x float> %x, <4 x float> %y) {
; FMA-LABEL: test_v4f32_mul_y_sub_x_negone:
; FMA:       # BB#0:
; FMA-NEXT:    vfmadd213ps %xmm1, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v4f32_mul_y_sub_x_negone:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v4f32_mul_y_sub_x_negone:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmadd213ps %xmm1, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %s = fsub <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0>
  %m = fmul <4 x float> %y, %s
  ret <4 x float> %m
}

;
; Interpolation Patterns: add(mul(x,t),mul(sub(1.0,t),y))
;

define float @test_f32_interp(float %x, float %y, float %t) {
; FMA-LABEL: test_f32_interp:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmadd213ss %xmm1, %xmm2, %xmm1
; FMA-NEXT:    vfmadd213ss %xmm1, %xmm2, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_f32_interp:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmaddss %xmm1, %xmm1, %xmm2, %xmm1
; FMA4-NEXT:    vfmaddss %xmm1, %xmm2, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_f32_interp:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmadd213ss %xmm1, %xmm2, %xmm1
; AVX512-NEXT:    vfmadd213ss %xmm1, %xmm0, %xmm2
; AVX512-NEXT:    vmovaps %zmm2, %zmm0
; AVX512-NEXT:    retq
  %t1 = fsub float 1.0, %t
  %tx = fmul float %x, %t
  %ty = fmul float %y, %t1
  %r = fadd float %tx, %ty
  ret float %r
}

define <4 x float> @test_v4f32_interp(<4 x float> %x, <4 x float> %y, <4 x float> %t) {
; FMA-LABEL: test_v4f32_interp:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmadd213ps %xmm1, %xmm2, %xmm1
; FMA-NEXT:    vfmadd213ps %xmm1, %xmm2, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v4f32_interp:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmaddps %xmm1, %xmm1, %xmm2, %xmm1
; FMA4-NEXT:    vfmaddps %xmm1, %xmm2, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v4f32_interp:
; AVX512:       # BB#0:
; AVX512-NEXT:    vmovaps %zmm2, %zmm3
; AVX512-NEXT:    vfnmadd213ps %xmm1, %xmm1, %xmm3
; AVX512-NEXT:    vfmadd213ps %xmm3, %xmm2, %xmm0
; AVX512-NEXT:    retq
  %t1 = fsub <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %t
  %tx = fmul <4 x float> %x, %t
  %ty = fmul <4 x float> %y, %t1
  %r = fadd <4 x float> %tx, %ty
  ret <4 x float> %r
}

define <8 x float> @test_v8f32_interp(<8 x float> %x, <8 x float> %y, <8 x float> %t) {
; FMA-LABEL: test_v8f32_interp:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmadd213ps %ymm1, %ymm2, %ymm1
; FMA-NEXT:    vfmadd213ps %ymm1, %ymm2, %ymm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v8f32_interp:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmaddps %ymm1, %ymm1, %ymm2, %ymm1
; FMA4-NEXT:    vfmaddps %ymm1, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v8f32_interp:
; AVX512:       # BB#0:
; AVX512-NEXT:    vmovaps %zmm2, %zmm3
; AVX512-NEXT:    vfnmadd213ps %ymm1, %ymm1, %ymm3
; AVX512-NEXT:    vfmadd213ps %ymm3, %ymm2, %ymm0
; AVX512-NEXT:    retq
  %t1 = fsub <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %t
  %tx = fmul <8 x float> %x, %t
  %ty = fmul <8 x float> %y, %t1
  %r = fadd <8 x float> %tx, %ty
  ret <8 x float> %r
}

define double @test_f64_interp(double %x, double %y, double %t) {
; FMA-LABEL: test_f64_interp:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmadd213sd %xmm1, %xmm2, %xmm1
; FMA-NEXT:    vfmadd213sd %xmm1, %xmm2, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_f64_interp:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmaddsd %xmm1, %xmm1, %xmm2, %xmm1
; FMA4-NEXT:    vfmaddsd %xmm1, %xmm2, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_f64_interp:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmadd213sd %xmm1, %xmm2, %xmm1
; AVX512-NEXT:    vfmadd213sd %xmm1, %xmm0, %xmm2
; AVX512-NEXT:    vmovaps %zmm2, %zmm0
; AVX512-NEXT:    retq
  %t1 = fsub double 1.0, %t
  %tx = fmul double %x, %t
  %ty = fmul double %y, %t1
  %r = fadd double %tx, %ty
  ret double %r
}

define <2 x double> @test_v2f64_interp(<2 x double> %x, <2 x double> %y, <2 x double> %t) {
; FMA-LABEL: test_v2f64_interp:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmadd213pd %xmm1, %xmm2, %xmm1
; FMA-NEXT:    vfmadd213pd %xmm1, %xmm2, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v2f64_interp:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmaddpd %xmm1, %xmm1, %xmm2, %xmm1
; FMA4-NEXT:    vfmaddpd %xmm1, %xmm2, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v2f64_interp:
; AVX512:       # BB#0:
; AVX512-NEXT:    vmovaps %zmm2, %zmm3
; AVX512-NEXT:    vfnmadd213pd %xmm1, %xmm1, %xmm3
; AVX512-NEXT:    vfmadd213pd %xmm3, %xmm2, %xmm0
; AVX512-NEXT:    retq
  %t1 = fsub <2 x double> <double 1.0, double 1.0>, %t
  %tx = fmul <2 x double> %x, %t
  %ty = fmul <2 x double> %y, %t1
  %r = fadd <2 x double> %tx, %ty
  ret <2 x double> %r
}

define <4 x double> @test_v4f64_interp(<4 x double> %x, <4 x double> %y, <4 x double> %t) {
; FMA-LABEL: test_v4f64_interp:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmadd213pd %ymm1, %ymm2, %ymm1
; FMA-NEXT:    vfmadd213pd %ymm1, %ymm2, %ymm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v4f64_interp:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmaddpd %ymm1, %ymm1, %ymm2, %ymm1
; FMA4-NEXT:    vfmaddpd %ymm1, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v4f64_interp:
; AVX512:       # BB#0:
; AVX512-NEXT:    vmovaps %zmm2, %zmm3
; AVX512-NEXT:    vfnmadd213pd %ymm1, %ymm1, %ymm3
; AVX512-NEXT:    vfmadd213pd %ymm3, %ymm2, %ymm0
; AVX512-NEXT:    retq
  %t1 = fsub <4 x double> <double 1.0, double 1.0, double 1.0, double 1.0>, %t
  %tx = fmul <4 x double> %x, %t
  %ty = fmul <4 x double> %y, %t1
  %r = fadd <4 x double> %tx, %ty
  ret <4 x double> %r
}

;
; Pattern: (fneg (fma x, y, z)) -> (fma x, -y, -z)
;

define <4 x float> @test_v4f32_fneg_fmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; FMA-LABEL: test_v4f32_fneg_fmadd:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmsub213ps %xmm2, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v4f32_fneg_fmadd:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v4f32_fneg_fmadd:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmsub213ps %xmm2, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %mul = fmul <4 x float> %a0, %a1
  %add = fadd <4 x float> %mul, %a2
  %neg = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %add
  ret <4 x float> %neg
}

define <4 x double> @test_v4f64_fneg_fmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; FMA-LABEL: test_v4f64_fneg_fmsub:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmadd213pd %ymm2, %ymm1, %ymm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v4f64_fneg_fmsub:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v4f64_fneg_fmsub:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmadd213pd %ymm2, %ymm1, %ymm0
; AVX512-NEXT:    retq
  %mul = fmul <4 x double> %a0, %a1
  %sub = fsub <4 x double> %mul, %a2
  %neg = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %sub
  ret <4 x double> %neg
}

define <4 x float> @test_v4f32_fneg_fnmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; FMA-LABEL: test_v4f32_fneg_fnmadd:
; FMA:       # BB#0:
; FMA-NEXT:    vfmsub213ps %xmm2, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v4f32_fneg_fnmadd:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmsubps
%xmm2, %xmm1, %xmm0, %xmm0 1026; FMA4-NEXT: retq 1027; 1028; AVX512-LABEL: test_v4f32_fneg_fnmadd: 1029; AVX512: # BB#0: 1030; AVX512-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0 1031; AVX512-NEXT: retq 1032 %mul = fmul <4 x float> %a0, %a1 1033 %neg0 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %mul 1034 %add = fadd <4 x float> %neg0, %a2 1035 %neg1 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %add 1036 ret <4 x float> %neg1 1037} 1038 1039define <4 x double> @test_v4f64_fneg_fnmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 { 1040; FMA-LABEL: test_v4f64_fneg_fnmsub: 1041; FMA: # BB#0: 1042; FMA-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 1043; FMA-NEXT: retq 1044; 1045; FMA4-LABEL: test_v4f64_fneg_fnmsub: 1046; FMA4: # BB#0: 1047; FMA4-NEXT: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0 1048; FMA4-NEXT: retq 1049; 1050; AVX512-LABEL: test_v4f64_fneg_fnmsub: 1051; AVX512: # BB#0: 1052; AVX512-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 1053; AVX512-NEXT: retq 1054 %mul = fmul <4 x double> %a0, %a1 1055 %neg0 = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %mul 1056 %sub = fsub <4 x double> %neg0, %a2 1057 %neg1 = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %sub 1058 ret <4 x double> %neg1 1059} 1060 1061; 1062; Pattern: (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2) 1063; 1064 1065define <4 x float> @test_v4f32_fma_x_c1_fmul_x_c2(<4 x float> %x) #0 { 1066; FMA-LABEL: test_v4f32_fma_x_c1_fmul_x_c2: 1067; FMA: # BB#0: 1068; FMA-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 1069; FMA-NEXT: retq 1070; 1071; FMA4-LABEL: test_v4f32_fma_x_c1_fmul_x_c2: 1072; FMA4: # BB#0: 1073; FMA4-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 1074; FMA4-NEXT: retq 1075; 1076; AVX512-LABEL: test_v4f32_fma_x_c1_fmul_x_c2: 1077; AVX512: # BB#0: 1078; AVX512-NEXT: vmulps {{.*}}(%rip){1to4}, %xmm0, %xmm0 1079; AVX512-NEXT: retq 1080 %m0 = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0> 1081 %m1 = fmul 
<4 x float> %x, <float 4.0, float 3.0, float 2.0, float 1.0> 1082 %a = fadd <4 x float> %m0, %m1 1083 ret <4 x float> %a 1084} 1085 1086; 1087; Pattern: (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y) 1088; 1089 1090define <4 x float> @test_v4f32_fma_fmul_x_c1_c2_y(<4 x float> %x, <4 x float> %y) #0 { 1091; FMA-LABEL: test_v4f32_fma_fmul_x_c1_c2_y: 1092; FMA: # BB#0: 1093; FMA-NEXT: vfmadd132ps {{.*}}(%rip), %xmm1, %xmm0 1094; FMA-NEXT: retq 1095; 1096; FMA4-LABEL: test_v4f32_fma_fmul_x_c1_c2_y: 1097; FMA4: # BB#0: 1098; FMA4-NEXT: vfmaddps %xmm1, {{.*}}(%rip), %xmm0, %xmm0 1099; FMA4-NEXT: retq 1100; 1101; AVX512-LABEL: test_v4f32_fma_fmul_x_c1_c2_y: 1102; AVX512: # BB#0: 1103; AVX512-NEXT: vfmadd231ps {{.*}}(%rip), %xmm0, %xmm1 1104; AVX512-NEXT: vmovaps %zmm1, %zmm0 1105; AVX512-NEXT: retq 1106 %m0 = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0> 1107 %m1 = fmul <4 x float> %m0, <float 4.0, float 3.0, float 2.0, float 1.0> 1108 %a = fadd <4 x float> %m1, %y 1109 ret <4 x float> %a 1110} 1111 1112; Pattern: (fneg (fmul x, y)) -> (fnmsub x, y, 0) 1113 1114define double @test_f64_fneg_fmul(double %x, double %y) #0 { 1115; FMA-LABEL: test_f64_fneg_fmul: 1116; FMA: # BB#0: 1117; FMA-NEXT: vxorpd %xmm2, %xmm2, %xmm2 1118; FMA-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0 1119; FMA-NEXT: retq 1120; 1121; FMA4-LABEL: test_f64_fneg_fmul: 1122; FMA4: # BB#0: 1123; FMA4-NEXT: vxorpd %xmm2, %xmm2, %xmm2 1124; FMA4-NEXT: vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0 1125; FMA4-NEXT: retq 1126; 1127; AVX512-LABEL: test_f64_fneg_fmul: 1128; AVX512: # BB#0: 1129; AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2 1130; AVX512-NEXT: vfnmsub213sd %xmm2, %xmm0, %xmm1 1131; AVX512-NEXT: vmovaps %zmm1, %zmm0 1132; AVX512-NEXT: retq 1133 %m = fmul nsz double %x, %y 1134 %n = fsub double -0.0, %m 1135 ret double %n 1136} 1137 1138define <4 x float> @test_v4f32_fneg_fmul(<4 x float> %x, <4 x float> %y) #0 { 1139; FMA-LABEL: test_v4f32_fneg_fmul: 1140; FMA: # BB#0: 1141; FMA-NEXT: vxorps %xmm2, 
%xmm2, %xmm2 1142; FMA-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0 1143; FMA-NEXT: retq 1144; 1145; FMA4-LABEL: test_v4f32_fneg_fmul: 1146; FMA4: # BB#0: 1147; FMA4-NEXT: vxorps %xmm2, %xmm2, %xmm2 1148; FMA4-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0 1149; FMA4-NEXT: retq 1150; 1151; AVX512-LABEL: test_v4f32_fneg_fmul: 1152; AVX512: # BB#0: 1153; AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2 1154; AVX512-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0 1155; AVX512-NEXT: retq 1156 %m = fmul nsz <4 x float> %x, %y 1157 %n = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %m 1158 ret <4 x float> %n 1159} 1160 1161define <4 x double> @test_v4f64_fneg_fmul(<4 x double> %x, <4 x double> %y) #0 { 1162; FMA-LABEL: test_v4f64_fneg_fmul: 1163; FMA: # BB#0: 1164; FMA-NEXT: vxorpd %ymm2, %ymm2, %ymm2 1165; FMA-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0 1166; FMA-NEXT: retq 1167; 1168; FMA4-LABEL: test_v4f64_fneg_fmul: 1169; FMA4: # BB#0: 1170; FMA4-NEXT: vxorpd %ymm2, %ymm2, %ymm2 1171; FMA4-NEXT: vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0 1172; FMA4-NEXT: retq 1173; 1174; AVX512-LABEL: test_v4f64_fneg_fmul: 1175; AVX512: # BB#0: 1176; AVX512-NEXT: vxorps %ymm2, %ymm2, %ymm2 1177; AVX512-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0 1178; AVX512-NEXT: retq 1179 %m = fmul nsz <4 x double> %x, %y 1180 %n = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %m 1181 ret <4 x double> %n 1182} 1183 1184define <4 x double> @test_v4f64_fneg_fmul_no_nsz(<4 x double> %x, <4 x double> %y) #0 { 1185; ALL-LABEL: test_v4f64_fneg_fmul_no_nsz: 1186; ALL: # BB#0: 1187; ALL-NEXT: vmulpd %ymm1, %ymm0, %ymm0 1188; ALL-NEXT: vxorpd {{.*}}(%rip), %ymm0, %ymm0 1189; ALL-NEXT: retq 1190 %m = fmul <4 x double> %x, %y 1191 %n = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %m 1192 ret <4 x double> %n 1193} 1194 1195attributes #0 = { "unsafe-fp-math"="true" } 1196