; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=FMA
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=FMA4
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=FMA4
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=AVX512

; Checks that separate fmul/fadd/fsub IR operations are contracted into single
; fused multiply-add instructions when llc runs with -fp-contract=fast.
; Four feature sets are exercised by the invocations above - FMA3 with AVX,
; FMA4 together with FMA3, FMA4 alone (the two FMA4 runs share one check
; prefix), and AVX512DQ+VL.
; The per-function assertion lines are regenerated by the script named in the
; NOTE at the top of the file; regenerate them instead of editing by hand.

;
; Pattern: (fadd (fmul x, y), z) -> (fmadd x,y,z)
;
; Covered for scalar float/double and for 128-bit and 256-bit vectors of each.
;

define float @test_f32_fmadd(float %a0, float %a1, float %a2) {
; FMA-LABEL: test_f32_fmadd:
; FMA:       # BB#0:
; FMA-NEXT:    vfmadd213ss %xmm2, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_f32_fmadd:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmaddss %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_f32_fmadd:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmadd213ss %xmm2, %xmm0, %xmm1
; AVX512-NEXT:    vmovaps %zmm1, %zmm0
; AVX512-NEXT:    retq
  %x = fmul float %a0, %a1
  %res = fadd float %x, %a2
  ret float %res
}

define <4 x float> @test_4f32_fmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
; FMA-LABEL: test_4f32_fmadd:
; FMA:       # BB#0:
; FMA-NEXT:    vfmadd213ps %xmm2, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_4f32_fmadd:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmaddps %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_4f32_fmadd:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmadd213ps %xmm2, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %x = fmul <4 x float> %a0, %a1
  %res = fadd <4 x float> %x, %a2
  ret <4 x float> %res
}

define <8 x float> @test_8f32_fmadd(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
; FMA-LABEL: test_8f32_fmadd:
; FMA:       # BB#0:
; FMA-NEXT:    vfmadd213ps %ymm2, %ymm1, %ymm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_8f32_fmadd:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmaddps %ymm2, %ymm1, %ymm0, %ymm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_8f32_fmadd:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmadd213ps %ymm2, %ymm1, %ymm0
; AVX512-NEXT:    retq
  %x = fmul <8 x float> %a0, %a1
  %res = fadd <8 x float> %x, %a2
  ret <8 x float> %res
}

define double @test_f64_fmadd(double %a0, double %a1, double %a2) {
; FMA-LABEL: test_f64_fmadd:
; FMA:       # BB#0:
; FMA-NEXT:    vfmadd213sd %xmm2, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_f64_fmadd:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_f64_fmadd:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmadd213sd %xmm2, %xmm0, %xmm1
; AVX512-NEXT:    vmovaps %zmm1, %zmm0
; AVX512-NEXT:    retq
  %x = fmul double %a0, %a1
  %res = fadd double %x, %a2
  ret double %res
}

define <2 x double> @test_2f64_fmadd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
; FMA-LABEL: test_2f64_fmadd:
; FMA:       # BB#0:
; FMA-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_2f64_fmadd:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_2f64_fmadd:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %x = fmul <2 x double> %a0, %a1
  %res = fadd <2 x double> %x, %a2
  ret <2 x double> %res
}

define <4 x double> @test_4f64_fmadd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
; FMA-LABEL: test_4f64_fmadd:
; FMA:       # BB#0:
; FMA-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_4f64_fmadd:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_4f64_fmadd:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0
; AVX512-NEXT:    retq
  %x = fmul <4 x double> %a0, %a1
  %res = fadd <4 x double> %x, %a2
  ret <4 x double> %res
}

;
; Pattern: (fsub (fmul x, y), z) -> (fmsub x, y, z)
;

define float @test_f32_fmsub(float %a0, float %a1, float %a2) {
; FMA-LABEL: test_f32_fmsub:
; FMA:       # BB#0:
; FMA-NEXT:    vfmsub213ss %xmm2, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_f32_fmsub:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmsubss %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_f32_fmsub:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmsub213ss %xmm2, %xmm0, %xmm1
; AVX512-NEXT:    vmovaps %zmm1, %zmm0
; AVX512-NEXT:    retq
  %x = fmul float %a0, %a1
  %res = fsub float %x, %a2
  ret float %res
}

define <4 x float> @test_4f32_fmsub(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
; FMA-LABEL: test_4f32_fmsub:
; FMA:       # BB#0:
; FMA-NEXT:    vfmsub213ps %xmm2, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_4f32_fmsub:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_4f32_fmsub:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmsub213ps %xmm2, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %x = fmul <4 x float> %a0, %a1
  %res = fsub <4 x float> %x, %a2
  ret <4 x float> %res
}

define <8 x float> @test_8f32_fmsub(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
; FMA-LABEL: test_8f32_fmsub:
; FMA:       # BB#0:
; FMA-NEXT:    vfmsub213ps %ymm2, %ymm1, %ymm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_8f32_fmsub:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmsubps %ymm2, %ymm1, %ymm0, %ymm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_8f32_fmsub:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmsub213ps %ymm2, %ymm1, %ymm0
; AVX512-NEXT:    retq
  %x = fmul <8 x float> %a0, %a1
  %res = fsub <8 x float> %x, %a2
  ret <8 x float> %res
}

198 199define double @test_f64_fmsub(double %a0, double %a1, double %a2) { 200; FMA-LABEL: test_f64_fmsub: 201; FMA: # BB#0: 202; FMA-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0 203; FMA-NEXT: retq 204; 205; FMA4-LABEL: test_f64_fmsub: 206; FMA4: # BB#0: 207; FMA4-NEXT: vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0 208; FMA4-NEXT: retq 209; 210; AVX512-LABEL: test_f64_fmsub: 211; AVX512: # BB#0: 212; AVX512-NEXT: vfmsub213sd %xmm2, %xmm0, %xmm1 213; AVX512-NEXT: vmovaps %zmm1, %zmm0 214; AVX512-NEXT: retq 215 %x = fmul double %a0, %a1 216 %res = fsub double %x, %a2 217 ret double %res 218} 219 220define <2 x double> @test_2f64_fmsub(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { 221; FMA-LABEL: test_2f64_fmsub: 222; FMA: # BB#0: 223; FMA-NEXT: vfmsub213pd %xmm2, %xmm1, %xmm0 224; FMA-NEXT: retq 225; 226; FMA4-LABEL: test_2f64_fmsub: 227; FMA4: # BB#0: 228; FMA4-NEXT: vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0 229; FMA4-NEXT: retq 230; 231; AVX512-LABEL: test_2f64_fmsub: 232; AVX512: # BB#0: 233; AVX512-NEXT: vfmsub213pd %xmm2, %xmm1, %xmm0 234; AVX512-NEXT: retq 235 %x = fmul <2 x double> %a0, %a1 236 %res = fsub <2 x double> %x, %a2 237 ret <2 x double> %res 238} 239 240define <4 x double> @test_4f64_fmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) { 241; FMA-LABEL: test_4f64_fmsub: 242; FMA: # BB#0: 243; FMA-NEXT: vfmsub213pd %ymm2, %ymm1, %ymm0 244; FMA-NEXT: retq 245; 246; FMA4-LABEL: test_4f64_fmsub: 247; FMA4: # BB#0: 248; FMA4-NEXT: vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0 249; FMA4-NEXT: retq 250; 251; AVX512-LABEL: test_4f64_fmsub: 252; AVX512: # BB#0: 253; AVX512-NEXT: vfmsub213pd %ymm2, %ymm1, %ymm0 254; AVX512-NEXT: retq 255 %x = fmul <4 x double> %a0, %a1 256 %res = fsub <4 x double> %x, %a2 257 ret <4 x double> %res 258} 259 260; 261; Pattern: (fsub z, (fmul x, y)) -> (fnmadd x, y, z) 262; 263 264define float @test_f32_fnmadd(float %a0, float %a1, float %a2) { 265; FMA-LABEL: test_f32_fnmadd: 266; FMA: # BB#0: 267; FMA-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 
268; FMA-NEXT: retq 269; 270; FMA4-LABEL: test_f32_fnmadd: 271; FMA4: # BB#0: 272; FMA4-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0 273; FMA4-NEXT: retq 274; 275; AVX512-LABEL: test_f32_fnmadd: 276; AVX512: # BB#0: 277; AVX512-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm1 278; AVX512-NEXT: vmovaps %zmm1, %zmm0 279; AVX512-NEXT: retq 280 %x = fmul float %a0, %a1 281 %res = fsub float %a2, %x 282 ret float %res 283} 284 285define <4 x float> @test_4f32_fnmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { 286; FMA-LABEL: test_4f32_fnmadd: 287; FMA: # BB#0: 288; FMA-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 289; FMA-NEXT: retq 290; 291; FMA4-LABEL: test_4f32_fnmadd: 292; FMA4: # BB#0: 293; FMA4-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0 294; FMA4-NEXT: retq 295; 296; AVX512-LABEL: test_4f32_fnmadd: 297; AVX512: # BB#0: 298; AVX512-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 299; AVX512-NEXT: retq 300 %x = fmul <4 x float> %a0, %a1 301 %res = fsub <4 x float> %a2, %x 302 ret <4 x float> %res 303} 304 305define <8 x float> @test_8f32_fnmadd(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) { 306; FMA-LABEL: test_8f32_fnmadd: 307; FMA: # BB#0: 308; FMA-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 309; FMA-NEXT: retq 310; 311; FMA4-LABEL: test_8f32_fnmadd: 312; FMA4: # BB#0: 313; FMA4-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0 314; FMA4-NEXT: retq 315; 316; AVX512-LABEL: test_8f32_fnmadd: 317; AVX512: # BB#0: 318; AVX512-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 319; AVX512-NEXT: retq 320 %x = fmul <8 x float> %a0, %a1 321 %res = fsub <8 x float> %a2, %x 322 ret <8 x float> %res 323} 324 325define double @test_f64_fnmadd(double %a0, double %a1, double %a2) { 326; FMA-LABEL: test_f64_fnmadd: 327; FMA: # BB#0: 328; FMA-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0 329; FMA-NEXT: retq 330; 331; FMA4-LABEL: test_f64_fnmadd: 332; FMA4: # BB#0: 333; FMA4-NEXT: vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0 334; FMA4-NEXT: retq 335; 336; AVX512-LABEL: test_f64_fnmadd: 337; AVX512: # BB#0: 338; AVX512-NEXT: 
vfnmadd213sd %xmm2, %xmm0, %xmm1 339; AVX512-NEXT: vmovaps %zmm1, %zmm0 340; AVX512-NEXT: retq 341 %x = fmul double %a0, %a1 342 %res = fsub double %a2, %x 343 ret double %res 344} 345 346define <2 x double> @test_2f64_fnmadd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { 347; FMA-LABEL: test_2f64_fnmadd: 348; FMA: # BB#0: 349; FMA-NEXT: vfnmadd213pd %xmm2, %xmm1, %xmm0 350; FMA-NEXT: retq 351; 352; FMA4-LABEL: test_2f64_fnmadd: 353; FMA4: # BB#0: 354; FMA4-NEXT: vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0 355; FMA4-NEXT: retq 356; 357; AVX512-LABEL: test_2f64_fnmadd: 358; AVX512: # BB#0: 359; AVX512-NEXT: vfnmadd213pd %xmm2, %xmm1, %xmm0 360; AVX512-NEXT: retq 361 %x = fmul <2 x double> %a0, %a1 362 %res = fsub <2 x double> %a2, %x 363 ret <2 x double> %res 364} 365 366define <4 x double> @test_4f64_fnmadd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) { 367; FMA-LABEL: test_4f64_fnmadd: 368; FMA: # BB#0: 369; FMA-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0 370; FMA-NEXT: retq 371; 372; FMA4-LABEL: test_4f64_fnmadd: 373; FMA4: # BB#0: 374; FMA4-NEXT: vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0 375; FMA4-NEXT: retq 376; 377; AVX512-LABEL: test_4f64_fnmadd: 378; AVX512: # BB#0: 379; AVX512-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0 380; AVX512-NEXT: retq 381 %x = fmul <4 x double> %a0, %a1 382 %res = fsub <4 x double> %a2, %x 383 ret <4 x double> %res 384} 385 386; 387; Pattern: (fsub (fneg (fmul x, y)), z) -> (fnmsub x, y, z) 388; 389 390define float @test_f32_fnmsub(float %a0, float %a1, float %a2) { 391; FMA-LABEL: test_f32_fnmsub: 392; FMA: # BB#0: 393; FMA-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 394; FMA-NEXT: retq 395; 396; FMA4-LABEL: test_f32_fnmsub: 397; FMA4: # BB#0: 398; FMA4-NEXT: vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0 399; FMA4-NEXT: retq 400; 401; AVX512-LABEL: test_f32_fnmsub: 402; AVX512: # BB#0: 403; AVX512-NEXT: vfnmsub213ss %xmm2, %xmm0, %xmm1 404; AVX512-NEXT: vmovaps %zmm1, %zmm0 405; AVX512-NEXT: retq 406 %x = fmul float %a0, %a1 407 %y = fsub float 
-0.000000e+00, %x 408 %res = fsub float %y, %a2 409 ret float %res 410} 411 412define <4 x float> @test_4f32_fnmsub(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { 413; FMA-LABEL: test_4f32_fnmsub: 414; FMA: # BB#0: 415; FMA-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0 416; FMA-NEXT: retq 417; 418; FMA4-LABEL: test_4f32_fnmsub: 419; FMA4: # BB#0: 420; FMA4-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0 421; FMA4-NEXT: retq 422; 423; AVX512-LABEL: test_4f32_fnmsub: 424; AVX512: # BB#0: 425; AVX512-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0 426; AVX512-NEXT: retq 427 %x = fmul <4 x float> %a0, %a1 428 %y = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x 429 %res = fsub <4 x float> %y, %a2 430 ret <4 x float> %res 431} 432 433define <8 x float> @test_8f32_fnmsub(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) { 434; FMA-LABEL: test_8f32_fnmsub: 435; FMA: # BB#0: 436; FMA-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0 437; FMA-NEXT: retq 438; 439; FMA4-LABEL: test_8f32_fnmsub: 440; FMA4: # BB#0: 441; FMA4-NEXT: vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0 442; FMA4-NEXT: retq 443; 444; AVX512-LABEL: test_8f32_fnmsub: 445; AVX512: # BB#0: 446; AVX512-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0 447; AVX512-NEXT: retq 448 %x = fmul <8 x float> %a0, %a1 449 %y = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x 450 %res = fsub <8 x float> %y, %a2 451 ret <8 x float> %res 452} 453 454define double @test_f64_fnmsub(double %a0, double %a1, double %a2) { 455; FMA-LABEL: test_f64_fnmsub: 456; FMA: # BB#0: 457; FMA-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0 458; FMA-NEXT: retq 459; 460; FMA4-LABEL: test_f64_fnmsub: 461; FMA4: # BB#0: 462; FMA4-NEXT: vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0 463; FMA4-NEXT: retq 464; 465; AVX512-LABEL: test_f64_fnmsub: 466; AVX512: # BB#0: 467; AVX512-NEXT: vfnmsub213sd %xmm2, %xmm0, 
%xmm1 468; AVX512-NEXT: vmovaps %zmm1, %zmm0 469; AVX512-NEXT: retq 470 %x = fmul double %a0, %a1 471 %y = fsub double -0.000000e+00, %x 472 %res = fsub double %y, %a2 473 ret double %res 474} 475 476define <2 x double> @test_2f64_fnmsub(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { 477; FMA-LABEL: test_2f64_fnmsub: 478; FMA: # BB#0: 479; FMA-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0 480; FMA-NEXT: retq 481; 482; FMA4-LABEL: test_2f64_fnmsub: 483; FMA4: # BB#0: 484; FMA4-NEXT: vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0 485; FMA4-NEXT: retq 486; 487; AVX512-LABEL: test_2f64_fnmsub: 488; AVX512: # BB#0: 489; AVX512-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0 490; AVX512-NEXT: retq 491 %x = fmul <2 x double> %a0, %a1 492 %y = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %x 493 %res = fsub <2 x double> %y, %a2 494 ret <2 x double> %res 495} 496 497define <4 x double> @test_4f64_fnmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) { 498; FMA-LABEL: test_4f64_fnmsub: 499; FMA: # BB#0: 500; FMA-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0 501; FMA-NEXT: retq 502; 503; FMA4-LABEL: test_4f64_fnmsub: 504; FMA4: # BB#0: 505; FMA4-NEXT: vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0 506; FMA4-NEXT: retq 507; 508; AVX512-LABEL: test_4f64_fnmsub: 509; AVX512: # BB#0: 510; AVX512-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0 511; AVX512-NEXT: retq 512 %x = fmul <4 x double> %a0, %a1 513 %y = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x 514 %res = fsub <4 x double> %y, %a2 515 ret <4 x double> %res 516} 517 518; 519; Load Folding Patterns 520; 521 522define <4 x float> @test_4f32_fmadd_load(<4 x float>* %a0, <4 x float> %a1, <4 x float> %a2) { 523; FMA-LABEL: test_4f32_fmadd_load: 524; FMA: # BB#0: 525; FMA-NEXT: vfmadd132ps (%rdi), %xmm1, %xmm0 526; FMA-NEXT: retq 527; 528; FMA4-LABEL: test_4f32_fmadd_load: 529; FMA4: # BB#0: 530; FMA4-NEXT: vfmaddps %xmm1, (%rdi), %xmm0, %xmm0 531; FMA4-NEXT: retq 532; 533; 
AVX512-LABEL: test_4f32_fmadd_load: 534; AVX512: # BB#0: 535; AVX512-NEXT: vmovaps (%rdi), %xmm2 536; AVX512-NEXT: vfmadd213ps %xmm1, %xmm0, %xmm2 537; AVX512-NEXT: vmovaps %zmm2, %zmm0 538; AVX512-NEXT: retq 539 %x = load <4 x float>, <4 x float>* %a0 540 %y = fmul <4 x float> %x, %a1 541 %res = fadd <4 x float> %y, %a2 542 ret <4 x float> %res 543} 544 545define <2 x double> @test_2f64_fmsub_load(<2 x double>* %a0, <2 x double> %a1, <2 x double> %a2) { 546; FMA-LABEL: test_2f64_fmsub_load: 547; FMA: # BB#0: 548; FMA-NEXT: vfmsub132pd (%rdi), %xmm1, %xmm0 549; FMA-NEXT: retq 550; 551; FMA4-LABEL: test_2f64_fmsub_load: 552; FMA4: # BB#0: 553; FMA4-NEXT: vfmsubpd %xmm1, (%rdi), %xmm0, %xmm0 554; FMA4-NEXT: retq 555; 556; AVX512-LABEL: test_2f64_fmsub_load: 557; AVX512: # BB#0: 558; AVX512-NEXT: vmovapd (%rdi), %xmm2 559; AVX512-NEXT: vfmsub213pd %xmm1, %xmm0, %xmm2 560; AVX512-NEXT: vmovaps %zmm2, %zmm0 561; AVX512-NEXT: retq 562 %x = load <2 x double>, <2 x double>* %a0 563 %y = fmul <2 x double> %x, %a1 564 %res = fsub <2 x double> %y, %a2 565 ret <2 x double> %res 566} 567 568; 569; Patterns (+ fneg variants): mul(add(1.0,x),y), mul(sub(1.0,x),y), mul(sub(x,1.0),y) 570; 571 572define <4 x float> @test_v4f32_mul_add_x_one_y(<4 x float> %x, <4 x float> %y) { 573; FMA-LABEL: test_v4f32_mul_add_x_one_y: 574; FMA: # BB#0: 575; FMA-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0 576; FMA-NEXT: retq 577; 578; FMA4-LABEL: test_v4f32_mul_add_x_one_y: 579; FMA4: # BB#0: 580; FMA4-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0 581; FMA4-NEXT: retq 582; 583; AVX512-LABEL: test_v4f32_mul_add_x_one_y: 584; AVX512: # BB#0: 585; AVX512-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0 586; AVX512-NEXT: retq 587 %a = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0> 588 %m = fmul <4 x float> %a, %y 589 ret <4 x float> %m 590} 591 592define <4 x float> @test_v4f32_mul_y_add_x_one(<4 x float> %x, <4 x float> %y) { 593; FMA-LABEL: test_v4f32_mul_y_add_x_one: 594; FMA: # BB#0: 595; FMA-NEXT: 
vfmadd213ps %xmm1, %xmm1, %xmm0 596; FMA-NEXT: retq 597; 598; FMA4-LABEL: test_v4f32_mul_y_add_x_one: 599; FMA4: # BB#0: 600; FMA4-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0 601; FMA4-NEXT: retq 602; 603; AVX512-LABEL: test_v4f32_mul_y_add_x_one: 604; AVX512: # BB#0: 605; AVX512-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0 606; AVX512-NEXT: retq 607 %a = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0> 608 %m = fmul <4 x float> %y, %a 609 ret <4 x float> %m 610} 611 612define <4 x float> @test_v4f32_mul_add_x_negone_y(<4 x float> %x, <4 x float> %y) { 613; FMA-LABEL: test_v4f32_mul_add_x_negone_y: 614; FMA: # BB#0: 615; FMA-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0 616; FMA-NEXT: retq 617; 618; FMA4-LABEL: test_v4f32_mul_add_x_negone_y: 619; FMA4: # BB#0: 620; FMA4-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0 621; FMA4-NEXT: retq 622; 623; AVX512-LABEL: test_v4f32_mul_add_x_negone_y: 624; AVX512: # BB#0: 625; AVX512-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0 626; AVX512-NEXT: retq 627 %a = fadd <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0> 628 %m = fmul <4 x float> %a, %y 629 ret <4 x float> %m 630} 631 632define <4 x float> @test_v4f32_mul_y_add_x_negone(<4 x float> %x, <4 x float> %y) { 633; FMA-LABEL: test_v4f32_mul_y_add_x_negone: 634; FMA: # BB#0: 635; FMA-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0 636; FMA-NEXT: retq 637; 638; FMA4-LABEL: test_v4f32_mul_y_add_x_negone: 639; FMA4: # BB#0: 640; FMA4-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0 641; FMA4-NEXT: retq 642; 643; AVX512-LABEL: test_v4f32_mul_y_add_x_negone: 644; AVX512: # BB#0: 645; AVX512-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0 646; AVX512-NEXT: retq 647 %a = fadd <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0> 648 %m = fmul <4 x float> %y, %a 649 ret <4 x float> %m 650} 651 652define <4 x float> @test_v4f32_mul_sub_one_x_y(<4 x float> %x, <4 x float> %y) { 653; FMA-LABEL: test_v4f32_mul_sub_one_x_y: 654; FMA: # BB#0: 655; FMA-NEXT: vfnmadd213ps %xmm1, %xmm1, %xmm0 656; 
FMA-NEXT: retq 657; 658; FMA4-LABEL: test_v4f32_mul_sub_one_x_y: 659; FMA4: # BB#0: 660; FMA4-NEXT: vfnmaddps %xmm1, %xmm1, %xmm0, %xmm0 661; FMA4-NEXT: retq 662; 663; AVX512-LABEL: test_v4f32_mul_sub_one_x_y: 664; AVX512: # BB#0: 665; AVX512-NEXT: vfnmadd213ps %xmm1, %xmm1, %xmm0 666; AVX512-NEXT: retq 667 %s = fsub <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x 668 %m = fmul <4 x float> %s, %y 669 ret <4 x float> %m 670} 671 672define <4 x float> @test_v4f32_mul_y_sub_one_x(<4 x float> %x, <4 x float> %y) { 673; FMA-LABEL: test_v4f32_mul_y_sub_one_x: 674; FMA: # BB#0: 675; FMA-NEXT: vfnmadd213ps %xmm1, %xmm1, %xmm0 676; FMA-NEXT: retq 677; 678; FMA4-LABEL: test_v4f32_mul_y_sub_one_x: 679; FMA4: # BB#0: 680; FMA4-NEXT: vfnmaddps %xmm1, %xmm1, %xmm0, %xmm0 681; FMA4-NEXT: retq 682; 683; AVX512-LABEL: test_v4f32_mul_y_sub_one_x: 684; AVX512: # BB#0: 685; AVX512-NEXT: vfnmadd213ps %xmm1, %xmm1, %xmm0 686; AVX512-NEXT: retq 687 %s = fsub <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x 688 %m = fmul <4 x float> %y, %s 689 ret <4 x float> %m 690} 691 692define <4 x float> @test_v4f32_mul_sub_negone_x_y(<4 x float> %x, <4 x float> %y) { 693; FMA-LABEL: test_v4f32_mul_sub_negone_x_y: 694; FMA: # BB#0: 695; FMA-NEXT: vfnmsub213ps %xmm1, %xmm1, %xmm0 696; FMA-NEXT: retq 697; 698; FMA4-LABEL: test_v4f32_mul_sub_negone_x_y: 699; FMA4: # BB#0: 700; FMA4-NEXT: vfnmsubps %xmm1, %xmm1, %xmm0, %xmm0 701; FMA4-NEXT: retq 702; 703; AVX512-LABEL: test_v4f32_mul_sub_negone_x_y: 704; AVX512: # BB#0: 705; AVX512-NEXT: vfnmsub213ps %xmm1, %xmm1, %xmm0 706; AVX512-NEXT: retq 707 %s = fsub <4 x float> <float -1.0, float -1.0, float -1.0, float -1.0>, %x 708 %m = fmul <4 x float> %s, %y 709 ret <4 x float> %m 710} 711 712define <4 x float> @test_v4f32_mul_y_sub_negone_x(<4 x float> %x, <4 x float> %y) { 713; FMA-LABEL: test_v4f32_mul_y_sub_negone_x: 714; FMA: # BB#0: 715; FMA-NEXT: vfnmsub213ps %xmm1, %xmm1, %xmm0 716; FMA-NEXT: retq 717; 718; FMA4-LABEL: 
test_v4f32_mul_y_sub_negone_x: 719; FMA4: # BB#0: 720; FMA4-NEXT: vfnmsubps %xmm1, %xmm1, %xmm0, %xmm0 721; FMA4-NEXT: retq 722; 723; AVX512-LABEL: test_v4f32_mul_y_sub_negone_x: 724; AVX512: # BB#0: 725; AVX512-NEXT: vfnmsub213ps %xmm1, %xmm1, %xmm0 726; AVX512-NEXT: retq 727 %s = fsub <4 x float> <float -1.0, float -1.0, float -1.0, float -1.0>, %x 728 %m = fmul <4 x float> %y, %s 729 ret <4 x float> %m 730} 731 732define <4 x float> @test_v4f32_mul_sub_x_one_y(<4 x float> %x, <4 x float> %y) { 733; FMA-LABEL: test_v4f32_mul_sub_x_one_y: 734; FMA: # BB#0: 735; FMA-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0 736; FMA-NEXT: retq 737; 738; FMA4-LABEL: test_v4f32_mul_sub_x_one_y: 739; FMA4: # BB#0: 740; FMA4-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0 741; FMA4-NEXT: retq 742; 743; AVX512-LABEL: test_v4f32_mul_sub_x_one_y: 744; AVX512: # BB#0: 745; AVX512-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0 746; AVX512-NEXT: retq 747 %s = fsub <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0> 748 %m = fmul <4 x float> %s, %y 749 ret <4 x float> %m 750} 751 752define <4 x float> @test_v4f32_mul_y_sub_x_one(<4 x float> %x, <4 x float> %y) { 753; FMA-LABEL: test_v4f32_mul_y_sub_x_one: 754; FMA: # BB#0: 755; FMA-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0 756; FMA-NEXT: retq 757; 758; FMA4-LABEL: test_v4f32_mul_y_sub_x_one: 759; FMA4: # BB#0: 760; FMA4-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0 761; FMA4-NEXT: retq 762; 763; AVX512-LABEL: test_v4f32_mul_y_sub_x_one: 764; AVX512: # BB#0: 765; AVX512-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0 766; AVX512-NEXT: retq 767 %s = fsub <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0> 768 %m = fmul <4 x float> %y, %s 769 ret <4 x float> %m 770} 771 772define <4 x float> @test_v4f32_mul_sub_x_negone_y(<4 x float> %x, <4 x float> %y) { 773; FMA-LABEL: test_v4f32_mul_sub_x_negone_y: 774; FMA: # BB#0: 775; FMA-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0 776; FMA-NEXT: retq 777; 778; FMA4-LABEL: test_v4f32_mul_sub_x_negone_y: 779; FMA4: # BB#0: 780; 
FMA4-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0 781; FMA4-NEXT: retq 782; 783; AVX512-LABEL: test_v4f32_mul_sub_x_negone_y: 784; AVX512: # BB#0: 785; AVX512-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0 786; AVX512-NEXT: retq 787 %s = fsub <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0> 788 %m = fmul <4 x float> %s, %y 789 ret <4 x float> %m 790} 791 792define <4 x float> @test_v4f32_mul_y_sub_x_negone(<4 x float> %x, <4 x float> %y) { 793; FMA-LABEL: test_v4f32_mul_y_sub_x_negone: 794; FMA: # BB#0: 795; FMA-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0 796; FMA-NEXT: retq 797; 798; FMA4-LABEL: test_v4f32_mul_y_sub_x_negone: 799; FMA4: # BB#0: 800; FMA4-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0 801; FMA4-NEXT: retq 802; 803; AVX512-LABEL: test_v4f32_mul_y_sub_x_negone: 804; AVX512: # BB#0: 805; AVX512-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0 806; AVX512-NEXT: retq 807 %s = fsub <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0> 808 %m = fmul <4 x float> %y, %s 809 ret <4 x float> %m 810} 811 812; 813; Interpolation Patterns: add(mul(x,t),mul(sub(1.0,t),y)) 814; 815 816define float @test_f32_interp(float %x, float %y, float %t) { 817; FMA-LABEL: test_f32_interp: 818; FMA: # BB#0: 819; FMA-NEXT: vfnmadd213ss %xmm1, %xmm2, %xmm1 820; FMA-NEXT: vfmadd213ss %xmm1, %xmm2, %xmm0 821; FMA-NEXT: retq 822; 823; FMA4-LABEL: test_f32_interp: 824; FMA4: # BB#0: 825; FMA4-NEXT: vfnmaddss %xmm1, %xmm1, %xmm2, %xmm1 826; FMA4-NEXT: vfmaddss %xmm1, %xmm2, %xmm0, %xmm0 827; FMA4-NEXT: retq 828; 829; AVX512-LABEL: test_f32_interp: 830; AVX512: # BB#0: 831; AVX512-NEXT: vfnmadd213ss %xmm1, %xmm2, %xmm1 832; AVX512-NEXT: vfmadd213ss %xmm1, %xmm0, %xmm2 833; AVX512-NEXT: vmovaps %zmm2, %zmm0 834; AVX512-NEXT: retq 835 %t1 = fsub float 1.0, %t 836 %tx = fmul float %x, %t 837 %ty = fmul float %y, %t1 838 %r = fadd float %tx, %ty 839 ret float %r 840} 841 842define <4 x float> @test_v4f32_interp(<4 x float> %x, <4 x float> %y, <4 x float> %t) { 843; FMA-LABEL: test_v4f32_interp: 
844; FMA: # BB#0: 845; FMA-NEXT: vfnmadd213ps %xmm1, %xmm2, %xmm1 846; FMA-NEXT: vfmadd213ps %xmm1, %xmm2, %xmm0 847; FMA-NEXT: retq 848; 849; FMA4-LABEL: test_v4f32_interp: 850; FMA4: # BB#0: 851; FMA4-NEXT: vfnmaddps %xmm1, %xmm1, %xmm2, %xmm1 852; FMA4-NEXT: vfmaddps %xmm1, %xmm2, %xmm0, %xmm0 853; FMA4-NEXT: retq 854; 855; AVX512-LABEL: test_v4f32_interp: 856; AVX512: # BB#0: 857; AVX512-NEXT: vmovaps %zmm2, %zmm3 858; AVX512-NEXT: vfnmadd213ps %xmm1, %xmm1, %xmm3 859; AVX512-NEXT: vfmadd213ps %xmm3, %xmm2, %xmm0 860; AVX512-NEXT: retq 861 %t1 = fsub <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %t 862 %tx = fmul <4 x float> %x, %t 863 %ty = fmul <4 x float> %y, %t1 864 %r = fadd <4 x float> %tx, %ty 865 ret <4 x float> %r 866} 867 868define <8 x float> @test_v8f32_interp(<8 x float> %x, <8 x float> %y, <8 x float> %t) { 869; FMA-LABEL: test_v8f32_interp: 870; FMA: # BB#0: 871; FMA-NEXT: vfnmadd213ps %ymm1, %ymm2, %ymm1 872; FMA-NEXT: vfmadd213ps %ymm1, %ymm2, %ymm0 873; FMA-NEXT: retq 874; 875; FMA4-LABEL: test_v8f32_interp: 876; FMA4: # BB#0: 877; FMA4-NEXT: vfnmaddps %ymm1, %ymm1, %ymm2, %ymm1 878; FMA4-NEXT: vfmaddps %ymm1, %ymm2, %ymm0, %ymm0 879; FMA4-NEXT: retq 880; 881; AVX512-LABEL: test_v8f32_interp: 882; AVX512: # BB#0: 883; AVX512-NEXT: vmovaps %zmm2, %zmm3 884; AVX512-NEXT: vfnmadd213ps %ymm1, %ymm1, %ymm3 885; AVX512-NEXT: vfmadd213ps %ymm3, %ymm2, %ymm0 886; AVX512-NEXT: retq 887 %t1 = fsub <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %t 888 %tx = fmul <8 x float> %x, %t 889 %ty = fmul <8 x float> %y, %t1 890 %r = fadd <8 x float> %tx, %ty 891 ret <8 x float> %r 892} 893 894define double @test_f64_interp(double %x, double %y, double %t) { 895; FMA-LABEL: test_f64_interp: 896; FMA: # BB#0: 897; FMA-NEXT: vfnmadd213sd %xmm1, %xmm2, %xmm1 898; FMA-NEXT: vfmadd213sd %xmm1, %xmm2, %xmm0 899; FMA-NEXT: retq 900; 901; FMA4-LABEL: test_f64_interp: 902; FMA4: # BB#0: 903; FMA4-NEXT: 
vfnmaddsd %xmm1, %xmm1, %xmm2, %xmm1 904; FMA4-NEXT: vfmaddsd %xmm1, %xmm2, %xmm0, %xmm0 905; FMA4-NEXT: retq 906; 907; AVX512-LABEL: test_f64_interp: 908; AVX512: # BB#0: 909; AVX512-NEXT: vfnmadd213sd %xmm1, %xmm2, %xmm1 910; AVX512-NEXT: vfmadd213sd %xmm1, %xmm0, %xmm2 911; AVX512-NEXT: vmovaps %zmm2, %zmm0 912; AVX512-NEXT: retq 913 %t1 = fsub double 1.0, %t 914 %tx = fmul double %x, %t 915 %ty = fmul double %y, %t1 916 %r = fadd double %tx, %ty 917 ret double %r 918} 919 920define <2 x double> @test_v2f64_interp(<2 x double> %x, <2 x double> %y, <2 x double> %t) { 921; FMA-LABEL: test_v2f64_interp: 922; FMA: # BB#0: 923; FMA-NEXT: vfnmadd213pd %xmm1, %xmm2, %xmm1 924; FMA-NEXT: vfmadd213pd %xmm1, %xmm2, %xmm0 925; FMA-NEXT: retq 926; 927; FMA4-LABEL: test_v2f64_interp: 928; FMA4: # BB#0: 929; FMA4-NEXT: vfnmaddpd %xmm1, %xmm1, %xmm2, %xmm1 930; FMA4-NEXT: vfmaddpd %xmm1, %xmm2, %xmm0, %xmm0 931; FMA4-NEXT: retq 932; 933; AVX512-LABEL: test_v2f64_interp: 934; AVX512: # BB#0: 935; AVX512-NEXT: vmovaps %zmm2, %zmm3 936; AVX512-NEXT: vfnmadd213pd %xmm1, %xmm1, %xmm3 937; AVX512-NEXT: vfmadd213pd %xmm3, %xmm2, %xmm0 938; AVX512-NEXT: retq 939 %t1 = fsub <2 x double> <double 1.0, double 1.0>, %t 940 %tx = fmul <2 x double> %x, %t 941 %ty = fmul <2 x double> %y, %t1 942 %r = fadd <2 x double> %tx, %ty 943 ret <2 x double> %r 944} 945 946define <4 x double> @test_v4f64_interp(<4 x double> %x, <4 x double> %y, <4 x double> %t) { 947; FMA-LABEL: test_v4f64_interp: 948; FMA: # BB#0: 949; FMA-NEXT: vfnmadd213pd %ymm1, %ymm2, %ymm1 950; FMA-NEXT: vfmadd213pd %ymm1, %ymm2, %ymm0 951; FMA-NEXT: retq 952; 953; FMA4-LABEL: test_v4f64_interp: 954; FMA4: # BB#0: 955; FMA4-NEXT: vfnmaddpd %ymm1, %ymm1, %ymm2, %ymm1 956; FMA4-NEXT: vfmaddpd %ymm1, %ymm2, %ymm0, %ymm0 957; FMA4-NEXT: retq 958; 959; AVX512-LABEL: test_v4f64_interp: 960; AVX512: # BB#0: 961; AVX512-NEXT: vmovaps %zmm2, %zmm3 962; AVX512-NEXT: vfnmadd213pd %ymm1, %ymm1, %ymm3 963; AVX512-NEXT: vfmadd213pd %ymm3, 
%ymm2, %ymm0 964; AVX512-NEXT: retq 965 %t1 = fsub <4 x double> <double 1.0, double 1.0, double 1.0, double 1.0>, %t 966 %tx = fmul <4 x double> %x, %t 967 %ty = fmul <4 x double> %y, %t1 968 %r = fadd <4 x double> %tx, %ty 969 ret <4 x double> %r 970} 971 972; 973; Pattern: (fneg (fma x, y, z)) -> (fma x, -y, -z) 974; 975 976define <4 x float> @test_v4f32_fneg_fmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 { 977; FMA-LABEL: test_v4f32_fneg_fmadd: 978; FMA: # BB#0: 979; FMA-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0 980; FMA-NEXT: retq 981; 982; FMA4-LABEL: test_v4f32_fneg_fmadd: 983; FMA4: # BB#0: 984; FMA4-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0 985; FMA4-NEXT: retq 986; 987; AVX512-LABEL: test_v4f32_fneg_fmadd: 988; AVX512: # BB#0: 989; AVX512-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0 990; AVX512-NEXT: retq 991 %mul = fmul <4 x float> %a0, %a1 992 %add = fadd <4 x float> %mul, %a2 993 %neg = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %add 994 ret <4 x float> %neg 995} 996 997define <4 x double> @test_v4f64_fneg_fmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 { 998; FMA-LABEL: test_v4f64_fneg_fmsub: 999; FMA: # BB#0: 1000; FMA-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0 1001; FMA-NEXT: retq 1002; 1003; FMA4-LABEL: test_v4f64_fneg_fmsub: 1004; FMA4: # BB#0: 1005; FMA4-NEXT: vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0 1006; FMA4-NEXT: retq 1007; 1008; AVX512-LABEL: test_v4f64_fneg_fmsub: 1009; AVX512: # BB#0: 1010; AVX512-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0 1011; AVX512-NEXT: retq 1012 %mul = fmul <4 x double> %a0, %a1 1013 %sub = fsub <4 x double> %mul, %a2 1014 %neg = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %sub 1015 ret <4 x double> %neg 1016} 1017 1018define <4 x float> @test_v4f32_fneg_fnmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 { 1019; FMA-LABEL: test_v4f32_fneg_fnmadd: 1020; FMA: # BB#0: 1021; FMA-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0 1022; FMA-NEXT: retq 1023; 1024; 
; FMA4-LABEL: test_v4f32_fneg_fnmadd:
; FMA4: # BB#0:
; FMA4-NEXT: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v4f32_fneg_fnmadd:
; AVX512: # BB#0:
; AVX512-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0
; AVX512-NEXT: retq
  %mul = fmul <4 x float> %a0, %a1
  %neg0 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %mul
  %add = fadd <4 x float> %neg0, %a2
  %neg1 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %add
  ret <4 x float> %neg1
}

; Computes -(-(a0 * a1) - a2), spelling each negation as an fsub from -0.0.
; Algebraically that is (a0 * a1) + a2, and every llc invocation is expected
; to emit one fused multiply-add on ymm registers with no separate negate.
define <4 x double> @test_v4f64_fneg_fnmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; FMA-LABEL: test_v4f64_fneg_fnmsub:
; FMA: # BB#0:
; FMA-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0
; FMA-NEXT: retq
;
; FMA4-LABEL: test_v4f64_fneg_fnmsub:
; FMA4: # BB#0:
; FMA4-NEXT: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v4f64_fneg_fnmsub:
; AVX512: # BB#0:
; AVX512-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0
; AVX512-NEXT: retq
  %mul = fmul <4 x double> %a0, %a1
  %neg0 = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %mul
  %sub = fsub <4 x double> %neg0, %a2
  %neg1 = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %sub
  ret <4 x double> %neg1
}

;
; Pattern: (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
;

; Adds two products of the same input %x with different constant vectors.
; The expected code is a single vmulps against a %rip-relative constant; no
; fused multiply-add remains. The two constants sum to 5.0 in every lane,
; which is consistent with the AVX-512 expectation using a {1to4} broadcast
; memory operand.
define <4 x float> @test_v4f32_fma_x_c1_fmul_x_c2(<4 x float> %x) #0 {
; FMA-LABEL: test_v4f32_fma_x_c1_fmul_x_c2:
; FMA: # BB#0:
; FMA-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0
; FMA-NEXT: retq
;
; FMA4-LABEL: test_v4f32_fma_x_c1_fmul_x_c2:
; FMA4: # BB#0:
; FMA4-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v4f32_fma_x_c1_fmul_x_c2:
; AVX512: # BB#0:
; AVX512-NEXT: vmulps {{.*}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-NEXT: retq
  %m0 = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
  %m1 = fmul <4 x float> %x, <float 4.0, float 3.0, float 2.0, float 1.0>
  %a = fadd <4 x float> %m0, %m1
  ret <4 x float> %a
}

;
; Pattern: (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
;

; Multiplies %x by two constant vectors in sequence and then adds %y.
; The expected code is one fused multiply-add whose multiplier is a single
; %rip-relative constant (the lane-wise product of the two IR constants is
; <4.0, 6.0, 6.0, 4.0>); no standalone multiply is expected. The AVX-512
; expectation accumulates into xmm1 and then moves the result to the return
; register.
define <4 x float> @test_v4f32_fma_fmul_x_c1_c2_y(<4 x float> %x, <4 x float> %y) #0 {
; FMA-LABEL: test_v4f32_fma_fmul_x_c1_c2_y:
; FMA: # BB#0:
; FMA-NEXT: vfmadd132ps {{.*}}(%rip), %xmm1, %xmm0
; FMA-NEXT: retq
;
; FMA4-LABEL: test_v4f32_fma_fmul_x_c1_c2_y:
; FMA4: # BB#0:
; FMA4-NEXT: vfmaddps %xmm1, {{.*}}(%rip), %xmm0, %xmm0
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v4f32_fma_fmul_x_c1_c2_y:
; AVX512: # BB#0:
; AVX512-NEXT: vfmadd231ps {{.*}}(%rip), %xmm0, %xmm1
; AVX512-NEXT: vmovaps %zmm1, %zmm0
; AVX512-NEXT: retq
  %m0 = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
  %m1 = fmul <4 x float> %m0, <float 4.0, float 3.0, float 2.0, float 1.0>
  %a = fadd <4 x float> %m1, %y
  ret <4 x float> %a
}

;
; Pattern: (fneg (fmul x, y)) -> (fnmsub x, y, 0)
;

; Scalar double case: negates (fsub from -0.0) the result of an `fmul nsz`.
; The expected code zeroes a scratch register with an xor and feeds it as the
; addend of a scalar fnmsub, so no separate multiply or sign flip appears.
define double @test_f64_fneg_fmul(double %x, double %y) #0 {
; FMA-LABEL: test_f64_fneg_fmul:
; FMA: # BB#0:
; FMA-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; FMA-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0
; FMA-NEXT: retq
;
; FMA4-LABEL: test_f64_fneg_fmul:
; FMA4: # BB#0:
; FMA4-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; FMA4-NEXT: vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_f64_fneg_fmul:
; AVX512: # BB#0:
; AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2
; AVX512-NEXT: vfnmsub213sd %xmm2, %xmm0, %xmm1
; AVX512-NEXT: vmovaps %zmm1, %zmm0
; AVX512-NEXT: retq
  %m = fmul nsz double %x, %y
  %n = fsub double -0.0, %m
  ret double %n
}

; 128-bit float vector case of the same pattern: `fmul nsz` followed by a
; negation. Expected code is a zeroing xor plus a packed-single fnmsub on
; xmm registers (the AVX-512 expectation zeroes with vpxord).
define <4 x float> @test_v4f32_fneg_fmul(<4 x float> %x, <4 x float> %y) #0 {
; FMA-LABEL: test_v4f32_fneg_fmul:
; FMA: # BB#0:
; FMA-NEXT: vxorps %xmm2, %xmm2, %xmm2
; FMA-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0
; FMA-NEXT: retq
;
; FMA4-LABEL: test_v4f32_fneg_fmul:
; FMA4: # BB#0:
; FMA4-NEXT: vxorps %xmm2, %xmm2, %xmm2
; FMA4-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v4f32_fneg_fmul:
; AVX512: # BB#0:
; AVX512-NEXT: vpxord %xmm2, %xmm2, %xmm2
; AVX512-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0
; AVX512-NEXT: retq
  %m = fmul nsz <4 x float> %x, %y
  %n = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %m
  ret <4 x float> %n
}

; 256-bit double vector case of the same pattern: `fmul nsz` followed by a
; negation. Expected code is a zeroing xor plus a packed-double fnmsub on
; ymm registers (the AVX-512 expectation zeroes with vpxord).
define <4 x double> @test_v4f64_fneg_fmul(<4 x double> %x, <4 x double> %y) #0 {
; FMA-LABEL: test_v4f64_fneg_fmul:
; FMA: # BB#0:
; FMA-NEXT: vxorpd %ymm2, %ymm2, %ymm2
; FMA-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0
; FMA-NEXT: retq
;
; FMA4-LABEL: test_v4f64_fneg_fmul:
; FMA4: # BB#0:
; FMA4-NEXT: vxorpd %ymm2, %ymm2, %ymm2
; FMA4-NEXT: vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v4f64_fneg_fmul:
; AVX512: # BB#0:
; AVX512-NEXT: vpxord %ymm2, %ymm2, %ymm2
; AVX512-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0
; AVX512-NEXT: retq
  %m = fmul nsz <4 x double> %x, %y
  %n = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %m
  ret <4 x double> %n
}

; Negative test: identical to test_v4f64_fneg_fmul except that the fmul
; carries no nsz flag. The expectations are shared by every llc invocation
; and consist of a plain vmulpd followed by a vxorpd against a %rip-relative
; constant, i.e. no fnmsub is formed.
; NOTE(review): presumably the fold is only permitted when the sign of zero
; may be ignored - confirm against the DAG combine that implements it.
define <4 x double> @test_v4f64_fneg_fmul_no_nsz(<4 x double> %x, <4 x double> %y) #0 {
; ALL-LABEL: test_v4f64_fneg_fmul_no_nsz:
; ALL: # BB#0:
; ALL-NEXT: vmulpd %ymm1, %ymm0, %ymm0
; ALL-NEXT: vxorpd {{.*}}(%rip), %ymm0, %ymm0
; ALL-NEXT: retq
  %m = fmul <4 x double> %x, %y
  %n = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %m
  ret <4 x double> %n
}

; Attribute group referenced as #0 by the functions above; it sets the
; "unsafe-fp-math" function attribute to "true".
attributes #0 = { "unsafe-fp-math"="true" }