1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=FMA --check-prefix=FMA-INFS 3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=FMA4 --check-prefix=FMA4-INFS 4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=FMA4 --check-prefix=FMA4-INFS 5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512-INFS 6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -fp-contract=fast -enable-no-infs-fp-math | FileCheck %s --check-prefix=ALL --check-prefix=FMA --check-prefix=FMA-NOINFS 7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma -fp-contract=fast -enable-no-infs-fp-math | FileCheck %s --check-prefix=ALL --check-prefix=FMA4 --check-prefix=FMA4-NOINFS 8; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 -fp-contract=fast -enable-no-infs-fp-math | FileCheck %s --check-prefix=ALL --check-prefix=FMA4 --check-prefix=FMA4-NOINFS 9; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl -fp-contract=fast -enable-no-infs-fp-math | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512-NOINFS 10 11; 12; Pattern: (fadd (fmul x, y), z) -> (fmadd x,y,z) 13; 14 15define float @test_f32_fmadd(float %a0, float %a1, float %a2) { 16; FMA-LABEL: test_f32_fmadd: 17; FMA: # %bb.0: 18; FMA-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 19; FMA-NEXT: retq 20; 21; FMA4-LABEL: test_f32_fmadd: 22; FMA4: # %bb.0: 23; FMA4-NEXT: vfmaddss %xmm2, %xmm1, %xmm0, %xmm0 24; FMA4-NEXT: retq 25; 26; AVX512-LABEL: test_f32_fmadd: 27; AVX512: # %bb.0: 28; AVX512-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 29; AVX512-NEXT: retq 30 %x = fmul float %a0, %a1 31 %res = fadd float %x, %a2 32 ret float %res 33} 34 35define <4 x float> @test_4f32_fmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { 36; FMA-LABEL: test_4f32_fmadd: 37; FMA: # %bb.0: 38; FMA-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 39; FMA-NEXT: retq 40; 41; FMA4-LABEL: test_4f32_fmadd: 42; FMA4: # %bb.0: 43; FMA4-NEXT: vfmaddps %xmm2, %xmm1, %xmm0, %xmm0 44; FMA4-NEXT: retq 45; 46; AVX512-LABEL: test_4f32_fmadd: 47; AVX512: # %bb.0: 48; AVX512-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 49; AVX512-NEXT: retq 50 %x = fmul <4 x float> %a0, %a1 51 %res = fadd <4 x float> %x, %a2 52 ret <4 x float> %res 53} 54 55define <8 x float> @test_8f32_fmadd(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) { 56; FMA-LABEL: test_8f32_fmadd: 57; FMA: # %bb.0: 58; FMA-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 59; FMA-NEXT: retq 60; 61; FMA4-LABEL: test_8f32_fmadd: 62; FMA4: # %bb.0: 63; FMA4-NEXT: vfmaddps %ymm2, %ymm1, %ymm0, %ymm0 64; FMA4-NEXT: retq 65; 66; AVX512-LABEL: test_8f32_fmadd: 67; AVX512: # %bb.0: 68; AVX512-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 69; AVX512-NEXT: retq 70 %x = fmul <8 x float> %a0, %a1 71 %res = fadd <8 x float> %x, %a2 72 ret <8 x float> %res 73} 74 75define double @test_f64_fmadd(double %a0, double %a1, double %a2) { 76; FMA-LABEL: test_f64_fmadd: 77; FMA: # %bb.0: 78; FMA-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 79; FMA-NEXT: retq 80; 81; FMA4-LABEL: test_f64_fmadd: 82; FMA4: # %bb.0: 83; FMA4-NEXT: vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0 84; FMA4-NEXT: retq 85; 86; AVX512-LABEL: test_f64_fmadd: 87; AVX512: # %bb.0: 88; AVX512-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 89; AVX512-NEXT: retq 90 %x = fmul double %a0, %a1 91 %res = fadd double %x, %a2 92 ret double %res 93} 94 95define <2 x double> @test_2f64_fmadd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { 96; FMA-LABEL: test_2f64_fmadd: 97; FMA: # %bb.0: 98; FMA-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 99; FMA-NEXT: retq 100; 101; FMA4-LABEL: test_2f64_fmadd: 102; FMA4: # %bb.0: 103; FMA4-NEXT: vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0 104; FMA4-NEXT: retq 105; 106; AVX512-LABEL: test_2f64_fmadd: 107; AVX512: # %bb.0: 108; AVX512-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 109; AVX512-NEXT: retq 110 %x = fmul <2 x double> %a0, %a1 111 %res = fadd <2 x double> %x, %a2 112 ret <2 x double> %res 113} 114 115define <4 x double> @test_4f64_fmadd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) { 116; FMA-LABEL: test_4f64_fmadd: 117; FMA: # %bb.0: 118; FMA-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 119; FMA-NEXT: retq 120; 121; FMA4-LABEL: test_4f64_fmadd: 122; FMA4: # %bb.0: 123; FMA4-NEXT: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0 124; FMA4-NEXT: retq 125; 126; AVX512-LABEL: test_4f64_fmadd: 127; AVX512: # %bb.0: 128; AVX512-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 129; AVX512-NEXT: retq 130 %x = fmul <4 x double> %a0, %a1 131 %res = fadd <4 x double> %x, %a2 132 ret <4 x double> %res 133} 134 135; 136; Pattern: (fsub (fmul x, y), z) -> (fmsub x, y, z) 137; 138 139define float @test_f32_fmsub(float %a0, float %a1, float %a2) { 140; FMA-LABEL: test_f32_fmsub: 141; FMA: # %bb.0: 142; FMA-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0 143; FMA-NEXT: retq 144; 145; FMA4-LABEL: test_f32_fmsub: 146; FMA4: # %bb.0: 147; FMA4-NEXT: vfmsubss %xmm2, %xmm1, %xmm0, %xmm0 148; FMA4-NEXT: retq 149; 150; AVX512-LABEL: test_f32_fmsub: 151; AVX512: # %bb.0: 152; AVX512-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0 153; AVX512-NEXT: retq 154 %x = fmul float %a0, %a1 155 %res = fsub float %x, %a2 156 ret float %res 157} 158 159define <4 x float> @test_4f32_fmsub(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { 160; FMA-LABEL: test_4f32_fmsub: 161; FMA: # %bb.0: 162; FMA-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0 163; FMA-NEXT: retq 164; 165; FMA4-LABEL: test_4f32_fmsub: 166; FMA4: # %bb.0: 167; FMA4-NEXT: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0 168; FMA4-NEXT: retq 169; 170; AVX512-LABEL: test_4f32_fmsub: 171; AVX512: # %bb.0: 172; AVX512-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0 173; AVX512-NEXT: retq 174 %x = fmul <4 x float> %a0, %a1 175 %res = fsub <4 x float> %x, %a2 176 ret <4 x float> %res 177} 178 179define <8 x float> @test_8f32_fmsub(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) { 180; FMA-LABEL: test_8f32_fmsub: 181; FMA: # %bb.0: 182; FMA-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0 183; FMA-NEXT: retq 184; 185; FMA4-LABEL: test_8f32_fmsub: 186; FMA4: # %bb.0: 187; FMA4-NEXT: vfmsubps %ymm2, %ymm1, %ymm0, %ymm0 188; FMA4-NEXT: retq 189; 190; AVX512-LABEL: test_8f32_fmsub: 191; AVX512: # %bb.0: 192; AVX512-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0 193; AVX512-NEXT: retq 194 %x = fmul <8 x float> %a0, %a1 195 %res = fsub <8 x float> %x, %a2 196 ret <8 x float> %res 197} 198 199define double @test_f64_fmsub(double %a0, double %a1, double %a2) { 200; FMA-LABEL: test_f64_fmsub: 201; FMA: # %bb.0: 202; FMA-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0 203; FMA-NEXT: retq 204; 205; FMA4-LABEL: test_f64_fmsub: 206; FMA4: # %bb.0: 207; FMA4-NEXT: vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0 208; FMA4-NEXT: retq 209; 210; AVX512-LABEL: test_f64_fmsub: 211; AVX512: # %bb.0: 212; AVX512-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0 213; AVX512-NEXT: retq 214 %x = fmul double %a0, %a1 215 %res = fsub double %x, %a2 216 ret double %res 217} 218 219define <2 x double> @test_2f64_fmsub(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { 220; FMA-LABEL: test_2f64_fmsub: 221; FMA: # %bb.0: 222; FMA-NEXT: vfmsub213pd %xmm2, %xmm1, %xmm0 223; FMA-NEXT: retq 224; 225; FMA4-LABEL: test_2f64_fmsub: 226; FMA4: # %bb.0: 227; FMA4-NEXT: vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0 228; FMA4-NEXT: retq 229; 230; AVX512-LABEL: test_2f64_fmsub: 231; AVX512: # %bb.0: 232; AVX512-NEXT: vfmsub213pd %xmm2, %xmm1, %xmm0 233; AVX512-NEXT: retq 234 %x = fmul <2 x double> %a0, %a1 235 %res = fsub <2 x double> %x, %a2 236 ret <2 x double> %res 237} 238 239define <4 x double> @test_4f64_fmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) { 240; FMA-LABEL: test_4f64_fmsub: 241; FMA: # %bb.0: 242; FMA-NEXT: vfmsub213pd %ymm2, %ymm1, %ymm0 243; FMA-NEXT: retq 244; 245; FMA4-LABEL: test_4f64_fmsub: 246; FMA4: # %bb.0: 247; FMA4-NEXT: vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0 248; FMA4-NEXT: retq 249; 250; AVX512-LABEL: test_4f64_fmsub: 251; AVX512: # %bb.0: 252; AVX512-NEXT: vfmsub213pd %ymm2, %ymm1, %ymm0 253; AVX512-NEXT: retq 254 %x = fmul <4 x double> %a0, %a1 255 %res = fsub <4 x double> %x, %a2 256 ret <4 x double> %res 257} 258 259; 260; Pattern: (fsub z, (fmul x, y)) -> (fnmadd x, y, z) 261; 262 263define float @test_f32_fnmadd(float %a0, float %a1, float %a2) { 264; FMA-LABEL: test_f32_fnmadd: 265; FMA: # %bb.0: 266; FMA-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 267; FMA-NEXT: retq 268; 269; FMA4-LABEL: test_f32_fnmadd: 270; FMA4: # %bb.0: 271; FMA4-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0 272; FMA4-NEXT: retq 273; 274; AVX512-LABEL: test_f32_fnmadd: 275; AVX512: # %bb.0: 276; AVX512-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 277; AVX512-NEXT: retq 278 %x = fmul float %a0, %a1 279 %res = fsub float %a2, %x 280 ret float %res 281} 282 283define <4 x float> @test_4f32_fnmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { 284; FMA-LABEL: test_4f32_fnmadd: 285; FMA: # %bb.0: 286; FMA-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 287; FMA-NEXT: retq 288; 289; FMA4-LABEL: test_4f32_fnmadd: 290; FMA4: # %bb.0: 291; FMA4-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0 292; FMA4-NEXT: retq 293; 294; AVX512-LABEL: test_4f32_fnmadd: 295; AVX512: # %bb.0: 296; AVX512-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 297; AVX512-NEXT: retq 298 %x = fmul <4 x float> %a0, %a1 299 %res = fsub <4 x float> %a2, %x 300 ret <4 x float> %res 301} 302 303define <8 x float> @test_8f32_fnmadd(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) { 304; FMA-LABEL: test_8f32_fnmadd: 305; FMA: # %bb.0: 306; FMA-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 307; FMA-NEXT: retq 308; 309; FMA4-LABEL: test_8f32_fnmadd: 310; FMA4: # %bb.0: 311; FMA4-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0 312; FMA4-NEXT: retq 313; 314; AVX512-LABEL: test_8f32_fnmadd: 315; AVX512: # %bb.0: 316; AVX512-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 317; AVX512-NEXT: retq 318 %x = fmul <8 x float> %a0, %a1 319 %res = fsub <8 x float> %a2, %x 320 ret <8 x float> %res 321} 322 323define double @test_f64_fnmadd(double %a0, double %a1, double %a2) { 324; FMA-LABEL: test_f64_fnmadd: 325; FMA: # %bb.0: 326; FMA-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0 327; FMA-NEXT: retq 328; 329; FMA4-LABEL: test_f64_fnmadd: 330; FMA4: # %bb.0: 331; FMA4-NEXT: vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0 332; FMA4-NEXT: retq 333; 334; AVX512-LABEL: test_f64_fnmadd: 335; AVX512: # %bb.0: 336; AVX512-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0 337; AVX512-NEXT: retq 338 %x = fmul double %a0, %a1 339 %res = fsub double %a2, %x 340 ret double %res 341} 342 343define <2 x double> @test_2f64_fnmadd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { 344; FMA-LABEL: test_2f64_fnmadd: 345; FMA: # %bb.0: 346; FMA-NEXT: vfnmadd213pd %xmm2, %xmm1, %xmm0 347; FMA-NEXT: retq 348; 349; FMA4-LABEL: test_2f64_fnmadd: 350; FMA4: # %bb.0: 351; FMA4-NEXT: vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0 352; FMA4-NEXT: retq 353; 354; AVX512-LABEL: test_2f64_fnmadd: 355; AVX512: # %bb.0: 356; AVX512-NEXT: vfnmadd213pd %xmm2, %xmm1, %xmm0 357; AVX512-NEXT: retq 358 %x = fmul <2 x double> %a0, %a1 359 %res = fsub <2 x double> %a2, %x 360 ret <2 x double> %res 361} 362 363define <4 x double> @test_4f64_fnmadd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) { 364; FMA-LABEL: test_4f64_fnmadd: 365; FMA: # %bb.0: 366; FMA-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0 367; FMA-NEXT: retq 368; 369; FMA4-LABEL: test_4f64_fnmadd: 370; FMA4: # %bb.0: 371; FMA4-NEXT: vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0 372; FMA4-NEXT: retq 373; 374; AVX512-LABEL: test_4f64_fnmadd: 375; AVX512: # %bb.0: 376; AVX512-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0 377; AVX512-NEXT: retq 378 %x = fmul <4 x double> %a0, %a1 379 %res = fsub <4 x double> %a2, %x 380 ret <4 x double> %res 381} 382 383; 384; Pattern: (fsub (fneg (fmul x, y)), z) -> (fnmsub x, y, z) 385; 386 387define float @test_f32_fnmsub(float %a0, float %a1, float %a2) { 388; FMA-LABEL: test_f32_fnmsub: 389; FMA: # %bb.0: 390; FMA-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 391; FMA-NEXT: retq 392; 393; FMA4-LABEL: test_f32_fnmsub: 394; FMA4: # %bb.0: 395; FMA4-NEXT: vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0 396; FMA4-NEXT: retq 397; 398; AVX512-LABEL: test_f32_fnmsub: 399; AVX512: # %bb.0: 400; AVX512-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 401; AVX512-NEXT: retq 402 %x = fmul float %a0, %a1 403 %y = fsub float -0.000000e+00, %x 404 %res = fsub float %y, %a2 405 ret float %res 406} 407 408define <4 x float> @test_4f32_fnmsub(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { 409; FMA-LABEL: test_4f32_fnmsub: 410; FMA: # %bb.0: 411; FMA-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0 412; FMA-NEXT: retq 413; 414; FMA4-LABEL: test_4f32_fnmsub: 415; FMA4: # %bb.0: 416; FMA4-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0 417; FMA4-NEXT: retq 418; 419; AVX512-LABEL: test_4f32_fnmsub: 420; AVX512: # %bb.0: 421; AVX512-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0 422; AVX512-NEXT: retq 423 %x = fmul <4 x float> %a0, %a1 424 %y = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x 425 %res = fsub <4 x float> %y, %a2 426 ret <4 x float> %res 427} 428 429define <8 x float> @test_8f32_fnmsub(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) { 430; FMA-LABEL: test_8f32_fnmsub: 431; FMA: # %bb.0: 432; FMA-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0 433; FMA-NEXT: retq 434; 435; FMA4-LABEL: test_8f32_fnmsub: 436; FMA4: # %bb.0: 437; FMA4-NEXT: vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0 438; FMA4-NEXT: retq 439; 440; AVX512-LABEL: test_8f32_fnmsub: 441; AVX512: # %bb.0: 442; AVX512-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0 443; AVX512-NEXT: retq 444 %x = fmul <8 x float> %a0, %a1 445 %y = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x 446 %res = fsub <8 x float> %y, %a2 447 ret <8 x float> %res 448} 449 450define double @test_f64_fnmsub(double %a0, double %a1, double %a2) { 451; FMA-LABEL: test_f64_fnmsub: 452; FMA: # %bb.0: 453; FMA-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0 454; FMA-NEXT: retq 455; 456; FMA4-LABEL: test_f64_fnmsub: 457; FMA4: # %bb.0: 458; FMA4-NEXT: vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0 459; FMA4-NEXT: retq 460; 461; AVX512-LABEL: test_f64_fnmsub: 462; AVX512: # %bb.0: 463; AVX512-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0 464; AVX512-NEXT: retq 465 %x = fmul double %a0, %a1 466 %y = fsub double -0.000000e+00, %x 467 %res = fsub double %y, %a2 468 ret double %res 469} 470 471define <2 x double> @test_2f64_fnmsub(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { 472; FMA-LABEL: test_2f64_fnmsub: 473; FMA: # %bb.0: 474; FMA-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0 475; FMA-NEXT: retq 476; 477; FMA4-LABEL: test_2f64_fnmsub: 478; FMA4: # %bb.0: 479; FMA4-NEXT: vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0 480; FMA4-NEXT: retq 481; 482; AVX512-LABEL: test_2f64_fnmsub: 483; AVX512: # %bb.0: 484; AVX512-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0 485; AVX512-NEXT: retq 486 %x = fmul <2 x double> %a0, %a1 487 %y = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %x 488 %res = fsub <2 x double> %y, %a2 489 ret <2 x double> %res 490} 491 492define <4 x double> @test_4f64_fnmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) { 493; FMA-LABEL: test_4f64_fnmsub: 494; FMA: # %bb.0: 495; FMA-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0 496; FMA-NEXT: retq 497; 498; FMA4-LABEL: test_4f64_fnmsub: 499; FMA4: # %bb.0: 500; FMA4-NEXT: vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0 501; FMA4-NEXT: retq 502; 503; AVX512-LABEL: test_4f64_fnmsub: 504; AVX512: # %bb.0: 505; AVX512-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0 506; AVX512-NEXT: retq 507 %x = fmul <4 x double> %a0, %a1 508 %y = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x 509 %res = fsub <4 x double> %y, %a2 510 ret <4 x double> %res 511} 512 513; 514; Load Folding Patterns 515; 516 517define <4 x float> @test_4f32_fmadd_load(<4 x float>* %a0, <4 x float> %a1, <4 x float> %a2) { 518; FMA-LABEL: test_4f32_fmadd_load: 519; FMA: # %bb.0: 520; FMA-NEXT: vfmadd132ps (%rdi), %xmm1, %xmm0 521; FMA-NEXT: retq 522; 523; FMA4-LABEL: test_4f32_fmadd_load: 524; FMA4: # %bb.0: 525; FMA4-NEXT: vfmaddps %xmm1, (%rdi), %xmm0, %xmm0 526; FMA4-NEXT: retq 527; 528; AVX512-LABEL: test_4f32_fmadd_load: 529; AVX512: # %bb.0: 530; AVX512-NEXT: vfmadd132ps (%rdi), %xmm1, %xmm0 531; AVX512-NEXT: retq 532 %x = load <4 x float>, <4 x float>* %a0 533 %y = fmul <4 x float> %x, %a1 534 %res = fadd <4 x float> %y, %a2 535 ret <4 x float> %res 536} 537 538define <2 x double> @test_2f64_fmsub_load(<2 x double>* %a0, <2 x double> %a1, <2 x double> %a2) { 539; FMA-LABEL: test_2f64_fmsub_load: 540; FMA: # %bb.0: 541; FMA-NEXT: vfmsub132pd (%rdi), %xmm1, %xmm0 542; FMA-NEXT: retq 543; 544; FMA4-LABEL: test_2f64_fmsub_load: 545; FMA4: # %bb.0: 546; FMA4-NEXT: vfmsubpd %xmm1, (%rdi), %xmm0, %xmm0 547; FMA4-NEXT: retq 548; 549; AVX512-LABEL: test_2f64_fmsub_load: 550; AVX512: # %bb.0: 551; AVX512-NEXT: vfmsub132pd (%rdi), %xmm1, %xmm0 552; AVX512-NEXT: retq 553 %x = load <2 x double>, <2 x double>* %a0 554 %y = fmul <2 x double> %x, %a1 555 %res = fsub <2 x double> %y, %a2 556 ret <2 x double> %res 557} 558 559; 560; Patterns (+ fneg variants): mul(add(1.0,x),y), mul(sub(1.0,x),y), mul(sub(x,1.0),y) 561; 562 563define <4 x float> @test_v4f32_mul_add_x_one_y(<4 x float> %x, <4 x float> %y) { 564; FMA-INFS-LABEL: test_v4f32_mul_add_x_one_y: 565; FMA-INFS: # %bb.0: 566; FMA-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0 567; FMA-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 568; FMA-INFS-NEXT: retq 569; 570; FMA4-INFS-LABEL: test_v4f32_mul_add_x_one_y: 571; FMA4-INFS: # %bb.0: 572; FMA4-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0 573; FMA4-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 574; FMA4-INFS-NEXT: retq 575; 576; AVX512-INFS-LABEL: test_v4f32_mul_add_x_one_y: 577; AVX512-INFS: # %bb.0: 578; AVX512-INFS-NEXT: vaddps {{.*}}(%rip){1to4}, %xmm0, %xmm0 579; AVX512-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 580; AVX512-INFS-NEXT: retq 581; 582; FMA-NOINFS-LABEL: test_v4f32_mul_add_x_one_y: 583; FMA-NOINFS: # %bb.0: 584; FMA-NOINFS-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0 585; FMA-NOINFS-NEXT: retq 586; 587; FMA4-NOINFS-LABEL: test_v4f32_mul_add_x_one_y: 588; FMA4-NOINFS: # %bb.0: 589; FMA4-NOINFS-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0 590; FMA4-NOINFS-NEXT: retq 591; 592; AVX512-NOINFS-LABEL: test_v4f32_mul_add_x_one_y: 593; AVX512-NOINFS: # %bb.0: 594; AVX512-NOINFS-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0 595; AVX512-NOINFS-NEXT: retq 596 %a = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0> 597 %m = fmul <4 x float> %a, %y 598 ret <4 x float> %m 599} 600 601define <4 x float> @test_v4f32_mul_y_add_x_one(<4 x float> %x, <4 x float> %y) { 602; FMA-INFS-LABEL: test_v4f32_mul_y_add_x_one: 603; FMA-INFS: # %bb.0: 604; FMA-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0 605; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 606; FMA-INFS-NEXT: retq 607; 608; FMA4-INFS-LABEL: test_v4f32_mul_y_add_x_one: 609; FMA4-INFS: # %bb.0: 610; FMA4-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0 611; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 612; FMA4-INFS-NEXT: retq 613; 614; AVX512-INFS-LABEL: test_v4f32_mul_y_add_x_one: 615; AVX512-INFS: # %bb.0: 616; AVX512-INFS-NEXT: vaddps {{.*}}(%rip){1to4}, %xmm0, %xmm0 617; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 618; AVX512-INFS-NEXT: retq 619; 620; FMA-NOINFS-LABEL: test_v4f32_mul_y_add_x_one: 621; FMA-NOINFS: # %bb.0: 622; FMA-NOINFS-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0 623; FMA-NOINFS-NEXT: retq 624; 625; FMA4-NOINFS-LABEL: test_v4f32_mul_y_add_x_one: 626; FMA4-NOINFS: # %bb.0: 627; FMA4-NOINFS-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0 628; FMA4-NOINFS-NEXT: retq 629; 630; AVX512-NOINFS-LABEL: test_v4f32_mul_y_add_x_one: 631; AVX512-NOINFS: # %bb.0: 632; AVX512-NOINFS-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0 633; AVX512-NOINFS-NEXT: retq 634 %a = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0> 635 %m = fmul <4 x float> %y, %a 636 ret <4 x float> %m 637} 638 639define <4 x float> @test_v4f32_mul_add_x_negone_y(<4 x float> %x, <4 x float> %y) { 640; FMA-INFS-LABEL: test_v4f32_mul_add_x_negone_y: 641; FMA-INFS: # %bb.0: 642; FMA-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0 643; FMA-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 644; FMA-INFS-NEXT: retq 645; 646; FMA4-INFS-LABEL: test_v4f32_mul_add_x_negone_y: 647; FMA4-INFS: # %bb.0: 648; FMA4-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0 649; FMA4-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 650; FMA4-INFS-NEXT: retq 651; 652; AVX512-INFS-LABEL: test_v4f32_mul_add_x_negone_y: 653; AVX512-INFS: # %bb.0: 654; AVX512-INFS-NEXT: vaddps {{.*}}(%rip){1to4}, %xmm0, %xmm0 655; AVX512-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 656; AVX512-INFS-NEXT: retq 657; 658; FMA-NOINFS-LABEL: test_v4f32_mul_add_x_negone_y: 659; FMA-NOINFS: # %bb.0: 660; FMA-NOINFS-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0 661; FMA-NOINFS-NEXT: retq 662; 663; FMA4-NOINFS-LABEL: test_v4f32_mul_add_x_negone_y: 664; FMA4-NOINFS: # %bb.0: 665; FMA4-NOINFS-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0 666; FMA4-NOINFS-NEXT: retq 667; 668; AVX512-NOINFS-LABEL: test_v4f32_mul_add_x_negone_y: 669; AVX512-NOINFS: # %bb.0: 670; AVX512-NOINFS-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0 671; AVX512-NOINFS-NEXT: retq 672 %a = fadd <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0> 673 %m = fmul <4 x float> %a, %y 674 ret <4 x float> %m 675} 676 677define <4 x float> @test_v4f32_mul_y_add_x_negone(<4 x float> %x, <4 x float> %y) { 678; FMA-INFS-LABEL: test_v4f32_mul_y_add_x_negone: 679; FMA-INFS: # %bb.0: 680; FMA-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0 681; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 682; FMA-INFS-NEXT: retq 683; 684; FMA4-INFS-LABEL: test_v4f32_mul_y_add_x_negone: 685; FMA4-INFS: # %bb.0: 686; FMA4-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0 687; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 688; FMA4-INFS-NEXT: retq 689; 690; AVX512-INFS-LABEL: test_v4f32_mul_y_add_x_negone: 691; AVX512-INFS: # %bb.0: 692; AVX512-INFS-NEXT: vaddps {{.*}}(%rip){1to4}, %xmm0, %xmm0 693; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 694; AVX512-INFS-NEXT: retq 695; 696; FMA-NOINFS-LABEL: test_v4f32_mul_y_add_x_negone: 697; FMA-NOINFS: # %bb.0: 698; FMA-NOINFS-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0 699; FMA-NOINFS-NEXT: retq 700; 701; FMA4-NOINFS-LABEL: test_v4f32_mul_y_add_x_negone: 702; FMA4-NOINFS: # %bb.0: 703; FMA4-NOINFS-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0 704; FMA4-NOINFS-NEXT: retq 705; 706; AVX512-NOINFS-LABEL: test_v4f32_mul_y_add_x_negone: 707; AVX512-NOINFS: # %bb.0: 708; AVX512-NOINFS-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0 709; AVX512-NOINFS-NEXT: retq 710 %a = fadd <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0> 711 %m = fmul <4 x float> %y, %a 712 ret <4 x float> %m 713} 714 715define <4 x float> @test_v4f32_mul_sub_one_x_y(<4 x float> %x, <4 x float> %y) { 716; FMA-INFS-LABEL: test_v4f32_mul_sub_one_x_y: 717; FMA-INFS: # %bb.0: 718; FMA-INFS-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] 719; FMA-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0 720; FMA-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 721; FMA-INFS-NEXT: retq 722; 723; FMA4-INFS-LABEL: test_v4f32_mul_sub_one_x_y: 724; FMA4-INFS: # %bb.0: 725; FMA4-INFS-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] 726; FMA4-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0 727; FMA4-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 728; FMA4-INFS-NEXT: retq 729; 730; AVX512-INFS-LABEL: test_v4f32_mul_sub_one_x_y: 731; AVX512-INFS: # %bb.0: 732; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] 733; AVX512-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0 734; AVX512-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 735; AVX512-INFS-NEXT: retq 736; 737; FMA-NOINFS-LABEL: test_v4f32_mul_sub_one_x_y: 738; FMA-NOINFS: # %bb.0: 739; FMA-NOINFS-NEXT: vfnmadd213ps %xmm1, %xmm1, %xmm0 740; FMA-NOINFS-NEXT: retq 741; 742; FMA4-NOINFS-LABEL: test_v4f32_mul_sub_one_x_y: 743; FMA4-NOINFS: # %bb.0: 744; FMA4-NOINFS-NEXT: vfnmaddps %xmm1, %xmm1, %xmm0, %xmm0 745; FMA4-NOINFS-NEXT: retq 746; 747; AVX512-NOINFS-LABEL: test_v4f32_mul_sub_one_x_y: 748; AVX512-NOINFS: # %bb.0: 749; AVX512-NOINFS-NEXT: vfnmadd213ps %xmm1, %xmm1, %xmm0 750; AVX512-NOINFS-NEXT: retq 751 %s = fsub <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x 752 %m = fmul <4 x float> %s, %y 753 ret <4 x float> %m 754} 755 756define <4 x float> @test_v4f32_mul_y_sub_one_x(<4 x float> %x, <4 x float> %y) { 757; FMA-INFS-LABEL: test_v4f32_mul_y_sub_one_x: 758; FMA-INFS: # %bb.0: 759; FMA-INFS-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] 760; FMA-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0 761; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 762; FMA-INFS-NEXT: retq 763; 764; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_one_x: 765; FMA4-INFS: # %bb.0: 766; FMA4-INFS-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] 767; FMA4-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0 768; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 769; FMA4-INFS-NEXT: retq 770; 771; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_one_x: 772; AVX512-INFS: # %bb.0: 773; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] 774; AVX512-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0 775; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 776; AVX512-INFS-NEXT: retq 777; 778; FMA-NOINFS-LABEL: test_v4f32_mul_y_sub_one_x: 779; FMA-NOINFS: # %bb.0: 780; FMA-NOINFS-NEXT: vfnmadd213ps %xmm1, %xmm1, %xmm0 781; FMA-NOINFS-NEXT: retq 782; 783; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_one_x: 784; FMA4-NOINFS: # %bb.0: 785; FMA4-NOINFS-NEXT: vfnmaddps %xmm1, %xmm1, %xmm0, %xmm0 786; FMA4-NOINFS-NEXT: retq 787; 788; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_one_x: 789; AVX512-NOINFS: # %bb.0: 790; AVX512-NOINFS-NEXT: vfnmadd213ps %xmm1, %xmm1, %xmm0 791; AVX512-NOINFS-NEXT: retq 792 %s = fsub <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x 793 %m = fmul <4 x float> %y, %s 794 ret <4 x float> %m 795} 796 797define <4 x float> @test_v4f32_mul_sub_negone_x_y(<4 x float> %x, <4 x float> %y) { 798; FMA-INFS-LABEL: test_v4f32_mul_sub_negone_x_y: 799; FMA-INFS: # %bb.0: 800; FMA-INFS-NEXT: vmovaps {{.*#+}} xmm2 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00] 801; FMA-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0 802; FMA-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 803; FMA-INFS-NEXT: retq 804; 805; FMA4-INFS-LABEL: test_v4f32_mul_sub_negone_x_y: 806; FMA4-INFS: # %bb.0: 807; FMA4-INFS-NEXT: vmovaps {{.*#+}} xmm2 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00] 808; FMA4-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0 809; FMA4-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 810; FMA4-INFS-NEXT: retq 811; 812; AVX512-INFS-LABEL: test_v4f32_mul_sub_negone_x_y: 813; AVX512-INFS: # %bb.0: 814; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} xmm2 = [-1,-1,-1,-1] 815; AVX512-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0 816; AVX512-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 817; AVX512-INFS-NEXT: retq 818; 819; FMA-NOINFS-LABEL: test_v4f32_mul_sub_negone_x_y: 820; FMA-NOINFS: # %bb.0: 821; FMA-NOINFS-NEXT: vfnmsub213ps %xmm1, %xmm1, %xmm0 822; FMA-NOINFS-NEXT: retq 823; 824; FMA4-NOINFS-LABEL: test_v4f32_mul_sub_negone_x_y: 825; FMA4-NOINFS: # %bb.0: 826; FMA4-NOINFS-NEXT: vfnmsubps %xmm1, %xmm1, %xmm0, %xmm0 827; FMA4-NOINFS-NEXT: retq 828; 829; AVX512-NOINFS-LABEL: test_v4f32_mul_sub_negone_x_y: 830; AVX512-NOINFS: # %bb.0: 831; AVX512-NOINFS-NEXT: vfnmsub213ps %xmm1, %xmm1, %xmm0 832; AVX512-NOINFS-NEXT: retq 833 %s = fsub <4 x float> <float -1.0, float -1.0, float -1.0, float -1.0>, %x 834 %m = fmul <4 x float> %s, %y 835 ret <4 x float> %m 836} 837 838define <4 x float> @test_v4f32_mul_y_sub_negone_x(<4 x float> %x, <4 x float> %y) { 839; FMA-INFS-LABEL: test_v4f32_mul_y_sub_negone_x: 840; FMA-INFS: # %bb.0: 841; FMA-INFS-NEXT: vmovaps {{.*#+}} xmm2 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00] 842; FMA-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0 843; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 844; FMA-INFS-NEXT: retq 845; 846; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_negone_x: 847; FMA4-INFS: # %bb.0: 848; FMA4-INFS-NEXT: vmovaps {{.*#+}} xmm2 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00] 849; FMA4-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0 850; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 851; FMA4-INFS-NEXT: retq 852; 853; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_negone_x: 854; AVX512-INFS: # %bb.0: 855; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} xmm2 = [-1,-1,-1,-1] 856; AVX512-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0 857; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 858; AVX512-INFS-NEXT: retq 859; 860; FMA-NOINFS-LABEL: test_v4f32_mul_y_sub_negone_x: 861; FMA-NOINFS: # %bb.0: 862; FMA-NOINFS-NEXT: vfnmsub213ps %xmm1, %xmm1, %xmm0 863; FMA-NOINFS-NEXT: retq 864; 865; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_negone_x: 866; FMA4-NOINFS: # %bb.0: 867; FMA4-NOINFS-NEXT: vfnmsubps %xmm1, %xmm1, %xmm0, %xmm0 868; FMA4-NOINFS-NEXT: retq 869; 870; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_negone_x: 871; AVX512-NOINFS: # %bb.0: 872; AVX512-NOINFS-NEXT: vfnmsub213ps %xmm1, %xmm1, %xmm0 873; AVX512-NOINFS-NEXT: retq 874 %s = fsub <4 x float> <float -1.0, float -1.0, float -1.0, float -1.0>, %x 875 %m = fmul <4 x float> %y, %s 876 ret <4 x float> %m 877} 878 879define <4 x float> @test_v4f32_mul_sub_x_one_y(<4 x float> %x, <4 x float> %y) { 880; FMA-INFS-LABEL: test_v4f32_mul_sub_x_one_y: 881; FMA-INFS: # %bb.0: 882; FMA-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0 883; FMA-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 884; FMA-INFS-NEXT: retq 885; 886; FMA4-INFS-LABEL: test_v4f32_mul_sub_x_one_y: 887; FMA4-INFS: # %bb.0: 888; FMA4-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0 889; FMA4-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 890; FMA4-INFS-NEXT: retq 891; 892; AVX512-INFS-LABEL: test_v4f32_mul_sub_x_one_y: 893; AVX512-INFS: # %bb.0: 894; AVX512-INFS-NEXT: vsubps {{.*}}(%rip){1to4}, %xmm0, %xmm0 895; AVX512-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 896; AVX512-INFS-NEXT: retq 897; 898; FMA-NOINFS-LABEL: test_v4f32_mul_sub_x_one_y: 899; FMA-NOINFS: # %bb.0: 900; FMA-NOINFS-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0 901; FMA-NOINFS-NEXT: retq 902; 903; FMA4-NOINFS-LABEL: test_v4f32_mul_sub_x_one_y: 904; FMA4-NOINFS: # %bb.0: 905; FMA4-NOINFS-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0 906; FMA4-NOINFS-NEXT: retq 907; 908; AVX512-NOINFS-LABEL: test_v4f32_mul_sub_x_one_y: 909; AVX512-NOINFS: # %bb.0: 910; AVX512-NOINFS-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0 911; AVX512-NOINFS-NEXT: retq 912 %s = fsub <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0> 913 %m = fmul <4 x float> %s, %y 914 ret <4 x float> %m 915} 916 917define <4 x float> @test_v4f32_mul_y_sub_x_one(<4 x float> %x, <4 x float> %y) { 918; FMA-INFS-LABEL: test_v4f32_mul_y_sub_x_one: 919; FMA-INFS: # %bb.0: 920; FMA-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0 921; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 922; FMA-INFS-NEXT: retq 923; 924; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_x_one: 925; FMA4-INFS: # %bb.0: 926; FMA4-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0 927; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 928; FMA4-INFS-NEXT: retq 929; 930; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_x_one: 931; AVX512-INFS: # %bb.0: 932; AVX512-INFS-NEXT: vsubps {{.*}}(%rip){1to4}, %xmm0, %xmm0 933; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 934; AVX512-INFS-NEXT: retq 935; 936; FMA-NOINFS-LABEL: test_v4f32_mul_y_sub_x_one: 937; FMA-NOINFS: # %bb.0: 938; FMA-NOINFS-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0 939; FMA-NOINFS-NEXT: retq 940; 941; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_x_one: 942; FMA4-NOINFS: # %bb.0: 943; FMA4-NOINFS-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0 944; FMA4-NOINFS-NEXT: retq 945; 946; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_x_one: 947; AVX512-NOINFS: # %bb.0: 948; AVX512-NOINFS-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0 949; AVX512-NOINFS-NEXT: retq 950 %s = fsub <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0> 951 %m = fmul <4 x float> %y, %s 952 ret <4 x float> %m 953} 954 955define <4 x float> @test_v4f32_mul_sub_x_negone_y(<4 x float> %x, <4 x float> %y) { 956; FMA-INFS-LABEL: test_v4f32_mul_sub_x_negone_y: 957; FMA-INFS: # %bb.0: 958; FMA-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0 959; FMA-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 960; FMA-INFS-NEXT: retq 961; 962; FMA4-INFS-LABEL: test_v4f32_mul_sub_x_negone_y: 963; FMA4-INFS: # %bb.0: 964; FMA4-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0 965; FMA4-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 966; FMA4-INFS-NEXT: retq 967; 968; AVX512-INFS-LABEL: test_v4f32_mul_sub_x_negone_y: 969; AVX512-INFS: # %bb.0: 970; AVX512-INFS-NEXT: vsubps {{.*}}(%rip){1to4}, %xmm0, %xmm0 971; AVX512-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 972; AVX512-INFS-NEXT: retq 973; 974; FMA-NOINFS-LABEL: test_v4f32_mul_sub_x_negone_y: 975; FMA-NOINFS: # %bb.0: 976; FMA-NOINFS-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0 977; FMA-NOINFS-NEXT: retq 978; 979; FMA4-NOINFS-LABEL: test_v4f32_mul_sub_x_negone_y: 980; FMA4-NOINFS: # %bb.0: 981; FMA4-NOINFS-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0 982; FMA4-NOINFS-NEXT: retq 983; 984; AVX512-NOINFS-LABEL: test_v4f32_mul_sub_x_negone_y: 985; AVX512-NOINFS: # %bb.0: 986; AVX512-NOINFS-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0 987; AVX512-NOINFS-NEXT: retq 988 %s = fsub <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0> 989 %m = fmul <4 x float> %s, %y 990 ret <4 x float> %m 991} 992 993define <4 x float> @test_v4f32_mul_y_sub_x_negone(<4 x float> %x, <4 x float> %y) { 994; FMA-INFS-LABEL: test_v4f32_mul_y_sub_x_negone: 995; FMA-INFS: # %bb.0: 996; FMA-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0 997; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 998; FMA-INFS-NEXT: retq 999; 1000; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_x_negone: 1001; FMA4-INFS: # %bb.0: 1002; FMA4-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0 1003; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 1004; FMA4-INFS-NEXT: retq 1005; 1006; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_x_negone: 1007; AVX512-INFS: # %bb.0: 1008; AVX512-INFS-NEXT: vsubps {{.*}}(%rip){1to4}, %xmm0, %xmm0 1009; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 1010; AVX512-INFS-NEXT: retq 1011; 1012; FMA-NOINFS-LABEL: test_v4f32_mul_y_sub_x_negone: 1013; FMA-NOINFS: # %bb.0: 1014; FMA-NOINFS-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0 1015; FMA-NOINFS-NEXT: retq 1016; 1017; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_x_negone: 1018; FMA4-NOINFS: # %bb.0: 1019; FMA4-NOINFS-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0 1020; FMA4-NOINFS-NEXT: retq 1021; 1022; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_x_negone: 1023; AVX512-NOINFS: # %bb.0: 1024; AVX512-NOINFS-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0 1025; AVX512-NOINFS-NEXT: retq 1026 %s = fsub <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0> 1027 %m = fmul <4 x float> %y, %s 1028 ret <4 x float> %m 1029} 1030 1031; 1032; Interpolation Patterns: add(mul(x,t),mul(sub(1.0,t),y)) 1033; 1034 1035define float @test_f32_interp(float %x, float %y, float %t) { 1036; FMA-INFS-LABEL: test_f32_interp: 1037; FMA-INFS: # %bb.0: 1038; FMA-INFS-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero 1039; FMA-INFS-NEXT: vsubss %xmm2, %xmm3, %xmm3 1040; FMA-INFS-NEXT: vmulss %xmm3, %xmm1, %xmm1 1041; FMA-INFS-NEXT: vfmadd213ss %xmm1, %xmm2, %xmm0 1042; FMA-INFS-NEXT: retq 1043; 1044; FMA4-INFS-LABEL: test_f32_interp: 1045; FMA4-INFS: # %bb.0: 1046; FMA4-INFS-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero 1047; FMA4-INFS-NEXT: vsubss %xmm2, %xmm3, %xmm3 1048; FMA4-INFS-NEXT: vmulss %xmm3, %xmm1, %xmm1 1049; FMA4-INFS-NEXT: vfmaddss %xmm1, %xmm2, %xmm0, %xmm0 1050; FMA4-INFS-NEXT: retq 1051; 1052; AVX512-INFS-LABEL: test_f32_interp: 1053; AVX512-INFS: # %bb.0: 1054; AVX512-INFS-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero 1055; AVX512-INFS-NEXT: vsubss %xmm2, %xmm3, %xmm3 1056; AVX512-INFS-NEXT: vmulss %xmm3, %xmm1, %xmm1 1057; AVX512-INFS-NEXT: vfmadd213ss %xmm1, %xmm2, %xmm0 1058; AVX512-INFS-NEXT: retq 1059; 1060; FMA-NOINFS-LABEL: test_f32_interp: 1061; FMA-NOINFS: # %bb.0: 1062; FMA-NOINFS-NEXT: vfnmadd213ss %xmm1, %xmm2, %xmm1 1063; FMA-NOINFS-NEXT: vfmadd213ss %xmm1, %xmm2, %xmm0 1064; FMA-NOINFS-NEXT: retq 1065; 1066; FMA4-NOINFS-LABEL: test_f32_interp: 1067; FMA4-NOINFS: # %bb.0: 1068; FMA4-NOINFS-NEXT: vfnmaddss %xmm1, %xmm1, %xmm2, %xmm1 1069; FMA4-NOINFS-NEXT: vfmaddss %xmm1, %xmm2, %xmm0, %xmm0 1070; FMA4-NOINFS-NEXT: retq 1071; 1072; AVX512-NOINFS-LABEL: test_f32_interp: 1073; AVX512-NOINFS: # %bb.0: 1074; AVX512-NOINFS-NEXT: vfnmadd213ss %xmm1, %xmm2, %xmm1 1075; AVX512-NOINFS-NEXT: vfmadd213ss %xmm1, %xmm2, %xmm0 1076; AVX512-NOINFS-NEXT: retq 1077 %t1 = fsub float 1.0, %t 1078 %tx = fmul float %x, %t 1079 %ty = fmul float %y, %t1 1080 %r = fadd float %tx, %ty 1081 ret float %r 1082} 1083 1084define <4 x float> @test_v4f32_interp(<4 x float> %x, <4 x float> %y, <4 x float> %t) { 1085; FMA-INFS-LABEL: test_v4f32_interp: 1086; FMA-INFS: # %bb.0: 1087; FMA-INFS-NEXT: vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] 1088; FMA-INFS-NEXT: vsubps %xmm2, %xmm3, %xmm3 1089; FMA-INFS-NEXT: vmulps %xmm3, %xmm1, %xmm1 1090; FMA-INFS-NEXT: vfmadd213ps %xmm1, %xmm2, %xmm0 1091; FMA-INFS-NEXT: retq 1092; 1093; FMA4-INFS-LABEL: test_v4f32_interp: 1094; FMA4-INFS: # %bb.0: 1095; FMA4-INFS-NEXT: vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] 1096; FMA4-INFS-NEXT: vsubps %xmm2, %xmm3, %xmm3 1097; FMA4-INFS-NEXT: vmulps %xmm3, %xmm1, %xmm1 1098; FMA4-INFS-NEXT: vfmaddps %xmm1, %xmm2, %xmm0, %xmm0 1099; FMA4-INFS-NEXT: retq 1100; 1101; AVX512-INFS-LABEL: test_v4f32_interp: 1102; AVX512-INFS: # %bb.0: 1103; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} xmm3 = [1,1,1,1] 1104; AVX512-INFS-NEXT: vsubps %xmm2, %xmm3, %xmm3 1105; AVX512-INFS-NEXT: vmulps %xmm3, %xmm1, %xmm1 1106; AVX512-INFS-NEXT: vfmadd213ps %xmm1, %xmm2, %xmm0 1107; AVX512-INFS-NEXT: retq 1108; 1109; FMA-NOINFS-LABEL: test_v4f32_interp: 1110; FMA-NOINFS: # %bb.0: 1111; FMA-NOINFS-NEXT: vfnmadd213ps %xmm1, %xmm2, %xmm1 1112; FMA-NOINFS-NEXT: vfmadd213ps %xmm1, %xmm2, %xmm0 1113; FMA-NOINFS-NEXT: retq 1114; 1115; FMA4-NOINFS-LABEL: test_v4f32_interp: 1116; FMA4-NOINFS: # %bb.0: 1117; FMA4-NOINFS-NEXT: vfnmaddps %xmm1, %xmm1, %xmm2, %xmm1 1118; FMA4-NOINFS-NEXT: vfmaddps %xmm1, %xmm2, %xmm0, %xmm0 1119; FMA4-NOINFS-NEXT: retq 1120; 1121; AVX512-NOINFS-LABEL: test_v4f32_interp: 1122; AVX512-NOINFS: # %bb.0: 1123; AVX512-NOINFS-NEXT: vfnmadd213ps %xmm1, %xmm2, %xmm1 1124; AVX512-NOINFS-NEXT: vfmadd213ps %xmm1, %xmm2, %xmm0 1125; AVX512-NOINFS-NEXT: retq 1126 %t1 = fsub <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %t 1127 %tx = fmul <4 x float> %x, %t 1128 %ty = fmul <4 x float> %y, %t1 1129 %r = fadd <4 x float> %tx, %ty 1130 ret <4 x float> %r 1131} 1132 1133define <8 x float> @test_v8f32_interp(<8 x float> %x, <8 x float> %y, <8 x float> %t) { 1134; FMA-INFS-LABEL: test_v8f32_interp: 1135; FMA-INFS: # %bb.0: 1136; FMA-INFS-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] 1137; FMA-INFS-NEXT: vsubps %ymm2, %ymm3, %ymm3 1138; FMA-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1 1139; FMA-INFS-NEXT: vfmadd213ps %ymm1, %ymm2, %ymm0 1140; FMA-INFS-NEXT: retq 1141; 1142; FMA4-INFS-LABEL: test_v8f32_interp: 1143; FMA4-INFS: # %bb.0: 1144; FMA4-INFS-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] 1145; FMA4-INFS-NEXT: vsubps %ymm2, %ymm3, %ymm3 1146; FMA4-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1 1147; FMA4-INFS-NEXT: vfmaddps %ymm1, %ymm2, %ymm0, %ymm0 1148; FMA4-INFS-NEXT: retq 1149; 1150; AVX512-INFS-LABEL: test_v8f32_interp: 1151; AVX512-INFS: # %bb.0: 1152; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] 1153; AVX512-INFS-NEXT: vsubps %ymm2, %ymm3, %ymm3 1154; AVX512-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1 1155; AVX512-INFS-NEXT: vfmadd213ps %ymm1, %ymm2, %ymm0 1156; AVX512-INFS-NEXT: retq 1157; 1158; FMA-NOINFS-LABEL: test_v8f32_interp: 1159; FMA-NOINFS: # %bb.0: 1160; FMA-NOINFS-NEXT: vfnmadd213ps %ymm1, %ymm2, %ymm1 1161; FMA-NOINFS-NEXT: vfmadd213ps %ymm1, %ymm2, %ymm0 1162; FMA-NOINFS-NEXT: retq 1163; 1164; FMA4-NOINFS-LABEL: test_v8f32_interp: 1165; FMA4-NOINFS: # %bb.0: 1166; FMA4-NOINFS-NEXT: vfnmaddps %ymm1, %ymm1, %ymm2, %ymm1 1167; FMA4-NOINFS-NEXT: vfmaddps %ymm1, %ymm2, %ymm0, %ymm0 1168; FMA4-NOINFS-NEXT: retq 1169; 1170; AVX512-NOINFS-LABEL: test_v8f32_interp: 1171; AVX512-NOINFS: # %bb.0: 1172; AVX512-NOINFS-NEXT: vfnmadd213ps %ymm1, %ymm2, %ymm1 1173; AVX512-NOINFS-NEXT: vfmadd213ps %ymm1, %ymm2, %ymm0 1174; AVX512-NOINFS-NEXT: retq 1175 %t1 = fsub <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %t 1176 %tx = fmul <8 x float> %x, %t 1177 %ty = fmul <8 x float> %y, %t1 1178 %r = fadd <8 x float> %tx, %ty 1179 ret <8 x float> %r 1180} 1181 1182define double @test_f64_interp(double %x, double %y, double %t) { 1183; FMA-INFS-LABEL: test_f64_interp: 1184; FMA-INFS: # %bb.0: 1185; FMA-INFS-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero 1186; FMA-INFS-NEXT: vsubsd %xmm2, %xmm3, %xmm3 1187; FMA-INFS-NEXT: vmulsd %xmm3, %xmm1, %xmm1 1188; FMA-INFS-NEXT: vfmadd213sd %xmm1, %xmm2, %xmm0 1189; FMA-INFS-NEXT: retq 1190; 1191; FMA4-INFS-LABEL: test_f64_interp: 1192; FMA4-INFS: # %bb.0: 1193; FMA4-INFS-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero 1194; FMA4-INFS-NEXT: vsubsd %xmm2, %xmm3, %xmm3 1195; FMA4-INFS-NEXT: vmulsd %xmm3, %xmm1, %xmm1 1196; FMA4-INFS-NEXT: vfmaddsd %xmm1, %xmm2, %xmm0, %xmm0 1197; FMA4-INFS-NEXT: retq 1198; 1199; AVX512-INFS-LABEL: test_f64_interp: 1200; AVX512-INFS: # %bb.0: 1201; AVX512-INFS-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero 1202; AVX512-INFS-NEXT: vsubsd %xmm2, %xmm3, %xmm3 1203; AVX512-INFS-NEXT: vmulsd %xmm3, %xmm1, %xmm1 1204; AVX512-INFS-NEXT: vfmadd213sd %xmm1, %xmm2, %xmm0 1205; AVX512-INFS-NEXT: retq 1206; 1207; FMA-NOINFS-LABEL: test_f64_interp: 1208; FMA-NOINFS: # %bb.0: 1209; FMA-NOINFS-NEXT: vfnmadd213sd %xmm1, %xmm2, %xmm1 1210; FMA-NOINFS-NEXT: vfmadd213sd %xmm1, %xmm2, %xmm0 1211; FMA-NOINFS-NEXT: retq 1212; 1213; FMA4-NOINFS-LABEL: test_f64_interp: 1214; FMA4-NOINFS: # %bb.0: 1215; FMA4-NOINFS-NEXT: vfnmaddsd %xmm1, %xmm1, %xmm2, %xmm1 1216; FMA4-NOINFS-NEXT: vfmaddsd %xmm1, %xmm2, %xmm0, %xmm0 1217; FMA4-NOINFS-NEXT: retq 1218; 1219; AVX512-NOINFS-LABEL: test_f64_interp: 1220; AVX512-NOINFS: # %bb.0: 1221; AVX512-NOINFS-NEXT: vfnmadd213sd %xmm1, %xmm2, %xmm1 1222; AVX512-NOINFS-NEXT: vfmadd213sd %xmm1, %xmm2, %xmm0 1223; AVX512-NOINFS-NEXT: retq 1224 %t1 = fsub double 1.0, %t 1225 %tx = fmul double %x, %t 1226 %ty = fmul double %y, %t1 1227 %r = fadd double %tx, %ty 1228 ret double %r 1229} 1230 1231define <2 x double> @test_v2f64_interp(<2 x double> %x, <2 x double> %y, <2 x double> %t) { 1232; FMA-INFS-LABEL: test_v2f64_interp: 1233; FMA-INFS: # %bb.0: 1234; FMA-INFS-NEXT: vmovapd {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00] 1235; FMA-INFS-NEXT: vsubpd %xmm2, %xmm3, %xmm3 1236; FMA-INFS-NEXT: vmulpd %xmm3, %xmm1, %xmm1 1237; FMA-INFS-NEXT: vfmadd213pd %xmm1, %xmm2, %xmm0 1238; FMA-INFS-NEXT: retq 1239; 1240; FMA4-INFS-LABEL: test_v2f64_interp: 1241; FMA4-INFS: # %bb.0: 1242; FMA4-INFS-NEXT: vmovapd {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00] 1243; FMA4-INFS-NEXT: vsubpd %xmm2, %xmm3, %xmm3 1244; FMA4-INFS-NEXT: vmulpd %xmm3, %xmm1, %xmm1 1245; FMA4-INFS-NEXT: vfmaddpd %xmm1, %xmm2, %xmm0, %xmm0 1246; FMA4-INFS-NEXT: retq 1247; 1248; AVX512-INFS-LABEL: test_v2f64_interp: 1249; AVX512-INFS: # %bb.0: 1250; AVX512-INFS-NEXT: vmovapd {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00] 1251; AVX512-INFS-NEXT: vsubpd %xmm2, %xmm3, %xmm3 1252; AVX512-INFS-NEXT: vmulpd %xmm3, %xmm1, %xmm1 1253; AVX512-INFS-NEXT: vfmadd213pd %xmm1, %xmm2, %xmm0 1254; AVX512-INFS-NEXT: retq 1255; 1256; FMA-NOINFS-LABEL: test_v2f64_interp: 1257; FMA-NOINFS: # %bb.0: 1258; FMA-NOINFS-NEXT: vfnmadd213pd %xmm1, %xmm2, %xmm1 1259; FMA-NOINFS-NEXT: vfmadd213pd %xmm1, %xmm2, %xmm0 1260; FMA-NOINFS-NEXT: retq 1261; 1262; FMA4-NOINFS-LABEL: test_v2f64_interp: 1263; FMA4-NOINFS: # %bb.0: 1264; FMA4-NOINFS-NEXT: vfnmaddpd %xmm1, %xmm1, %xmm2, %xmm1 1265; FMA4-NOINFS-NEXT: vfmaddpd %xmm1, %xmm2, %xmm0, %xmm0 1266; FMA4-NOINFS-NEXT: retq 1267; 1268; AVX512-NOINFS-LABEL: test_v2f64_interp: 1269; AVX512-NOINFS: # %bb.0: 1270; AVX512-NOINFS-NEXT: vfnmadd213pd %xmm1, %xmm2, %xmm1 1271; AVX512-NOINFS-NEXT: vfmadd213pd %xmm1, %xmm2, %xmm0 1272; AVX512-NOINFS-NEXT: retq 1273 %t1 = fsub <2 x double> <double 1.0, double 1.0>, %t 1274 %tx = fmul <2 x double> %x, %t 1275 %ty = fmul <2 x double> %y, %t1 1276 %r = fadd <2 x double> %tx, %ty 1277 ret <2 x double> %r 1278} 1279 1280define <4 x double> @test_v4f64_interp(<4 x double> %x, <4 x double> %y, <4 x double> %t) { 1281; FMA-INFS-LABEL: test_v4f64_interp: 1282; FMA-INFS: # %bb.0: 1283; FMA-INFS-NEXT: vmovapd {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] 1284; FMA-INFS-NEXT: vsubpd %ymm2, %ymm3, %ymm3 1285; FMA-INFS-NEXT: vmulpd %ymm3, %ymm1, %ymm1 1286; FMA-INFS-NEXT: vfmadd213pd %ymm1, %ymm2, %ymm0 1287; FMA-INFS-NEXT: retq 1288; 1289; FMA4-INFS-LABEL: test_v4f64_interp: 1290; FMA4-INFS: # %bb.0: 1291; FMA4-INFS-NEXT: vmovapd {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] 1292; FMA4-INFS-NEXT: vsubpd %ymm2, %ymm3, %ymm3 1293; FMA4-INFS-NEXT: vmulpd %ymm3, %ymm1, %ymm1 1294; FMA4-INFS-NEXT: vfmaddpd %ymm1, %ymm2, %ymm0, %ymm0 1295; FMA4-INFS-NEXT: retq 1296; 1297; AVX512-INFS-LABEL: test_v4f64_interp: 1298; AVX512-INFS: # %bb.0: 1299; AVX512-INFS-NEXT: vbroadcastsd {{.*#+}} ymm3 = [1,1,1,1] 1300; AVX512-INFS-NEXT: vsubpd %ymm2, %ymm3, %ymm3 1301; AVX512-INFS-NEXT: vmulpd %ymm3, %ymm1, %ymm1 1302; AVX512-INFS-NEXT: vfmadd213pd %ymm1, %ymm2, %ymm0 1303; AVX512-INFS-NEXT: retq 1304; 1305; FMA-NOINFS-LABEL: test_v4f64_interp: 1306; FMA-NOINFS: # %bb.0: 1307; FMA-NOINFS-NEXT: vfnmadd213pd %ymm1, %ymm2, %ymm1 1308; FMA-NOINFS-NEXT: vfmadd213pd %ymm1, %ymm2, %ymm0 1309; FMA-NOINFS-NEXT: retq 1310; 1311; FMA4-NOINFS-LABEL: test_v4f64_interp: 1312; FMA4-NOINFS: # %bb.0: 1313; FMA4-NOINFS-NEXT: vfnmaddpd %ymm1, %ymm1, %ymm2, %ymm1 1314; FMA4-NOINFS-NEXT: vfmaddpd %ymm1, %ymm2, %ymm0, %ymm0 1315; FMA4-NOINFS-NEXT: retq 1316; 1317; AVX512-NOINFS-LABEL: test_v4f64_interp: 1318; AVX512-NOINFS: # %bb.0: 1319; AVX512-NOINFS-NEXT: vfnmadd213pd %ymm1, %ymm2, %ymm1 1320; AVX512-NOINFS-NEXT: vfmadd213pd %ymm1, %ymm2, %ymm0 1321; AVX512-NOINFS-NEXT: retq 1322 %t1 = fsub <4 x double> <double 1.0, double 1.0, double 1.0, double 1.0>, %t 1323 %tx = fmul <4 x double> %x, %t 1324 %ty = fmul <4 x double> %y, %t1 1325 %r = fadd <4 x double> %tx, %ty 1326 ret <4 x double> %r 1327} 1328 1329; 1330; Pattern: (fneg (fma x, y, z)) -> (fma x, -y, -z) 1331; 1332 1333define <4 x float> @test_v4f32_fneg_fmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 { 1334; FMA-LABEL: test_v4f32_fneg_fmadd: 1335; FMA: # %bb.0: 1336; FMA-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0 1337; FMA-NEXT: retq 1338; 1339; FMA4-LABEL: test_v4f32_fneg_fmadd: 1340; FMA4: # %bb.0: 1341; FMA4-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0 1342; FMA4-NEXT: retq 1343; 1344; AVX512-LABEL: test_v4f32_fneg_fmadd: 1345; AVX512: # %bb.0: 1346; AVX512-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0 1347; AVX512-NEXT: retq 1348 %mul = fmul <4 x float> %a0, %a1 1349 %add = fadd <4 x float> %mul, %a2 1350 %neg = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %add 1351 ret <4 x float> %neg 1352} 1353 1354define <4 x double> @test_v4f64_fneg_fmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 { 1355; FMA-LABEL: test_v4f64_fneg_fmsub: 1356; FMA: # %bb.0: 1357; FMA-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0 1358; FMA-NEXT: retq 1359; 1360; FMA4-LABEL: test_v4f64_fneg_fmsub: 1361; FMA4: # %bb.0: 1362; FMA4-NEXT: vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0 1363; FMA4-NEXT: retq 1364; 1365; AVX512-LABEL: test_v4f64_fneg_fmsub: 1366; AVX512: # %bb.0: 1367; AVX512-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0 1368; AVX512-NEXT: retq 1369 %mul = fmul <4 x double> %a0, %a1 1370 %sub = fsub <4 x double> %mul, %a2 1371 %neg = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %sub 1372 ret <4 x double> %neg 1373} 1374 1375define <4 x float> @test_v4f32_fneg_fnmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 { 1376; FMA-LABEL: test_v4f32_fneg_fnmadd: 1377; FMA: # %bb.0: 1378; FMA-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0 1379; FMA-NEXT: retq 1380; 1381; FMA4-LABEL: test_v4f32_fneg_fnmadd: 1382; FMA4: # %bb.0: 1383; FMA4-NEXT: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0 1384; FMA4-NEXT: retq 1385; 1386; AVX512-LABEL: test_v4f32_fneg_fnmadd: 1387; AVX512: # %bb.0: 1388; AVX512-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0 1389; AVX512-NEXT: retq 1390 %mul = fmul <4 x float> %a0, %a1 1391 %neg0 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %mul 1392 %add = fadd <4 x float> %neg0, %a2 1393 %neg1 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %add 1394 ret <4 x float> %neg1 1395} 1396 1397define <4 x double> @test_v4f64_fneg_fnmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 { 1398; FMA-LABEL: test_v4f64_fneg_fnmsub: 1399; FMA: # %bb.0: 1400; FMA-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 1401; FMA-NEXT: retq 1402; 1403; FMA4-LABEL: test_v4f64_fneg_fnmsub: 1404; FMA4: # %bb.0: 1405; FMA4-NEXT: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0 1406; FMA4-NEXT: retq 1407; 1408; AVX512-LABEL: test_v4f64_fneg_fnmsub: 1409; AVX512: # %bb.0: 1410; AVX512-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 1411; AVX512-NEXT: retq 1412 %mul = fmul <4 x double> %a0, %a1 1413 %neg0 = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %mul 1414 %sub = fsub <4 x double> %neg0, %a2 1415 %neg1 = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %sub 1416 ret <4 x double> %neg1 1417} 1418 1419; 1420; Pattern: (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2) 1421; 1422 1423define <4 x float> @test_v4f32_fma_x_c1_fmul_x_c2(<4 x float> %x) #0 { 1424; FMA-LABEL: test_v4f32_fma_x_c1_fmul_x_c2: 1425; FMA: # %bb.0: 1426; FMA-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 1427; FMA-NEXT: retq 1428; 1429; FMA4-LABEL: test_v4f32_fma_x_c1_fmul_x_c2: 1430; FMA4: # %bb.0: 1431; FMA4-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 1432; FMA4-NEXT: retq 1433; 1434; AVX512-LABEL: test_v4f32_fma_x_c1_fmul_x_c2: 1435; AVX512: # %bb.0: 1436; AVX512-NEXT: vmulps {{.*}}(%rip){1to4}, %xmm0, %xmm0 1437; AVX512-NEXT: retq 1438 %m0 = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0> 1439 %m1 = fmul <4 x float> %x, <float 4.0, float 3.0, float 2.0, float 1.0> 1440 %a = fadd <4 x float> %m0, %m1 1441 ret <4 x float> %a 1442} 1443 1444; 1445; Pattern: (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y) 1446; 1447 1448define <4 x float> @test_v4f32_fma_fmul_x_c1_c2_y(<4 x float> %x, <4 x float> %y) #0 { 1449; FMA-LABEL: test_v4f32_fma_fmul_x_c1_c2_y: 1450; FMA: # %bb.0: 1451; FMA-NEXT: vfmadd132ps {{.*}}(%rip), %xmm1, %xmm0 1452; FMA-NEXT: retq 1453; 1454; FMA4-LABEL: test_v4f32_fma_fmul_x_c1_c2_y: 1455; FMA4: # %bb.0: 1456; FMA4-NEXT: vfmaddps %xmm1, {{.*}}(%rip), %xmm0, %xmm0 1457; FMA4-NEXT: retq 1458; 1459; AVX512-LABEL: test_v4f32_fma_fmul_x_c1_c2_y: 1460; AVX512: # %bb.0: 1461; AVX512-NEXT: vfmadd132ps {{.*}}(%rip), %xmm1, %xmm0 1462; AVX512-NEXT: retq 1463 %m0 = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0> 1464 %m1 = fmul <4 x float> %m0, <float 4.0, float 3.0, float 2.0, float 1.0> 1465 %a = fadd <4 x float> %m1, %y 1466 ret <4 x float> %a 1467} 1468 1469; Pattern: (fneg (fmul x, y)) -> (fnmsub x, y, 0) 1470 1471define double @test_f64_fneg_fmul(double %x, double %y) #0 { 1472; FMA-LABEL: test_f64_fneg_fmul: 1473; FMA: # %bb.0: 1474; FMA-NEXT: vxorpd %xmm2, %xmm2, %xmm2 1475; FMA-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0 1476; FMA-NEXT: retq 1477; 1478; FMA4-LABEL: test_f64_fneg_fmul: 1479; FMA4: # %bb.0: 1480; FMA4-NEXT: vxorpd %xmm2, %xmm2, %xmm2 1481; FMA4-NEXT: vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0 1482; FMA4-NEXT: retq 1483; 1484; AVX512-LABEL: test_f64_fneg_fmul: 1485; AVX512: # %bb.0: 1486; AVX512-NEXT: vxorpd %xmm2, %xmm2, %xmm2 1487; AVX512-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0 1488; AVX512-NEXT: retq 1489 %m = fmul nsz double %x, %y 1490 %n = fsub double -0.0, %m 1491 ret double %n 1492} 1493 1494define <4 x float> @test_v4f32_fneg_fmul(<4 x float> %x, <4 x float> %y) #0 { 1495; FMA-LABEL: test_v4f32_fneg_fmul: 1496; FMA: # %bb.0: 1497; FMA-NEXT: vxorps %xmm2, %xmm2, %xmm2 1498; FMA-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0 1499; FMA-NEXT: retq 1500; 1501; FMA4-LABEL: test_v4f32_fneg_fmul: 1502; FMA4: # %bb.0: 1503; FMA4-NEXT: vxorps %xmm2, %xmm2, %xmm2 1504; FMA4-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0 1505; FMA4-NEXT: retq 1506; 1507; AVX512-LABEL: test_v4f32_fneg_fmul: 1508; AVX512: # %bb.0: 1509; AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2 1510; AVX512-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0 1511; AVX512-NEXT: retq 1512 %m = fmul nsz <4 x float> %x, %y 1513 %n = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %m 1514 ret <4 x float> %n 1515} 1516 1517define <4 x double> @test_v4f64_fneg_fmul(<4 x double> %x, <4 x double> %y) #0 { 1518; FMA-LABEL: test_v4f64_fneg_fmul: 1519; FMA: # %bb.0: 1520; FMA-NEXT: vxorpd %xmm2, %xmm2, %xmm2 1521; FMA-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0 1522; FMA-NEXT: retq 1523; 1524; FMA4-LABEL: test_v4f64_fneg_fmul: 1525; FMA4: # %bb.0: 1526; FMA4-NEXT: vxorpd %xmm2, %xmm2, %xmm2 1527; FMA4-NEXT: vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0 1528; FMA4-NEXT: retq 1529; 1530; AVX512-LABEL: test_v4f64_fneg_fmul: 1531; AVX512: # %bb.0: 1532; AVX512-NEXT: vxorpd %xmm2, %xmm2, %xmm2 1533; AVX512-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0 1534; AVX512-NEXT: retq 1535 %m = fmul nsz <4 x double> %x, %y 1536 %n = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %m 1537 ret <4 x double> %n 1538} 1539 1540define <4 x double> @test_v4f64_fneg_fmul_no_nsz(<4 x double> %x, <4 x double> %y) #0 { 1541; FMA-LABEL: test_v4f64_fneg_fmul_no_nsz: 1542; FMA: # %bb.0: 1543; FMA-NEXT: vmulpd %ymm1, %ymm0, %ymm0 1544; FMA-NEXT: vxorpd {{.*}}(%rip), %ymm0, %ymm0 1545; FMA-NEXT: retq 1546; 1547; FMA4-LABEL: test_v4f64_fneg_fmul_no_nsz: 1548; FMA4: # %bb.0: 1549; FMA4-NEXT: vmulpd %ymm1, %ymm0, %ymm0 1550; FMA4-NEXT: vxorpd {{.*}}(%rip), %ymm0, %ymm0 1551; FMA4-NEXT: retq 1552; 1553; AVX512-LABEL: test_v4f64_fneg_fmul_no_nsz: 1554; AVX512: # %bb.0: 1555; AVX512-NEXT: vmulpd %ymm1, %ymm0, %ymm0 1556; AVX512-NEXT: vxorpd {{.*}}(%rip){1to4}, %ymm0, %ymm0 1557; AVX512-NEXT: retq 1558 %m = fmul <4 x double> %x, %y 1559 %n = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %m 1560 ret <4 x double> %n 1561} 1562 1563attributes #0 = { "unsafe-fp-math"="true" } 1564