; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+fma | FileCheck -check-prefix=FMA3 -check-prefix=FMA3_256 %s
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+fma,+avx512f | FileCheck -check-prefix=FMA3 -check-prefix=FMA3_512 %s
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+fma4 | FileCheck -check-prefix=FMA4 %s

; This test checks the fusing of MUL + SUB/ADD to FMSUBADD.
;
; Every positive test below has the same shape: multiply %A by %B, compute
; both (product - %C) and (product + %C), then blend the two results with a
; shufflevector whose mask takes the even lanes from the sum and the odd
; lanes from the difference. The expected output is one fmsubadd instruction
; per legal vector register.

; 128-bit, double precision (2 lanes).
define <2 x double> @mul_subadd_pd128(<2 x double> %A, <2 x double> %B, <2 x double> %C) #0 {
; FMA3_256-LABEL: mul_subadd_pd128:
; FMA3_256:       # %bb.0: # %entry
; FMA3_256-NEXT:    vfmsubadd213pd %xmm2, %xmm1, %xmm0
; FMA3_256-NEXT:    retq
;
; FMA3_512-LABEL: mul_subadd_pd128:
; FMA3_512:       # %bb.0: # %entry
; FMA3_512-NEXT:    vfmsubadd213pd %xmm2, %xmm1, %xmm0
; FMA3_512-NEXT:    retq
;
; FMA4-LABEL: mul_subadd_pd128:
; FMA4:       # %bb.0: # %entry
; FMA4-NEXT:    vfmsubaddpd %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
entry:
  %prod = fmul <2 x double> %A, %B
  %diff = fsub <2 x double> %prod, %C
  %sum = fadd <2 x double> %prod, %C
  %blend = shufflevector <2 x double> %sum, <2 x double> %diff, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %blend
}

; 128-bit, single precision (4 lanes).
define <4 x float> @mul_subadd_ps128(<4 x float> %A, <4 x float> %B, <4 x float> %C) #0 {
; FMA3-LABEL: mul_subadd_ps128:
; FMA3:       # %bb.0: # %entry
; FMA3-NEXT:    vfmsubadd213ps %xmm2, %xmm1, %xmm0
; FMA3-NEXT:    retq
;
; FMA4-LABEL: mul_subadd_ps128:
; FMA4:       # %bb.0: # %entry
; FMA4-NEXT:    vfmsubaddps %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
entry:
  %prod = fmul <4 x float> %A, %B
  %diff = fsub <4 x float> %prod, %C
  %sum = fadd <4 x float> %prod, %C
  %blend = shufflevector <4 x float> %sum, <4 x float> %diff, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x float> %blend
}

; 256-bit, double precision (4 lanes).
define <4 x double> @mul_subadd_pd256(<4 x double> %A, <4 x double> %B, <4 x double> %C) #0 {
; FMA3-LABEL: mul_subadd_pd256:
; FMA3:       # %bb.0: # %entry
; FMA3-NEXT:    vfmsubadd213pd %ymm2, %ymm1, %ymm0
; FMA3-NEXT:    retq
;
; FMA4-LABEL: mul_subadd_pd256:
; FMA4:       # %bb.0: # %entry
; FMA4-NEXT:    vfmsubaddpd %ymm2, %ymm1, %ymm0, %ymm0
; FMA4-NEXT:    retq
entry:
  %prod = fmul <4 x double> %A, %B
  %diff = fsub <4 x double> %prod, %C
  %sum = fadd <4 x double> %prod, %C
  %blend = shufflevector <4 x double> %sum, <4 x double> %diff, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x double> %blend
}

; 256-bit, single precision (8 lanes).
define <8 x float> @mul_subadd_ps256(<8 x float> %A, <8 x float> %B, <8 x float> %C) #0 {
; FMA3-LABEL: mul_subadd_ps256:
; FMA3:       # %bb.0: # %entry
; FMA3-NEXT:    vfmsubadd213ps %ymm2, %ymm1, %ymm0
; FMA3-NEXT:    retq
;
; FMA4-LABEL: mul_subadd_ps256:
; FMA4:       # %bb.0: # %entry
; FMA4-NEXT:    vfmsubaddps %ymm2, %ymm1, %ymm0, %ymm0
; FMA4-NEXT:    retq
entry:
  %prod = fmul <8 x float> %A, %B
  %diff = fsub <8 x float> %prod, %C
  %sum = fadd <8 x float> %prod, %C
  %blend = shufflevector <8 x float> %sum, <8 x float> %diff, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x float> %blend
}

; 512-bit, double precision (8 lanes). Only the +avx512f run expects a single
; zmm instruction; the other two runs expect the operation to be split into
; two ymm halves.
define <8 x double> @mul_subadd_pd512(<8 x double> %A, <8 x double> %B, <8 x double> %C) #0 {
; FMA3_256-LABEL: mul_subadd_pd512:
; FMA3_256:       # %bb.0: # %entry
; FMA3_256-NEXT:    vfmsubadd213pd %ymm4, %ymm2, %ymm0
; FMA3_256-NEXT:    vfmsubadd213pd %ymm5, %ymm3, %ymm1
; FMA3_256-NEXT:    retq
;
; FMA3_512-LABEL: mul_subadd_pd512:
; FMA3_512:       # %bb.0: # %entry
; FMA3_512-NEXT:    vfmsubadd213pd %zmm2, %zmm1, %zmm0
; FMA3_512-NEXT:    retq
;
; FMA4-LABEL: mul_subadd_pd512:
; FMA4:       # %bb.0: # %entry
; FMA4-NEXT:    vfmsubaddpd %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfmsubaddpd %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
entry:
  %prod = fmul <8 x double> %A, %B
  %diff = fsub <8 x double> %prod, %C
  %sum = fadd <8 x double> %prod, %C
  %blend = shufflevector <8 x double> %sum, <8 x double> %diff, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x double> %blend
}

; 512-bit, single precision (16 lanes). Same register-width expectations as
; the 512-bit double-precision case.
define <16 x float> @mul_subadd_ps512(<16 x float> %A, <16 x float> %B, <16 x float> %C) #0 {
; FMA3_256-LABEL: mul_subadd_ps512:
; FMA3_256:       # %bb.0: # %entry
; FMA3_256-NEXT:    vfmsubadd213ps %ymm4, %ymm2, %ymm0
; FMA3_256-NEXT:    vfmsubadd213ps %ymm5, %ymm3, %ymm1
; FMA3_256-NEXT:    retq
;
; FMA3_512-LABEL: mul_subadd_ps512:
; FMA3_512:       # %bb.0: # %entry
; FMA3_512-NEXT:    vfmsubadd213ps %zmm2, %zmm1, %zmm0
; FMA3_512-NEXT:    retq
;
; FMA4-LABEL: mul_subadd_ps512:
; FMA4:       # %bb.0: # %entry
; FMA4-NEXT:    vfmsubaddps %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfmsubaddps %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
entry:
  %prod = fmul <16 x float> %A, %B
  %diff = fsub <16 x float> %prod, %C
  %sum = fadd <16 x float> %prod, %C
  %blend = shufflevector <16 x float> %sum, <16 x float> %diff, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
  ret <16 x float> %blend
}

; This should not be matched to fmsubadd because the mul is on the wrong side of the fsub.
; Negative case: the subtraction is %C - (%A * %B) instead of
; (%A * %B) - %C. The expected code for every run keeps the multiply,
; subtract, add and blend as four separate instructions.
define <2 x double> @mul_subadd_bad_commute(<2 x double> %A, <2 x double> %B, <2 x double> %C) #0 {
; FMA3-LABEL: mul_subadd_bad_commute:
; FMA3:       # %bb.0: # %entry
; FMA3-NEXT:    vmulpd %xmm1, %xmm0, %xmm0
; FMA3-NEXT:    vsubpd %xmm0, %xmm2, %xmm1
; FMA3-NEXT:    vaddpd %xmm2, %xmm0, %xmm0
; FMA3-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; FMA3-NEXT:    retq
;
; FMA4-LABEL: mul_subadd_bad_commute:
; FMA4:       # %bb.0: # %entry
; FMA4-NEXT:    vmulpd %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    vsubpd %xmm0, %xmm2, %xmm1
; FMA4-NEXT:    vaddpd %xmm2, %xmm0, %xmm0
; FMA4-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; FMA4-NEXT:    retq
entry:
  %prod = fmul <2 x double> %A, %B
  %diff = fsub <2 x double> %C, %prod
  %sum = fadd <2 x double> %prod, %C
  %blend = shufflevector <2 x double> %sum, <2 x double> %diff, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %blend
}

; Attribute group referenced as #0 by the function definitions in this file.
attributes #0 = { nounwind "unsafe-fp-math"="true" }