; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+fma | FileCheck -check-prefix=FMA3 -check-prefix=FMA3_256 %s
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+fma,+avx512f | FileCheck -check-prefix=FMA3 -check-prefix=FMA3_512 %s
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+fma4 | FileCheck -check-prefix=FMA4 %s

; This test checks the fusing of MUL + SUB/ADD to FMSUBADD.

; v2f64/128-bit: mul feeding an add/sub lane-interleave (even lanes = Add,
; odd lanes = Sub) should fuse to a single FMSUBADD.
define <2 x double> @mul_subadd_pd128(<2 x double> %A, <2 x double> %B, <2 x double> %C) #0 {
; FMA3_256-LABEL: mul_subadd_pd128:
; FMA3_256:       # %bb.0: # %entry
; FMA3_256-NEXT:    vfmsubadd213pd %xmm2, %xmm1, %xmm0
; FMA3_256-NEXT:    retq
;
; FMA3_512-LABEL: mul_subadd_pd128:
; FMA3_512:       # %bb.0: # %entry
; FMA3_512-NEXT:    vfmsubadd213pd %xmm2, %xmm1, %xmm0
; FMA3_512-NEXT:    retq
;
; FMA4-LABEL: mul_subadd_pd128:
; FMA4:       # %bb.0: # %entry
; FMA4-NEXT:    vfmsubaddpd %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
entry:
  %AB = fmul <2 x double> %A, %B
  %Sub = fsub <2 x double> %AB, %C
  %Add = fadd <2 x double> %AB, %C
  %subadd = shufflevector <2 x double> %Add, <2 x double> %Sub, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %subadd
}

; v4f32/128-bit variant of the FMSUBADD fusion pattern.
define <4 x float> @mul_subadd_ps128(<4 x float> %A, <4 x float> %B, <4 x float> %C) #0 {
; FMA3-LABEL: mul_subadd_ps128:
; FMA3:       # %bb.0: # %entry
; FMA3-NEXT:    vfmsubadd213ps  %xmm2, %xmm1, %xmm0
; FMA3-NEXT:    retq
;
; FMA4-LABEL: mul_subadd_ps128:
; FMA4:       # %bb.0: # %entry
; FMA4-NEXT:    vfmsubaddps %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
entry:
  %AB = fmul <4 x float> %A, %B
  %Sub = fsub <4 x float> %AB, %C
  %Add = fadd <4 x float> %AB, %C
  %subadd = shufflevector <4 x float> %Add, <4 x float> %Sub, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x float> %subadd
}

; v4f64/256-bit variant of the FMSUBADD fusion pattern.
define <4 x double> @mul_subadd_pd256(<4 x double> %A, <4 x double> %B, <4 x double> %C) #0 {
; FMA3-LABEL: mul_subadd_pd256:
; FMA3:       # %bb.0: # %entry
; FMA3-NEXT:    vfmsubadd213pd  %ymm2, %ymm1, %ymm0
; FMA3-NEXT:    retq
;
; FMA4-LABEL: mul_subadd_pd256:
; FMA4:       # %bb.0: # %entry
; FMA4-NEXT:    vfmsubaddpd %ymm2, %ymm1, %ymm0, %ymm0
; FMA4-NEXT:    retq
entry:
  %AB = fmul <4 x double> %A, %B
  %Sub = fsub <4 x double> %AB, %C
  %Add = fadd <4 x double> %AB, %C
  %subadd = shufflevector <4 x double> %Add, <4 x double> %Sub, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x double> %subadd
}

; v8f32/256-bit variant of the FMSUBADD fusion pattern.
define <8 x float> @mul_subadd_ps256(<8 x float> %A, <8 x float> %B, <8 x float> %C) #0 {
; FMA3-LABEL: mul_subadd_ps256:
; FMA3:       # %bb.0: # %entry
; FMA3-NEXT:    vfmsubadd213ps  %ymm2, %ymm1, %ymm0
; FMA3-NEXT:    retq
;
; FMA4-LABEL: mul_subadd_ps256:
; FMA4:       # %bb.0: # %entry
; FMA4-NEXT:    vfmsubaddps %ymm2, %ymm1, %ymm0, %ymm0
; FMA4-NEXT:    retq
entry:
  %AB = fmul <8 x float> %A, %B
  %Sub = fsub <8 x float> %AB, %C
  %Add = fadd <8 x float> %AB, %C
  %subadd = shufflevector <8 x float> %Add, <8 x float> %Sub, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x float> %subadd
}

; v8f64/512-bit variant: without AVX-512 the operation is split into two
; 256-bit FMSUBADDs; with AVX-512 a single zmm FMSUBADD is emitted.
define <8 x double> @mul_subadd_pd512(<8 x double> %A, <8 x double> %B, <8 x double> %C) #0 {
; FMA3_256-LABEL: mul_subadd_pd512:
; FMA3_256:       # %bb.0: # %entry
; FMA3_256-NEXT:    vfmsubadd213pd  %ymm4, %ymm2, %ymm0
; FMA3_256-NEXT:    vfmsubadd213pd  %ymm5, %ymm3, %ymm1
; FMA3_256-NEXT:    retq
;
; FMA3_512-LABEL: mul_subadd_pd512:
; FMA3_512:       # %bb.0: # %entry
; FMA3_512-NEXT:    vfmsubadd213pd  %zmm2, %zmm1, %zmm0
; FMA3_512-NEXT:    retq
;
; FMA4-LABEL: mul_subadd_pd512:
; FMA4:       # %bb.0: # %entry
; FMA4-NEXT:    vfmsubaddpd %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfmsubaddpd %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
entry:
  %AB = fmul <8 x double> %A, %B
  %Sub = fsub <8 x double> %AB, %C
  %Add = fadd <8 x double> %AB, %C
  %subadd = shufflevector <8 x double> %Add, <8 x double> %Sub, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x double> %subadd
}

; v16f32/512-bit variant: split into two 256-bit FMSUBADDs without AVX-512,
; a single zmm FMSUBADD with it.
define <16 x float> @mul_subadd_ps512(<16 x float> %A, <16 x float> %B, <16 x float> %C) #0 {
; FMA3_256-LABEL: mul_subadd_ps512:
; FMA3_256:       # %bb.0: # %entry
; FMA3_256-NEXT:    vfmsubadd213ps  %ymm4, %ymm2, %ymm0
; FMA3_256-NEXT:    vfmsubadd213ps  %ymm5, %ymm3, %ymm1
; FMA3_256-NEXT:    retq
;
; FMA3_512-LABEL: mul_subadd_ps512:
; FMA3_512:       # %bb.0: # %entry
; FMA3_512-NEXT:    vfmsubadd213ps  %zmm2, %zmm1, %zmm0
; FMA3_512-NEXT:    retq
;
; FMA4-LABEL: mul_subadd_ps512:
; FMA4:       # %bb.0: # %entry
; FMA4-NEXT:    vfmsubaddps %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfmsubaddps %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
entry:
  %AB = fmul <16 x float> %A, %B
  %Sub = fsub <16 x float> %AB, %C
  %Add = fadd <16 x float> %AB, %C
  %subadd = shufflevector <16 x float> %Add, <16 x float> %Sub, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
  ret <16 x float> %subadd
}

; This should not be matched to fmsubadd because the mul is on the wrong side of the fsub.
define <2 x double> @mul_subadd_bad_commute(<2 x double> %A, <2 x double> %B, <2 x double> %C) #0 {
; FMA3-LABEL: mul_subadd_bad_commute:
; FMA3:       # %bb.0: # %entry
; FMA3-NEXT:    vmulpd %xmm1, %xmm0, %xmm0
; FMA3-NEXT:    vsubpd %xmm0, %xmm2, %xmm1
; FMA3-NEXT:    vaddpd %xmm2, %xmm0, %xmm0
; FMA3-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; FMA3-NEXT:    retq
;
; FMA4-LABEL: mul_subadd_bad_commute:
; FMA4:       # %bb.0: # %entry
; FMA4-NEXT:    vmulpd %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    vsubpd %xmm0, %xmm2, %xmm1
; FMA4-NEXT:    vaddpd %xmm2, %xmm0, %xmm0
; FMA4-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; FMA4-NEXT:    retq
entry:
  %AB = fmul <2 x double> %A, %B
  ; Note: C - AB here (operands swapped vs. the positive tests above), so the
  ; checks expect separate mul/sub/add/blend instead of a fused FMSUBADD.
  %Sub = fsub <2 x double> %C, %AB
  %Add = fadd <2 x double> %AB, %C
  %subadd = shufflevector <2 x double> %Add, <2 x double> %Sub, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %subadd
}

; Shared attributes for all test functions: nounwind, with unsafe FP math
; enabled (the FSUB/FADD reassociation these fusions rely on is not
; IEEE-exact).
attributes #0 = { nounwind "unsafe-fp-math"="true" }