• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+fma | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=KNL
7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
8; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
9
10;
11; VFMADD
12;
13
14define void @test_vfmaddpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize {
15; GENERIC-LABEL: test_vfmaddpd_128:
16; GENERIC:       # %bb.0:
17; GENERIC-NEXT:    #APP
18; GENERIC-NEXT:    vfmadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
19; GENERIC-NEXT:    vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
20; GENERIC-NEXT:    vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
21; GENERIC-NEXT:    vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
22; GENERIC-NEXT:    vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
23; GENERIC-NEXT:    vfmadd231pd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
24; GENERIC-NEXT:    #NO_APP
25; GENERIC-NEXT:    retq # sched: [1:1.00]
26;
27; HASWELL-LABEL: test_vfmaddpd_128:
28; HASWELL:       # %bb.0:
29; HASWELL-NEXT:    #APP
30; HASWELL-NEXT:    vfmadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
31; HASWELL-NEXT:    vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
32; HASWELL-NEXT:    vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
33; HASWELL-NEXT:    vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [11:0.50]
34; HASWELL-NEXT:    vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [11:0.50]
35; HASWELL-NEXT:    vfmadd231pd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [11:0.50]
36; HASWELL-NEXT:    #NO_APP
37; HASWELL-NEXT:    retq # sched: [7:1.00]
38;
39; BROADWELL-LABEL: test_vfmaddpd_128:
40; BROADWELL:       # %bb.0:
41; BROADWELL-NEXT:    #APP
42; BROADWELL-NEXT:    vfmadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
43; BROADWELL-NEXT:    vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
44; BROADWELL-NEXT:    vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
45; BROADWELL-NEXT:    vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
46; BROADWELL-NEXT:    vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
47; BROADWELL-NEXT:    vfmadd231pd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
48; BROADWELL-NEXT:    #NO_APP
49; BROADWELL-NEXT:    retq # sched: [7:1.00]
50;
51; SKYLAKE-LABEL: test_vfmaddpd_128:
52; SKYLAKE:       # %bb.0:
53; SKYLAKE-NEXT:    #APP
54; SKYLAKE-NEXT:    vfmadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50]
55; SKYLAKE-NEXT:    vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50]
56; SKYLAKE-NEXT:    vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50]
57; SKYLAKE-NEXT:    vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
58; SKYLAKE-NEXT:    vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
59; SKYLAKE-NEXT:    vfmadd231pd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
60; SKYLAKE-NEXT:    #NO_APP
61; SKYLAKE-NEXT:    retq # sched: [7:1.00]
62;
63; KNL-LABEL: test_vfmaddpd_128:
64; KNL:       # %bb.0:
65; KNL-NEXT:    #APP
66; KNL-NEXT:    vfmadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
67; KNL-NEXT:    vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
68; KNL-NEXT:    vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
69; KNL-NEXT:    vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [11:0.50]
70; KNL-NEXT:    vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [11:0.50]
71; KNL-NEXT:    vfmadd231pd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [11:0.50]
72; KNL-NEXT:    #NO_APP
73; KNL-NEXT:    retq # sched: [7:1.00]
74;
75; SKX-LABEL: test_vfmaddpd_128:
76; SKX:       # %bb.0:
77; SKX-NEXT:    #APP
78; SKX-NEXT:    vfmadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50]
79; SKX-NEXT:    vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50]
80; SKX-NEXT:    vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50]
81; SKX-NEXT:    vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
82; SKX-NEXT:    vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
83; SKX-NEXT:    vfmadd231pd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
84; SKX-NEXT:    #NO_APP
85; SKX-NEXT:    retq # sched: [7:1.00]
86;
87; ZNVER1-LABEL: test_vfmaddpd_128:
88; ZNVER1:       # %bb.0:
89; ZNVER1-NEXT:    #APP
90; ZNVER1-NEXT:    vfmadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
91; ZNVER1-NEXT:    vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
92; ZNVER1-NEXT:    vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
93; ZNVER1-NEXT:    vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [12:0.50]
94; ZNVER1-NEXT:    vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [12:0.50]
95; ZNVER1-NEXT:    vfmadd231pd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [12:0.50]
96; ZNVER1-NEXT:    #NO_APP
97; ZNVER1-NEXT:    retq # sched: [1:0.50]
98  tail call void asm "vfmadd132pd $2, $1, $0 \0A\09 vfmadd213pd $2, $1, $0 \0A\09 vfmadd231pd $2, $1, $0 \0A\09 vfmadd132pd $3, $1, $0 \0A\09 vfmadd213pd $3, $1, $0 \0A\09 vfmadd231pd $3, $1, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
99  ret void
100}
101
102define void @test_vfmaddpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) optsize {
103; GENERIC-LABEL: test_vfmaddpd_256:
104; GENERIC:       # %bb.0:
105; GENERIC-NEXT:    #APP
106; GENERIC-NEXT:    vfmadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50]
107; GENERIC-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50]
108; GENERIC-NEXT:    vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50]
109; GENERIC-NEXT:    vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [10:0.50]
110; GENERIC-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [10:0.50]
111; GENERIC-NEXT:    vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [10:0.50]
112; GENERIC-NEXT:    #NO_APP
113; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
114; GENERIC-NEXT:    retq # sched: [1:1.00]
115;
116; HASWELL-LABEL: test_vfmaddpd_256:
117; HASWELL:       # %bb.0:
118; HASWELL-NEXT:    #APP
119; HASWELL-NEXT:    vfmadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50]
120; HASWELL-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50]
121; HASWELL-NEXT:    vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50]
122; HASWELL-NEXT:    vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [12:0.50]
123; HASWELL-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [12:0.50]
124; HASWELL-NEXT:    vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [12:0.50]
125; HASWELL-NEXT:    #NO_APP
126; HASWELL-NEXT:    vzeroupper # sched: [4:1.00]
127; HASWELL-NEXT:    retq # sched: [7:1.00]
128;
129; BROADWELL-LABEL: test_vfmaddpd_256:
130; BROADWELL:       # %bb.0:
131; BROADWELL-NEXT:    #APP
132; BROADWELL-NEXT:    vfmadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50]
133; BROADWELL-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50]
134; BROADWELL-NEXT:    vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50]
135; BROADWELL-NEXT:    vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [11:0.50]
136; BROADWELL-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [11:0.50]
137; BROADWELL-NEXT:    vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [11:0.50]
138; BROADWELL-NEXT:    #NO_APP
139; BROADWELL-NEXT:    vzeroupper # sched: [4:1.00]
140; BROADWELL-NEXT:    retq # sched: [7:1.00]
141;
142; SKYLAKE-LABEL: test_vfmaddpd_256:
143; SKYLAKE:       # %bb.0:
144; SKYLAKE-NEXT:    #APP
145; SKYLAKE-NEXT:    vfmadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [4:0.50]
146; SKYLAKE-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [4:0.50]
147; SKYLAKE-NEXT:    vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [4:0.50]
148; SKYLAKE-NEXT:    vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [11:0.50]
149; SKYLAKE-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [11:0.50]
150; SKYLAKE-NEXT:    vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [11:0.50]
151; SKYLAKE-NEXT:    #NO_APP
152; SKYLAKE-NEXT:    vzeroupper # sched: [4:1.00]
153; SKYLAKE-NEXT:    retq # sched: [7:1.00]
154;
155; KNL-LABEL: test_vfmaddpd_256:
156; KNL:       # %bb.0:
157; KNL-NEXT:    #APP
158; KNL-NEXT:    vfmadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50]
159; KNL-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50]
160; KNL-NEXT:    vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50]
161; KNL-NEXT:    vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [12:0.50]
162; KNL-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [12:0.50]
163; KNL-NEXT:    vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [12:0.50]
164; KNL-NEXT:    #NO_APP
165; KNL-NEXT:    retq # sched: [7:1.00]
166;
167; SKX-LABEL: test_vfmaddpd_256:
168; SKX:       # %bb.0:
169; SKX-NEXT:    #APP
170; SKX-NEXT:    vfmadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [4:0.50]
171; SKX-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [4:0.50]
172; SKX-NEXT:    vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [4:0.50]
173; SKX-NEXT:    vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [11:0.50]
174; SKX-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [11:0.50]
175; SKX-NEXT:    vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [11:0.50]
176; SKX-NEXT:    #NO_APP
177; SKX-NEXT:    vzeroupper # sched: [4:1.00]
178; SKX-NEXT:    retq # sched: [7:1.00]
179;
180; ZNVER1-LABEL: test_vfmaddpd_256:
181; ZNVER1:       # %bb.0:
182; ZNVER1-NEXT:    #APP
183; ZNVER1-NEXT:    vfmadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50]
184; ZNVER1-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50]
185; ZNVER1-NEXT:    vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50]
186; ZNVER1-NEXT:    vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [12:0.50]
187; ZNVER1-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [12:0.50]
188; ZNVER1-NEXT:    vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [12:0.50]
189; ZNVER1-NEXT:    #NO_APP
190; ZNVER1-NEXT:    vzeroupper # sched: [100:0.25]
191; ZNVER1-NEXT:    retq # sched: [1:0.50]
192  tail call void asm "vfmadd132pd $2, $1, $0 \0A\09 vfmadd213pd $2, $1, $0 \0A\09 vfmadd231pd $2, $1, $0 \0A\09 vfmadd132pd $3, $1, $0 \0A\09 vfmadd213pd $3, $1, $0 \0A\09 vfmadd231pd $3, $1, $0", "x,x,x,*m"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) nounwind
193  ret void
194}
195
196define void @test_vfmaddps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize {
197; GENERIC-LABEL: test_vfmaddps_128:
198; GENERIC:       # %bb.0:
199; GENERIC-NEXT:    #APP
200; GENERIC-NEXT:    vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
201; GENERIC-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
202; GENERIC-NEXT:    vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
203; GENERIC-NEXT:    vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
204; GENERIC-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
205; GENERIC-NEXT:    vfmadd231ps {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
206; GENERIC-NEXT:    #NO_APP
207; GENERIC-NEXT:    retq # sched: [1:1.00]
208;
209; HASWELL-LABEL: test_vfmaddps_128:
210; HASWELL:       # %bb.0:
211; HASWELL-NEXT:    #APP
212; HASWELL-NEXT:    vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
213; HASWELL-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
214; HASWELL-NEXT:    vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
215; HASWELL-NEXT:    vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [11:0.50]
216; HASWELL-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [11:0.50]
217; HASWELL-NEXT:    vfmadd231ps {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [11:0.50]
218; HASWELL-NEXT:    #NO_APP
219; HASWELL-NEXT:    retq # sched: [7:1.00]
220;
221; BROADWELL-LABEL: test_vfmaddps_128:
222; BROADWELL:       # %bb.0:
223; BROADWELL-NEXT:    #APP
224; BROADWELL-NEXT:    vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
225; BROADWELL-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
226; BROADWELL-NEXT:    vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
227; BROADWELL-NEXT:    vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
228; BROADWELL-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
229; BROADWELL-NEXT:    vfmadd231ps {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
230; BROADWELL-NEXT:    #NO_APP
231; BROADWELL-NEXT:    retq # sched: [7:1.00]
232;
233; SKYLAKE-LABEL: test_vfmaddps_128:
234; SKYLAKE:       # %bb.0:
235; SKYLAKE-NEXT:    #APP
236; SKYLAKE-NEXT:    vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50]
237; SKYLAKE-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50]
238; SKYLAKE-NEXT:    vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50]
239; SKYLAKE-NEXT:    vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
240; SKYLAKE-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
241; SKYLAKE-NEXT:    vfmadd231ps {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
242; SKYLAKE-NEXT:    #NO_APP
243; SKYLAKE-NEXT:    retq # sched: [7:1.00]
244;
245; KNL-LABEL: test_vfmaddps_128:
246; KNL:       # %bb.0:
247; KNL-NEXT:    #APP
248; KNL-NEXT:    vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
249; KNL-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
250; KNL-NEXT:    vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
251; KNL-NEXT:    vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [11:0.50]
252; KNL-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [11:0.50]
253; KNL-NEXT:    vfmadd231ps {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [11:0.50]
254; KNL-NEXT:    #NO_APP
255; KNL-NEXT:    retq # sched: [7:1.00]
256;
257; SKX-LABEL: test_vfmaddps_128:
258; SKX:       # %bb.0:
259; SKX-NEXT:    #APP
260; SKX-NEXT:    vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50]
261; SKX-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50]
262; SKX-NEXT:    vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50]
263; SKX-NEXT:    vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
264; SKX-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
265; SKX-NEXT:    vfmadd231ps {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
266; SKX-NEXT:    #NO_APP
267; SKX-NEXT:    retq # sched: [7:1.00]
268;
269; ZNVER1-LABEL: test_vfmaddps_128:
270; ZNVER1:       # %bb.0:
271; ZNVER1-NEXT:    #APP
272; ZNVER1-NEXT:    vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
273; ZNVER1-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
274; ZNVER1-NEXT:    vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
275; ZNVER1-NEXT:    vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [12:0.50]
276; ZNVER1-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [12:0.50]
277; ZNVER1-NEXT:    vfmadd231ps {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [12:0.50]
278; ZNVER1-NEXT:    #NO_APP
279; ZNVER1-NEXT:    retq # sched: [1:0.50]
280  tail call void asm "vfmadd132ps $2, $1, $0 \0A\09 vfmadd213ps $2, $1, $0 \0A\09 vfmadd231ps $2, $1, $0 \0A\09 vfmadd132ps $3, $1, $0 \0A\09 vfmadd213ps $3, $1, $0 \0A\09 vfmadd231ps $3, $1, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
281  ret void
282}
283
284define void @test_vfmaddps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) optsize {
285; GENERIC-LABEL: test_vfmaddps_256:
286; GENERIC:       # %bb.0:
287; GENERIC-NEXT:    #APP
288; GENERIC-NEXT:    vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50]
289; GENERIC-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50]
290; GENERIC-NEXT:    vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50]
291; GENERIC-NEXT:    vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [10:0.50]
292; GENERIC-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [10:0.50]
293; GENERIC-NEXT:    vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [10:0.50]
294; GENERIC-NEXT:    #NO_APP
295; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
296; GENERIC-NEXT:    retq # sched: [1:1.00]
297;
298; HASWELL-LABEL: test_vfmaddps_256:
299; HASWELL:       # %bb.0:
300; HASWELL-NEXT:    #APP
301; HASWELL-NEXT:    vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50]
302; HASWELL-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50]
303; HASWELL-NEXT:    vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50]
304; HASWELL-NEXT:    vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [12:0.50]
305; HASWELL-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [12:0.50]
306; HASWELL-NEXT:    vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [12:0.50]
307; HASWELL-NEXT:    #NO_APP
308; HASWELL-NEXT:    vzeroupper # sched: [4:1.00]
309; HASWELL-NEXT:    retq # sched: [7:1.00]
310;
311; BROADWELL-LABEL: test_vfmaddps_256:
312; BROADWELL:       # %bb.0:
313; BROADWELL-NEXT:    #APP
314; BROADWELL-NEXT:    vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50]
315; BROADWELL-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50]
316; BROADWELL-NEXT:    vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50]
317; BROADWELL-NEXT:    vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [11:0.50]
318; BROADWELL-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [11:0.50]
319; BROADWELL-NEXT:    vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [11:0.50]
320; BROADWELL-NEXT:    #NO_APP
321; BROADWELL-NEXT:    vzeroupper # sched: [4:1.00]
322; BROADWELL-NEXT:    retq # sched: [7:1.00]
323;
324; SKYLAKE-LABEL: test_vfmaddps_256:
325; SKYLAKE:       # %bb.0:
326; SKYLAKE-NEXT:    #APP
327; SKYLAKE-NEXT:    vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [4:0.50]
328; SKYLAKE-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [4:0.50]
329; SKYLAKE-NEXT:    vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [4:0.50]
330; SKYLAKE-NEXT:    vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [11:0.50]
331; SKYLAKE-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [11:0.50]
332; SKYLAKE-NEXT:    vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [11:0.50]
333; SKYLAKE-NEXT:    #NO_APP
334; SKYLAKE-NEXT:    vzeroupper # sched: [4:1.00]
335; SKYLAKE-NEXT:    retq # sched: [7:1.00]
336;
337; KNL-LABEL: test_vfmaddps_256:
338; KNL:       # %bb.0:
339; KNL-NEXT:    #APP
340; KNL-NEXT:    vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50]
341; KNL-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50]
342; KNL-NEXT:    vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50]
343; KNL-NEXT:    vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [12:0.50]
344; KNL-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [12:0.50]
345; KNL-NEXT:    vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [12:0.50]
346; KNL-NEXT:    #NO_APP
347; KNL-NEXT:    retq # sched: [7:1.00]
348;
349; SKX-LABEL: test_vfmaddps_256:
350; SKX:       # %bb.0:
351; SKX-NEXT:    #APP
352; SKX-NEXT:    vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [4:0.50]
353; SKX-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [4:0.50]
354; SKX-NEXT:    vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [4:0.50]
355; SKX-NEXT:    vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [11:0.50]
356; SKX-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [11:0.50]
357; SKX-NEXT:    vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [11:0.50]
358; SKX-NEXT:    #NO_APP
359; SKX-NEXT:    vzeroupper # sched: [4:1.00]
360; SKX-NEXT:    retq # sched: [7:1.00]
361;
362; ZNVER1-LABEL: test_vfmaddps_256:
363; ZNVER1:       # %bb.0:
364; ZNVER1-NEXT:    #APP
365; ZNVER1-NEXT:    vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50]
366; ZNVER1-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50]
367; ZNVER1-NEXT:    vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50]
368; ZNVER1-NEXT:    vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [12:0.50]
369; ZNVER1-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [12:0.50]
370; ZNVER1-NEXT:    vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [12:0.50]
371; ZNVER1-NEXT:    #NO_APP
372; ZNVER1-NEXT:    vzeroupper # sched: [100:0.25]
373; ZNVER1-NEXT:    retq # sched: [1:0.50]
374  tail call void asm "vfmadd132ps $2, $1, $0 \0A\09 vfmadd213ps $2, $1, $0 \0A\09 vfmadd231ps $2, $1, $0 \0A\09 vfmadd132ps $3, $1, $0 \0A\09 vfmadd213ps $3, $1, $0 \0A\09 vfmadd231ps $3, $1, $0", "x,x,x,*m"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) nounwind
375  ret void
376}
377
378define void @test_vfmaddsd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize {
379; GENERIC-LABEL: test_vfmaddsd_128:
380; GENERIC:       # %bb.0:
381; GENERIC-NEXT:    #APP
382; GENERIC-NEXT:    vfmadd132sd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
383; GENERIC-NEXT:    vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
384; GENERIC-NEXT:    vfmadd231sd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
385; GENERIC-NEXT:    vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
386; GENERIC-NEXT:    vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
387; GENERIC-NEXT:    vfmadd231sd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
388; GENERIC-NEXT:    #NO_APP
389; GENERIC-NEXT:    retq # sched: [1:1.00]
390;
391; HASWELL-LABEL: test_vfmaddsd_128:
392; HASWELL:       # %bb.0:
393; HASWELL-NEXT:    #APP
394; HASWELL-NEXT:    vfmadd132sd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
395; HASWELL-NEXT:    vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
396; HASWELL-NEXT:    vfmadd231sd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
397; HASWELL-NEXT:    vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
398; HASWELL-NEXT:    vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
399; HASWELL-NEXT:    vfmadd231sd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
400; HASWELL-NEXT:    #NO_APP
401; HASWELL-NEXT:    retq # sched: [7:1.00]
402;
403; BROADWELL-LABEL: test_vfmaddsd_128:
404; BROADWELL:       # %bb.0:
405; BROADWELL-NEXT:    #APP
406; BROADWELL-NEXT:    vfmadd132sd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
407; BROADWELL-NEXT:    vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
408; BROADWELL-NEXT:    vfmadd231sd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
409; BROADWELL-NEXT:    vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
410; BROADWELL-NEXT:    vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
411; BROADWELL-NEXT:    vfmadd231sd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
412; BROADWELL-NEXT:    #NO_APP
413; BROADWELL-NEXT:    retq # sched: [7:1.00]
414;
415; SKYLAKE-LABEL: test_vfmaddsd_128:
416; SKYLAKE:       # %bb.0:
417; SKYLAKE-NEXT:    #APP
418; SKYLAKE-NEXT:    vfmadd132sd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50]
419; SKYLAKE-NEXT:    vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50]
420; SKYLAKE-NEXT:    vfmadd231sd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50]
421; SKYLAKE-NEXT:    vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [9:0.50]
422; SKYLAKE-NEXT:    vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [9:0.50]
423; SKYLAKE-NEXT:    vfmadd231sd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [9:0.50]
424; SKYLAKE-NEXT:    #NO_APP
425; SKYLAKE-NEXT:    retq # sched: [7:1.00]
426;
427; KNL-LABEL: test_vfmaddsd_128:
428; KNL:       # %bb.0:
429; KNL-NEXT:    #APP
430; KNL-NEXT:    vfmadd132sd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
431; KNL-NEXT:    vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
432; KNL-NEXT:    vfmadd231sd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
433; KNL-NEXT:    vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
434; KNL-NEXT:    vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
435; KNL-NEXT:    vfmadd231sd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
436; KNL-NEXT:    #NO_APP
437; KNL-NEXT:    retq # sched: [7:1.00]
438;
439; SKX-LABEL: test_vfmaddsd_128:
440; SKX:       # %bb.0:
441; SKX-NEXT:    #APP
442; SKX-NEXT:    vfmadd132sd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50]
443; SKX-NEXT:    vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50]
444; SKX-NEXT:    vfmadd231sd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50]
445; SKX-NEXT:    vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [9:0.50]
446; SKX-NEXT:    vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [9:0.50]
447; SKX-NEXT:    vfmadd231sd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [9:0.50]
448; SKX-NEXT:    #NO_APP
449; SKX-NEXT:    retq # sched: [7:1.00]
450;
451; ZNVER1-LABEL: test_vfmaddsd_128:
452; ZNVER1:       # %bb.0:
453; ZNVER1-NEXT:    #APP
454; ZNVER1-NEXT:    vfmadd132sd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
455; ZNVER1-NEXT:    vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
456; ZNVER1-NEXT:    vfmadd231sd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
457; ZNVER1-NEXT:    vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [12:0.50]
458; ZNVER1-NEXT:    vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [12:0.50]
459; ZNVER1-NEXT:    vfmadd231sd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [12:0.50]
460; ZNVER1-NEXT:    #NO_APP
461; ZNVER1-NEXT:    retq # sched: [1:0.50]
462  tail call void asm "vfmadd132sd $2, $1, $0 \0A\09 vfmadd213sd $2, $1, $0 \0A\09 vfmadd231sd $2, $1, $0 \0A\09 vfmadd132sd $3, $1, $0 \0A\09 vfmadd213sd $3, $1, $0 \0A\09 vfmadd231sd $3, $1, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
463  ret void
464}
465
466define void @test_vfmaddss_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize {
467; GENERIC-LABEL: test_vfmaddss_128:
468; GENERIC:       # %bb.0:
469; GENERIC-NEXT:    #APP
470; GENERIC-NEXT:    vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
471; GENERIC-NEXT:    vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
472; GENERIC-NEXT:    vfmadd231ss {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
473; GENERIC-NEXT:    vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
474; GENERIC-NEXT:    vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
475; GENERIC-NEXT:    vfmadd231ss {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
476; GENERIC-NEXT:    #NO_APP
477; GENERIC-NEXT:    retq # sched: [1:1.00]
478;
479; HASWELL-LABEL: test_vfmaddss_128:
480; HASWELL:       # %bb.0:
481; HASWELL-NEXT:    #APP
482; HASWELL-NEXT:    vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
483; HASWELL-NEXT:    vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
484; HASWELL-NEXT:    vfmadd231ss {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
485; HASWELL-NEXT:    vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
486; HASWELL-NEXT:    vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
487; HASWELL-NEXT:    vfmadd231ss {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
488; HASWELL-NEXT:    #NO_APP
489; HASWELL-NEXT:    retq # sched: [7:1.00]
490;
491; BROADWELL-LABEL: test_vfmaddss_128:
492; BROADWELL:       # %bb.0:
493; BROADWELL-NEXT:    #APP
494; BROADWELL-NEXT:    vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
495; BROADWELL-NEXT:    vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
496; BROADWELL-NEXT:    vfmadd231ss {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
497; BROADWELL-NEXT:    vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
498; BROADWELL-NEXT:    vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
499; BROADWELL-NEXT:    vfmadd231ss {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
500; BROADWELL-NEXT:    #NO_APP
501; BROADWELL-NEXT:    retq # sched: [7:1.00]
502;
503; SKYLAKE-LABEL: test_vfmaddss_128:
504; SKYLAKE:       # %bb.0:
505; SKYLAKE-NEXT:    #APP
506; SKYLAKE-NEXT:    vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50]
507; SKYLAKE-NEXT:    vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50]
508; SKYLAKE-NEXT:    vfmadd231ss {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50]
509; SKYLAKE-NEXT:    vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [9:0.50]
510; SKYLAKE-NEXT:    vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [9:0.50]
511; SKYLAKE-NEXT:    vfmadd231ss {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [9:0.50]
512; SKYLAKE-NEXT:    #NO_APP
513; SKYLAKE-NEXT:    retq # sched: [7:1.00]
514;
515; KNL-LABEL: test_vfmaddss_128:
516; KNL:       # %bb.0:
517; KNL-NEXT:    #APP
518; KNL-NEXT:    vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
519; KNL-NEXT:    vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
520; KNL-NEXT:    vfmadd231ss {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
521; KNL-NEXT:    vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
522; KNL-NEXT:    vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
523; KNL-NEXT:    vfmadd231ss {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
524; KNL-NEXT:    #NO_APP
525; KNL-NEXT:    retq # sched: [7:1.00]
526;
527; SKX-LABEL: test_vfmaddss_128:
528; SKX:       # %bb.0:
529; SKX-NEXT:    #APP
530; SKX-NEXT:    vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50]
531; SKX-NEXT:    vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50]
532; SKX-NEXT:    vfmadd231ss {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50]
533; SKX-NEXT:    vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [9:0.50]
534; SKX-NEXT:    vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [9:0.50]
535; SKX-NEXT:    vfmadd231ss {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [9:0.50]
536; SKX-NEXT:    #NO_APP
537; SKX-NEXT:    retq # sched: [7:1.00]
538;
539; ZNVER1-LABEL: test_vfmaddss_128:
540; ZNVER1:       # %bb.0:
541; ZNVER1-NEXT:    #APP
542; ZNVER1-NEXT:    vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
543; ZNVER1-NEXT:    vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
544; ZNVER1-NEXT:    vfmadd231ss {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
545; ZNVER1-NEXT:    vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [12:0.50]
546; ZNVER1-NEXT:    vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [12:0.50]
547; ZNVER1-NEXT:    vfmadd231ss {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [12:0.50]
548; ZNVER1-NEXT:    #NO_APP
549; ZNVER1-NEXT:    retq # sched: [1:0.50]
550  tail call void asm "vfmadd132ss $2, $1, $0 \0A\09 vfmadd213ss $2, $1, $0 \0A\09 vfmadd231ss $2, $1, $0 \0A\09 vfmadd132ss $3, $1, $0 \0A\09 vfmadd213ss $3, $1, $0 \0A\09 vfmadd231ss $3, $1, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
551  ret void
552}
553
554;
555; VFMADDSUB
556;
557
558define void @test_vfmaddsubpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize {
559; GENERIC-LABEL: test_vfmaddsubpd_128:
560; GENERIC:       # %bb.0:
561; GENERIC-NEXT:    #APP
562; GENERIC-NEXT:    vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50]
563; GENERIC-NEXT:    vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50]
564; GENERIC-NEXT:    vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50]
565; GENERIC-NEXT:    vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50]
566; GENERIC-NEXT:    vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50]
567; GENERIC-NEXT:    vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50]
568; GENERIC-NEXT:    #NO_APP
569; GENERIC-NEXT:    retq # sched: [1:1.00]
570;
571; HASWELL-LABEL: test_vfmaddsubpd_128:
572; HASWELL:       # %bb.0:
573; HASWELL-NEXT:    #APP
574; HASWELL-NEXT:    vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50]
575; HASWELL-NEXT:    vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50]
576; HASWELL-NEXT:    vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50]
577; HASWELL-NEXT:    vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [11:0.50]
578; HASWELL-NEXT:    vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [11:0.50]
579; HASWELL-NEXT:    vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [11:0.50]
580; HASWELL-NEXT:    #NO_APP
581; HASWELL-NEXT:    retq # sched: [7:1.00]
582;
583; BROADWELL-LABEL: test_vfmaddsubpd_128:
584; BROADWELL:       # %bb.0:
585; BROADWELL-NEXT:    #APP
586; BROADWELL-NEXT:    vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50]
587; BROADWELL-NEXT:    vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50]
588; BROADWELL-NEXT:    vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50]
589; BROADWELL-NEXT:    vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50]
590; BROADWELL-NEXT:    vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50]
591; BROADWELL-NEXT:    vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50]
592; BROADWELL-NEXT:    #NO_APP
593; BROADWELL-NEXT:    retq # sched: [7:1.00]
594;
595; SKYLAKE-LABEL: test_vfmaddsubpd_128:
596; SKYLAKE:       # %bb.0:
597; SKYLAKE-NEXT:    #APP
598; SKYLAKE-NEXT:    vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [4:0.50]
599; SKYLAKE-NEXT:    vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [4:0.50]
600; SKYLAKE-NEXT:    vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [4:0.50]
601; SKYLAKE-NEXT:    vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50]
602; SKYLAKE-NEXT:    vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50]
603; SKYLAKE-NEXT:    vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50]
604; SKYLAKE-NEXT:    #NO_APP
605; SKYLAKE-NEXT:    retq # sched: [7:1.00]
606;
607; KNL-LABEL: test_vfmaddsubpd_128:
608; KNL:       # %bb.0:
609; KNL-NEXT:    #APP
610; KNL-NEXT:    vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50]
611; KNL-NEXT:    vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50]
612; KNL-NEXT:    vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50]
613; KNL-NEXT:    vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [11:0.50]
614; KNL-NEXT:    vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [11:0.50]
615; KNL-NEXT:    vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [11:0.50]
616; KNL-NEXT:    #NO_APP
617; KNL-NEXT:    retq # sched: [7:1.00]
618;
619; SKX-LABEL: test_vfmaddsubpd_128:
620; SKX:       # %bb.0:
621; SKX-NEXT:    #APP
622; SKX-NEXT:    vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [4:0.50]
623; SKX-NEXT:    vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [4:0.50]
624; SKX-NEXT:    vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [4:0.50]
625; SKX-NEXT:    vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50]
626; SKX-NEXT:    vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50]
627; SKX-NEXT:    vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50]
628; SKX-NEXT:    #NO_APP
629; SKX-NEXT:    retq # sched: [7:1.00]
630;
631; ZNVER1-LABEL: test_vfmaddsubpd_128:
632; ZNVER1:       # %bb.0:
633; ZNVER1-NEXT:    #APP
634; ZNVER1-NEXT:    vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50]
635; ZNVER1-NEXT:    vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50]
636; ZNVER1-NEXT:    vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50]
637; ZNVER1-NEXT:    vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [12:0.50]
638; ZNVER1-NEXT:    vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [12:0.50]
639; ZNVER1-NEXT:    vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [12:0.50]
640; ZNVER1-NEXT:    #NO_APP
641; ZNVER1-NEXT:    retq # sched: [1:0.50]
642  tail call void asm "vfmaddsub132pd $2, $1, $0 \0A\09 vfmaddsub213pd $2, $1, $0 \0A\09 vfmaddsub231pd $2, $1, $0 \0A\09 vfmaddsub132pd $3, $1, $0 \0A\09 vfmaddsub213pd $3, $1, $0 \0A\09 vfmaddsub231pd $3, $1, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
643  ret void
644}
645
646define void @test_vfmaddsubpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) optsize {
647; GENERIC-LABEL: test_vfmaddsubpd_256:
648; GENERIC:       # %bb.0:
649; GENERIC-NEXT:    #APP
650; GENERIC-NEXT:    vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50]
651; GENERIC-NEXT:    vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50]
652; GENERIC-NEXT:    vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50]
653; GENERIC-NEXT:    vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [10:0.50]
654; GENERIC-NEXT:    vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [10:0.50]
655; GENERIC-NEXT:    vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [10:0.50]
656; GENERIC-NEXT:    #NO_APP
657; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
658; GENERIC-NEXT:    retq # sched: [1:1.00]
659;
660; HASWELL-LABEL: test_vfmaddsubpd_256:
661; HASWELL:       # %bb.0:
662; HASWELL-NEXT:    #APP
663; HASWELL-NEXT:    vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50]
664; HASWELL-NEXT:    vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50]
665; HASWELL-NEXT:    vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50]
666; HASWELL-NEXT:    vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [12:0.50]
667; HASWELL-NEXT:    vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [12:0.50]
668; HASWELL-NEXT:    vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [12:0.50]
669; HASWELL-NEXT:    #NO_APP
670; HASWELL-NEXT:    vzeroupper # sched: [4:1.00]
671; HASWELL-NEXT:    retq # sched: [7:1.00]
672;
673; BROADWELL-LABEL: test_vfmaddsubpd_256:
674; BROADWELL:       # %bb.0:
675; BROADWELL-NEXT:    #APP
676; BROADWELL-NEXT:    vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50]
677; BROADWELL-NEXT:    vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50]
678; BROADWELL-NEXT:    vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50]
679; BROADWELL-NEXT:    vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [11:0.50]
680; BROADWELL-NEXT:    vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [11:0.50]
681; BROADWELL-NEXT:    vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [11:0.50]
682; BROADWELL-NEXT:    #NO_APP
683; BROADWELL-NEXT:    vzeroupper # sched: [4:1.00]
684; BROADWELL-NEXT:    retq # sched: [7:1.00]
685;
686; SKYLAKE-LABEL: test_vfmaddsubpd_256:
687; SKYLAKE:       # %bb.0:
688; SKYLAKE-NEXT:    #APP
689; SKYLAKE-NEXT:    vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [4:0.50]
690; SKYLAKE-NEXT:    vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [4:0.50]
691; SKYLAKE-NEXT:    vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [4:0.50]
692; SKYLAKE-NEXT:    vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [11:0.50]
693; SKYLAKE-NEXT:    vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [11:0.50]
694; SKYLAKE-NEXT:    vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [11:0.50]
695; SKYLAKE-NEXT:    #NO_APP
696; SKYLAKE-NEXT:    vzeroupper # sched: [4:1.00]
697; SKYLAKE-NEXT:    retq # sched: [7:1.00]
698;
699; KNL-LABEL: test_vfmaddsubpd_256:
700; KNL:       # %bb.0:
701; KNL-NEXT:    #APP
702; KNL-NEXT:    vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50]
703; KNL-NEXT:    vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50]
704; KNL-NEXT:    vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50]
705; KNL-NEXT:    vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [12:0.50]
706; KNL-NEXT:    vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [12:0.50]
707; KNL-NEXT:    vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [12:0.50]
708; KNL-NEXT:    #NO_APP
709; KNL-NEXT:    retq # sched: [7:1.00]
710;
711; SKX-LABEL: test_vfmaddsubpd_256:
712; SKX:       # %bb.0:
713; SKX-NEXT:    #APP
714; SKX-NEXT:    vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [4:0.50]
715; SKX-NEXT:    vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [4:0.50]
716; SKX-NEXT:    vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [4:0.50]
717; SKX-NEXT:    vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [11:0.50]
718; SKX-NEXT:    vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [11:0.50]
719; SKX-NEXT:    vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [11:0.50]
720; SKX-NEXT:    #NO_APP
721; SKX-NEXT:    vzeroupper # sched: [4:1.00]
722; SKX-NEXT:    retq # sched: [7:1.00]
723;
724; ZNVER1-LABEL: test_vfmaddsubpd_256:
725; ZNVER1:       # %bb.0:
726; ZNVER1-NEXT:    #APP
727; ZNVER1-NEXT:    vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50]
728; ZNVER1-NEXT:    vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50]
729; ZNVER1-NEXT:    vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50]
730; ZNVER1-NEXT:    vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [12:0.50]
731; ZNVER1-NEXT:    vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [12:0.50]
732; ZNVER1-NEXT:    vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [12:0.50]
733; ZNVER1-NEXT:    #NO_APP
734; ZNVER1-NEXT:    vzeroupper # sched: [100:0.25]
735; ZNVER1-NEXT:    retq # sched: [1:0.50]
736  tail call void asm "vfmaddsub132pd $2, $1, $0 \0A\09 vfmaddsub213pd $2, $1, $0 \0A\09 vfmaddsub231pd $2, $1, $0 \0A\09 vfmaddsub132pd $3, $1, $0 \0A\09 vfmaddsub213pd $3, $1, $0 \0A\09 vfmaddsub231pd $3, $1, $0", "x,x,x,*m"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) nounwind
737  ret void
738}
739
740define void @test_vfmaddsubps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize {
741; GENERIC-LABEL: test_vfmaddsubps_128:
742; GENERIC:       # %bb.0:
743; GENERIC-NEXT:    #APP
744; GENERIC-NEXT:    vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50]
745; GENERIC-NEXT:    vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50]
746; GENERIC-NEXT:    vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50]
747; GENERIC-NEXT:    vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50]
748; GENERIC-NEXT:    vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50]
749; GENERIC-NEXT:    vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50]
750; GENERIC-NEXT:    #NO_APP
751; GENERIC-NEXT:    retq # sched: [1:1.00]
752;
753; HASWELL-LABEL: test_vfmaddsubps_128:
754; HASWELL:       # %bb.0:
755; HASWELL-NEXT:    #APP
756; HASWELL-NEXT:    vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50]
757; HASWELL-NEXT:    vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50]
758; HASWELL-NEXT:    vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50]
759; HASWELL-NEXT:    vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [11:0.50]
760; HASWELL-NEXT:    vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [11:0.50]
761; HASWELL-NEXT:    vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [11:0.50]
762; HASWELL-NEXT:    #NO_APP
763; HASWELL-NEXT:    retq # sched: [7:1.00]
764;
765; BROADWELL-LABEL: test_vfmaddsubps_128:
766; BROADWELL:       # %bb.0:
767; BROADWELL-NEXT:    #APP
768; BROADWELL-NEXT:    vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50]
769; BROADWELL-NEXT:    vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50]
770; BROADWELL-NEXT:    vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50]
771; BROADWELL-NEXT:    vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50]
772; BROADWELL-NEXT:    vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50]
773; BROADWELL-NEXT:    vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50]
774; BROADWELL-NEXT:    #NO_APP
775; BROADWELL-NEXT:    retq # sched: [7:1.00]
776;
777; SKYLAKE-LABEL: test_vfmaddsubps_128:
778; SKYLAKE:       # %bb.0:
779; SKYLAKE-NEXT:    #APP
780; SKYLAKE-NEXT:    vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [4:0.50]
781; SKYLAKE-NEXT:    vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [4:0.50]
782; SKYLAKE-NEXT:    vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [4:0.50]
783; SKYLAKE-NEXT:    vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50]
784; SKYLAKE-NEXT:    vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50]
785; SKYLAKE-NEXT:    vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50]
786; SKYLAKE-NEXT:    #NO_APP
787; SKYLAKE-NEXT:    retq # sched: [7:1.00]
788;
789; KNL-LABEL: test_vfmaddsubps_128:
790; KNL:       # %bb.0:
791; KNL-NEXT:    #APP
792; KNL-NEXT:    vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50]
793; KNL-NEXT:    vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50]
794; KNL-NEXT:    vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50]
795; KNL-NEXT:    vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [11:0.50]
796; KNL-NEXT:    vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [11:0.50]
797; KNL-NEXT:    vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [11:0.50]
798; KNL-NEXT:    #NO_APP
799; KNL-NEXT:    retq # sched: [7:1.00]
800;
801; SKX-LABEL: test_vfmaddsubps_128:
802; SKX:       # %bb.0:
803; SKX-NEXT:    #APP
804; SKX-NEXT:    vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [4:0.50]
805; SKX-NEXT:    vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [4:0.50]
806; SKX-NEXT:    vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [4:0.50]
807; SKX-NEXT:    vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50]
808; SKX-NEXT:    vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50]
809; SKX-NEXT:    vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50]
810; SKX-NEXT:    #NO_APP
811; SKX-NEXT:    retq # sched: [7:1.00]
812;
813; ZNVER1-LABEL: test_vfmaddsubps_128:
814; ZNVER1:       # %bb.0:
815; ZNVER1-NEXT:    #APP
816; ZNVER1-NEXT:    vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50]
817; ZNVER1-NEXT:    vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50]
818; ZNVER1-NEXT:    vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50]
819; ZNVER1-NEXT:    vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [12:0.50]
820; ZNVER1-NEXT:    vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [12:0.50]
821; ZNVER1-NEXT:    vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [12:0.50]
822; ZNVER1-NEXT:    #NO_APP
823; ZNVER1-NEXT:    retq # sched: [1:0.50]
824  tail call void asm "vfmaddsub132ps $2, $1, $0 \0A\09 vfmaddsub213ps $2, $1, $0 \0A\09 vfmaddsub231ps $2, $1, $0 \0A\09 vfmaddsub132ps $3, $1, $0 \0A\09 vfmaddsub213ps $3, $1, $0 \0A\09 vfmaddsub231ps $3, $1, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
825  ret void
826}
827
828define void @test_vfmaddsubps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) optsize {
829; GENERIC-LABEL: test_vfmaddsubps_256:
830; GENERIC:       # %bb.0:
831; GENERIC-NEXT:    #APP
832; GENERIC-NEXT:    vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50]
833; GENERIC-NEXT:    vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50]
834; GENERIC-NEXT:    vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50]
835; GENERIC-NEXT:    vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [10:0.50]
836; GENERIC-NEXT:    vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [10:0.50]
837; GENERIC-NEXT:    vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [10:0.50]
838; GENERIC-NEXT:    #NO_APP
839; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
840; GENERIC-NEXT:    retq # sched: [1:1.00]
841;
842; HASWELL-LABEL: test_vfmaddsubps_256:
843; HASWELL:       # %bb.0:
844; HASWELL-NEXT:    #APP
845; HASWELL-NEXT:    vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50]
846; HASWELL-NEXT:    vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50]
847; HASWELL-NEXT:    vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50]
848; HASWELL-NEXT:    vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [12:0.50]
849; HASWELL-NEXT:    vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [12:0.50]
850; HASWELL-NEXT:    vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [12:0.50]
851; HASWELL-NEXT:    #NO_APP
852; HASWELL-NEXT:    vzeroupper # sched: [4:1.00]
853; HASWELL-NEXT:    retq # sched: [7:1.00]
854;
855; BROADWELL-LABEL: test_vfmaddsubps_256:
856; BROADWELL:       # %bb.0:
857; BROADWELL-NEXT:    #APP
858; BROADWELL-NEXT:    vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50]
859; BROADWELL-NEXT:    vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50]
860; BROADWELL-NEXT:    vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50]
861; BROADWELL-NEXT:    vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [11:0.50]
862; BROADWELL-NEXT:    vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [11:0.50]
863; BROADWELL-NEXT:    vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [11:0.50]
864; BROADWELL-NEXT:    #NO_APP
865; BROADWELL-NEXT:    vzeroupper # sched: [4:1.00]
866; BROADWELL-NEXT:    retq # sched: [7:1.00]
867;
868; SKYLAKE-LABEL: test_vfmaddsubps_256:
869; SKYLAKE:       # %bb.0:
870; SKYLAKE-NEXT:    #APP
871; SKYLAKE-NEXT:    vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [4:0.50]
872; SKYLAKE-NEXT:    vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [4:0.50]
873; SKYLAKE-NEXT:    vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [4:0.50]
874; SKYLAKE-NEXT:    vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [11:0.50]
875; SKYLAKE-NEXT:    vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [11:0.50]
876; SKYLAKE-NEXT:    vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [11:0.50]
877; SKYLAKE-NEXT:    #NO_APP
878; SKYLAKE-NEXT:    vzeroupper # sched: [4:1.00]
879; SKYLAKE-NEXT:    retq # sched: [7:1.00]
880;
881; KNL-LABEL: test_vfmaddsubps_256:
882; KNL:       # %bb.0:
883; KNL-NEXT:    #APP
884; KNL-NEXT:    vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50]
885; KNL-NEXT:    vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50]
886; KNL-NEXT:    vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50]
887; KNL-NEXT:    vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [12:0.50]
888; KNL-NEXT:    vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [12:0.50]
889; KNL-NEXT:    vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [12:0.50]
890; KNL-NEXT:    #NO_APP
891; KNL-NEXT:    retq # sched: [7:1.00]
892;
893; SKX-LABEL: test_vfmaddsubps_256:
894; SKX:       # %bb.0:
895; SKX-NEXT:    #APP
896; SKX-NEXT:    vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [4:0.50]
897; SKX-NEXT:    vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [4:0.50]
898; SKX-NEXT:    vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [4:0.50]
899; SKX-NEXT:    vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [11:0.50]
900; SKX-NEXT:    vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [11:0.50]
901; SKX-NEXT:    vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [11:0.50]
902; SKX-NEXT:    #NO_APP
903; SKX-NEXT:    vzeroupper # sched: [4:1.00]
904; SKX-NEXT:    retq # sched: [7:1.00]
905;
906; ZNVER1-LABEL: test_vfmaddsubps_256:
907; ZNVER1:       # %bb.0:
908; ZNVER1-NEXT:    #APP
909; ZNVER1-NEXT:    vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50]
910; ZNVER1-NEXT:    vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50]
911; ZNVER1-NEXT:    vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50]
912; ZNVER1-NEXT:    vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [12:0.50]
913; ZNVER1-NEXT:    vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [12:0.50]
914; ZNVER1-NEXT:    vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [12:0.50]
915; ZNVER1-NEXT:    #NO_APP
916; ZNVER1-NEXT:    vzeroupper # sched: [100:0.25]
917; ZNVER1-NEXT:    retq # sched: [1:0.50]
918  tail call void asm "vfmaddsub132ps $2, $1, $0 \0A\09 vfmaddsub213ps $2, $1, $0 \0A\09 vfmaddsub231ps $2, $1, $0 \0A\09 vfmaddsub132ps $3, $1, $0 \0A\09 vfmaddsub213ps $3, $1, $0 \0A\09 vfmaddsub231ps $3, $1, $0", "x,x,x,*m"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) nounwind
919  ret void
920}
921
922;
923; VFMSUBADD
924;
925
926define void @test_vfmsubaddpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize {
927; GENERIC-LABEL: test_vfmsubaddpd_128:
928; GENERIC:       # %bb.0:
929; GENERIC-NEXT:    #APP
930; GENERIC-NEXT:    vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50]
931; GENERIC-NEXT:    vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50]
932; GENERIC-NEXT:    vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50]
933; GENERIC-NEXT:    vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50]
934; GENERIC-NEXT:    vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50]
935; GENERIC-NEXT:    vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50]
936; GENERIC-NEXT:    #NO_APP
937; GENERIC-NEXT:    retq # sched: [1:1.00]
938;
939; HASWELL-LABEL: test_vfmsubaddpd_128:
940; HASWELL:       # %bb.0:
941; HASWELL-NEXT:    #APP
942; HASWELL-NEXT:    vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50]
943; HASWELL-NEXT:    vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50]
944; HASWELL-NEXT:    vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50]
945; HASWELL-NEXT:    vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [11:0.50]
946; HASWELL-NEXT:    vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [11:0.50]
947; HASWELL-NEXT:    vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [11:0.50]
948; HASWELL-NEXT:    #NO_APP
949; HASWELL-NEXT:    retq # sched: [7:1.00]
950;
951; BROADWELL-LABEL: test_vfmsubaddpd_128:
952; BROADWELL:       # %bb.0:
953; BROADWELL-NEXT:    #APP
954; BROADWELL-NEXT:    vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50]
955; BROADWELL-NEXT:    vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50]
956; BROADWELL-NEXT:    vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50]
957; BROADWELL-NEXT:    vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50]
958; BROADWELL-NEXT:    vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50]
959; BROADWELL-NEXT:    vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50]
960; BROADWELL-NEXT:    #NO_APP
961; BROADWELL-NEXT:    retq # sched: [7:1.00]
962;
963; SKYLAKE-LABEL: test_vfmsubaddpd_128:
964; SKYLAKE:       # %bb.0:
965; SKYLAKE-NEXT:    #APP
966; SKYLAKE-NEXT:    vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [4:0.50]
967; SKYLAKE-NEXT:    vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [4:0.50]
968; SKYLAKE-NEXT:    vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [4:0.50]
969; SKYLAKE-NEXT:    vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50]
970; SKYLAKE-NEXT:    vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50]
971; SKYLAKE-NEXT:    vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50]
972; SKYLAKE-NEXT:    #NO_APP
973; SKYLAKE-NEXT:    retq # sched: [7:1.00]
974;
975; KNL-LABEL: test_vfmsubaddpd_128:
976; KNL:       # %bb.0:
977; KNL-NEXT:    #APP
978; KNL-NEXT:    vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50]
979; KNL-NEXT:    vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50]
980; KNL-NEXT:    vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50]
981; KNL-NEXT:    vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [11:0.50]
982; KNL-NEXT:    vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [11:0.50]
983; KNL-NEXT:    vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [11:0.50]
984; KNL-NEXT:    #NO_APP
985; KNL-NEXT:    retq # sched: [7:1.00]
986;
987; SKX-LABEL: test_vfmsubaddpd_128:
988; SKX:       # %bb.0:
989; SKX-NEXT:    #APP
990; SKX-NEXT:    vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [4:0.50]
991; SKX-NEXT:    vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [4:0.50]
992; SKX-NEXT:    vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [4:0.50]
993; SKX-NEXT:    vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50]
994; SKX-NEXT:    vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50]
995; SKX-NEXT:    vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50]
996; SKX-NEXT:    #NO_APP
997; SKX-NEXT:    retq # sched: [7:1.00]
998;
999; ZNVER1-LABEL: test_vfmsubaddpd_128:
1000; ZNVER1:       # %bb.0:
1001; ZNVER1-NEXT:    #APP
1002; ZNVER1-NEXT:    vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50]
1003; ZNVER1-NEXT:    vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50]
1004; ZNVER1-NEXT:    vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50]
1005; ZNVER1-NEXT:    vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [12:0.50]
1006; ZNVER1-NEXT:    vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [12:0.50]
1007; ZNVER1-NEXT:    vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [12:0.50]
1008; ZNVER1-NEXT:    #NO_APP
1009; ZNVER1-NEXT:    retq # sched: [1:0.50]
1010  tail call void asm "vfmsubadd132pd $2, $1, $0 \0A\09 vfmsubadd213pd $2, $1, $0 \0A\09 vfmsubadd231pd $2, $1, $0 \0A\09 vfmsubadd132pd $3, $1, $0 \0A\09 vfmsubadd213pd $3, $1, $0 \0A\09 vfmsubadd231pd $3, $1, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
1011  ret void
1012}
1013
1014define void @test_vfmsubaddpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) optsize {
1015; GENERIC-LABEL: test_vfmsubaddpd_256:
1016; GENERIC:       # %bb.0:
1017; GENERIC-NEXT:    #APP
1018; GENERIC-NEXT:    vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50]
1019; GENERIC-NEXT:    vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50]
1020; GENERIC-NEXT:    vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50]
1021; GENERIC-NEXT:    vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [10:0.50]
1022; GENERIC-NEXT:    vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [10:0.50]
1023; GENERIC-NEXT:    vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [10:0.50]
1024; GENERIC-NEXT:    #NO_APP
1025; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
1026; GENERIC-NEXT:    retq # sched: [1:1.00]
1027;
1028; HASWELL-LABEL: test_vfmsubaddpd_256:
1029; HASWELL:       # %bb.0:
1030; HASWELL-NEXT:    #APP
1031; HASWELL-NEXT:    vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50]
1032; HASWELL-NEXT:    vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50]
1033; HASWELL-NEXT:    vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50]
1034; HASWELL-NEXT:    vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [12:0.50]
1035; HASWELL-NEXT:    vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [12:0.50]
1036; HASWELL-NEXT:    vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [12:0.50]
1037; HASWELL-NEXT:    #NO_APP
1038; HASWELL-NEXT:    vzeroupper # sched: [4:1.00]
1039; HASWELL-NEXT:    retq # sched: [7:1.00]
1040;
1041; BROADWELL-LABEL: test_vfmsubaddpd_256:
1042; BROADWELL:       # %bb.0:
1043; BROADWELL-NEXT:    #APP
1044; BROADWELL-NEXT:    vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50]
1045; BROADWELL-NEXT:    vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50]
1046; BROADWELL-NEXT:    vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50]
1047; BROADWELL-NEXT:    vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [11:0.50]
1048; BROADWELL-NEXT:    vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [11:0.50]
1049; BROADWELL-NEXT:    vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [11:0.50]
1050; BROADWELL-NEXT:    #NO_APP
1051; BROADWELL-NEXT:    vzeroupper # sched: [4:1.00]
1052; BROADWELL-NEXT:    retq # sched: [7:1.00]
1053;
1054; SKYLAKE-LABEL: test_vfmsubaddpd_256:
1055; SKYLAKE:       # %bb.0:
1056; SKYLAKE-NEXT:    #APP
1057; SKYLAKE-NEXT:    vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [4:0.50]
1058; SKYLAKE-NEXT:    vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [4:0.50]
1059; SKYLAKE-NEXT:    vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [4:0.50]
1060; SKYLAKE-NEXT:    vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [11:0.50]
1061; SKYLAKE-NEXT:    vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [11:0.50]
1062; SKYLAKE-NEXT:    vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [11:0.50]
1063; SKYLAKE-NEXT:    #NO_APP
1064; SKYLAKE-NEXT:    vzeroupper # sched: [4:1.00]
1065; SKYLAKE-NEXT:    retq # sched: [7:1.00]
1066;
1067; KNL-LABEL: test_vfmsubaddpd_256:
1068; KNL:       # %bb.0:
1069; KNL-NEXT:    #APP
1070; KNL-NEXT:    vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50]
1071; KNL-NEXT:    vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50]
1072; KNL-NEXT:    vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50]
1073; KNL-NEXT:    vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [12:0.50]
1074; KNL-NEXT:    vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [12:0.50]
1075; KNL-NEXT:    vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [12:0.50]
1076; KNL-NEXT:    #NO_APP
1077; KNL-NEXT:    retq # sched: [7:1.00]
1078;
1079; SKX-LABEL: test_vfmsubaddpd_256:
1080; SKX:       # %bb.0:
1081; SKX-NEXT:    #APP
1082; SKX-NEXT:    vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [4:0.50]
1083; SKX-NEXT:    vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [4:0.50]
1084; SKX-NEXT:    vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [4:0.50]
1085; SKX-NEXT:    vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [11:0.50]
1086; SKX-NEXT:    vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [11:0.50]
1087; SKX-NEXT:    vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [11:0.50]
1088; SKX-NEXT:    #NO_APP
1089; SKX-NEXT:    vzeroupper # sched: [4:1.00]
1090; SKX-NEXT:    retq # sched: [7:1.00]
1091;
1092; ZNVER1-LABEL: test_vfmsubaddpd_256:
1093; ZNVER1:       # %bb.0:
1094; ZNVER1-NEXT:    #APP
1095; ZNVER1-NEXT:    vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50]
1096; ZNVER1-NEXT:    vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50]
1097; ZNVER1-NEXT:    vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50]
1098; ZNVER1-NEXT:    vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [12:0.50]
1099; ZNVER1-NEXT:    vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [12:0.50]
1100; ZNVER1-NEXT:    vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [12:0.50]
1101; ZNVER1-NEXT:    #NO_APP
1102; ZNVER1-NEXT:    vzeroupper # sched: [100:0.25]
1103; ZNVER1-NEXT:    retq # sched: [1:0.50]
1104  tail call void asm "vfmsubadd132pd $2, $1, $0 \0A\09 vfmsubadd213pd $2, $1, $0 \0A\09 vfmsubadd231pd $2, $1, $0 \0A\09 vfmsubadd132pd $3, $1, $0 \0A\09 vfmsubadd213pd $3, $1, $0 \0A\09 vfmsubadd231pd $3, $1, $0", "x,x,x,*m"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) nounwind
1105  ret void
1106}
1107
1108define void @test_vfmsubaddps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize {
1109; GENERIC-LABEL: test_vfmsubaddps_128:
1110; GENERIC:       # %bb.0:
1111; GENERIC-NEXT:    #APP
1112; GENERIC-NEXT:    vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50]
1113; GENERIC-NEXT:    vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50]
1114; GENERIC-NEXT:    vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50]
1115; GENERIC-NEXT:    vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50]
1116; GENERIC-NEXT:    vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50]
1117; GENERIC-NEXT:    vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50]
1118; GENERIC-NEXT:    #NO_APP
1119; GENERIC-NEXT:    retq # sched: [1:1.00]
1120;
1121; HASWELL-LABEL: test_vfmsubaddps_128:
1122; HASWELL:       # %bb.0:
1123; HASWELL-NEXT:    #APP
1124; HASWELL-NEXT:    vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50]
1125; HASWELL-NEXT:    vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50]
1126; HASWELL-NEXT:    vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50]
1127; HASWELL-NEXT:    vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [11:0.50]
1128; HASWELL-NEXT:    vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [11:0.50]
1129; HASWELL-NEXT:    vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [11:0.50]
1130; HASWELL-NEXT:    #NO_APP
1131; HASWELL-NEXT:    retq # sched: [7:1.00]
1132;
1133; BROADWELL-LABEL: test_vfmsubaddps_128:
1134; BROADWELL:       # %bb.0:
1135; BROADWELL-NEXT:    #APP
1136; BROADWELL-NEXT:    vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50]
1137; BROADWELL-NEXT:    vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50]
1138; BROADWELL-NEXT:    vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50]
1139; BROADWELL-NEXT:    vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50]
1140; BROADWELL-NEXT:    vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50]
1141; BROADWELL-NEXT:    vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50]
1142; BROADWELL-NEXT:    #NO_APP
1143; BROADWELL-NEXT:    retq # sched: [7:1.00]
1144;
1145; SKYLAKE-LABEL: test_vfmsubaddps_128:
1146; SKYLAKE:       # %bb.0:
1147; SKYLAKE-NEXT:    #APP
1148; SKYLAKE-NEXT:    vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [4:0.50]
1149; SKYLAKE-NEXT:    vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [4:0.50]
1150; SKYLAKE-NEXT:    vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [4:0.50]
1151; SKYLAKE-NEXT:    vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50]
1152; SKYLAKE-NEXT:    vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50]
1153; SKYLAKE-NEXT:    vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50]
1154; SKYLAKE-NEXT:    #NO_APP
1155; SKYLAKE-NEXT:    retq # sched: [7:1.00]
1156;
1157; KNL-LABEL: test_vfmsubaddps_128:
1158; KNL:       # %bb.0:
1159; KNL-NEXT:    #APP
1160; KNL-NEXT:    vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50]
1161; KNL-NEXT:    vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50]
1162; KNL-NEXT:    vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50]
1163; KNL-NEXT:    vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [11:0.50]
1164; KNL-NEXT:    vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [11:0.50]
1165; KNL-NEXT:    vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [11:0.50]
1166; KNL-NEXT:    #NO_APP
1167; KNL-NEXT:    retq # sched: [7:1.00]
1168;
1169; SKX-LABEL: test_vfmsubaddps_128:
1170; SKX:       # %bb.0:
1171; SKX-NEXT:    #APP
1172; SKX-NEXT:    vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [4:0.50]
1173; SKX-NEXT:    vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [4:0.50]
1174; SKX-NEXT:    vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [4:0.50]
1175; SKX-NEXT:    vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50]
1176; SKX-NEXT:    vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50]
1177; SKX-NEXT:    vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50]
1178; SKX-NEXT:    #NO_APP
1179; SKX-NEXT:    retq # sched: [7:1.00]
1180;
1181; ZNVER1-LABEL: test_vfmsubaddps_128:
1182; ZNVER1:       # %bb.0:
1183; ZNVER1-NEXT:    #APP
1184; ZNVER1-NEXT:    vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50]
1185; ZNVER1-NEXT:    vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50]
1186; ZNVER1-NEXT:    vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50]
1187; ZNVER1-NEXT:    vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [12:0.50]
1188; ZNVER1-NEXT:    vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [12:0.50]
1189; ZNVER1-NEXT:    vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [12:0.50]
1190; ZNVER1-NEXT:    #NO_APP
1191; ZNVER1-NEXT:    retq # sched: [1:0.50]
1192  tail call void asm "vfmsubadd132ps $2, $1, $0 \0A\09 vfmsubadd213ps $2, $1, $0 \0A\09 vfmsubadd231ps $2, $1, $0 \0A\09 vfmsubadd132ps $3, $1, $0 \0A\09 vfmsubadd213ps $3, $1, $0 \0A\09 vfmsubadd231ps $3, $1, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
1193  ret void
1194}
1195
1196define void @test_vfmsubaddps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) optsize {
1197; GENERIC-LABEL: test_vfmsubaddps_256:
1198; GENERIC:       # %bb.0:
1199; GENERIC-NEXT:    #APP
1200; GENERIC-NEXT:    vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50]
1201; GENERIC-NEXT:    vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50]
1202; GENERIC-NEXT:    vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50]
1203; GENERIC-NEXT:    vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [10:0.50]
1204; GENERIC-NEXT:    vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [10:0.50]
1205; GENERIC-NEXT:    vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [10:0.50]
1206; GENERIC-NEXT:    #NO_APP
1207; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
1208; GENERIC-NEXT:    retq # sched: [1:1.00]
1209;
1210; HASWELL-LABEL: test_vfmsubaddps_256:
1211; HASWELL:       # %bb.0:
1212; HASWELL-NEXT:    #APP
1213; HASWELL-NEXT:    vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50]
1214; HASWELL-NEXT:    vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50]
1215; HASWELL-NEXT:    vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50]
1216; HASWELL-NEXT:    vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [12:0.50]
1217; HASWELL-NEXT:    vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [12:0.50]
1218; HASWELL-NEXT:    vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [12:0.50]
1219; HASWELL-NEXT:    #NO_APP
1220; HASWELL-NEXT:    vzeroupper # sched: [4:1.00]
1221; HASWELL-NEXT:    retq # sched: [7:1.00]
1222;
1223; BROADWELL-LABEL: test_vfmsubaddps_256:
1224; BROADWELL:       # %bb.0:
1225; BROADWELL-NEXT:    #APP
1226; BROADWELL-NEXT:    vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50]
1227; BROADWELL-NEXT:    vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50]
1228; BROADWELL-NEXT:    vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50]
1229; BROADWELL-NEXT:    vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [11:0.50]
1230; BROADWELL-NEXT:    vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [11:0.50]
1231; BROADWELL-NEXT:    vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [11:0.50]
1232; BROADWELL-NEXT:    #NO_APP
1233; BROADWELL-NEXT:    vzeroupper # sched: [4:1.00]
1234; BROADWELL-NEXT:    retq # sched: [7:1.00]
1235;
1236; SKYLAKE-LABEL: test_vfmsubaddps_256:
1237; SKYLAKE:       # %bb.0:
1238; SKYLAKE-NEXT:    #APP
1239; SKYLAKE-NEXT:    vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [4:0.50]
1240; SKYLAKE-NEXT:    vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [4:0.50]
1241; SKYLAKE-NEXT:    vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [4:0.50]
1242; SKYLAKE-NEXT:    vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [11:0.50]
1243; SKYLAKE-NEXT:    vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [11:0.50]
1244; SKYLAKE-NEXT:    vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [11:0.50]
1245; SKYLAKE-NEXT:    #NO_APP
1246; SKYLAKE-NEXT:    vzeroupper # sched: [4:1.00]
1247; SKYLAKE-NEXT:    retq # sched: [7:1.00]
1248;
1249; KNL-LABEL: test_vfmsubaddps_256:
1250; KNL:       # %bb.0:
1251; KNL-NEXT:    #APP
1252; KNL-NEXT:    vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50]
1253; KNL-NEXT:    vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50]
1254; KNL-NEXT:    vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50]
1255; KNL-NEXT:    vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [12:0.50]
1256; KNL-NEXT:    vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [12:0.50]
1257; KNL-NEXT:    vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [12:0.50]
1258; KNL-NEXT:    #NO_APP
1259; KNL-NEXT:    retq # sched: [7:1.00]
1260;
1261; SKX-LABEL: test_vfmsubaddps_256:
1262; SKX:       # %bb.0:
1263; SKX-NEXT:    #APP
1264; SKX-NEXT:    vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [4:0.50]
1265; SKX-NEXT:    vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [4:0.50]
1266; SKX-NEXT:    vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [4:0.50]
1267; SKX-NEXT:    vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [11:0.50]
1268; SKX-NEXT:    vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [11:0.50]
1269; SKX-NEXT:    vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [11:0.50]
1270; SKX-NEXT:    #NO_APP
1271; SKX-NEXT:    vzeroupper # sched: [4:1.00]
1272; SKX-NEXT:    retq # sched: [7:1.00]
1273;
1274; ZNVER1-LABEL: test_vfmsubaddps_256:
1275; ZNVER1:       # %bb.0:
1276; ZNVER1-NEXT:    #APP
1277; ZNVER1-NEXT:    vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50]
1278; ZNVER1-NEXT:    vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50]
1279; ZNVER1-NEXT:    vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50]
1280; ZNVER1-NEXT:    vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [12:0.50]
1281; ZNVER1-NEXT:    vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [12:0.50]
1282; ZNVER1-NEXT:    vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [12:0.50]
1283; ZNVER1-NEXT:    #NO_APP
1284; ZNVER1-NEXT:    vzeroupper # sched: [100:0.25]
1285; ZNVER1-NEXT:    retq # sched: [1:0.50]
1286  tail call void asm "vfmsubadd132ps $2, $1, $0 \0A\09 vfmsubadd213ps $2, $1, $0 \0A\09 vfmsubadd231ps $2, $1, $0 \0A\09 vfmsubadd132ps $3, $1, $0 \0A\09 vfmsubadd213ps $3, $1, $0 \0A\09 vfmsubadd231ps $3, $1, $0", "x,x,x,*m"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) nounwind
1287  ret void
1288}
1289
1290;
1291; VFMSUB
1292;
1293
1294define void @test_vfmsubpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize {
1295; GENERIC-LABEL: test_vfmsubpd_128:
1296; GENERIC:       # %bb.0:
1297; GENERIC-NEXT:    #APP
1298; GENERIC-NEXT:    vfmsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
1299; GENERIC-NEXT:    vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
1300; GENERIC-NEXT:    vfmsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
1301; GENERIC-NEXT:    vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
1302; GENERIC-NEXT:    vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
1303; GENERIC-NEXT:    vfmsub231pd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
1304; GENERIC-NEXT:    #NO_APP
1305; GENERIC-NEXT:    retq # sched: [1:1.00]
1306;
1307; HASWELL-LABEL: test_vfmsubpd_128:
1308; HASWELL:       # %bb.0:
1309; HASWELL-NEXT:    #APP
1310; HASWELL-NEXT:    vfmsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
1311; HASWELL-NEXT:    vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
1312; HASWELL-NEXT:    vfmsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
1313; HASWELL-NEXT:    vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [11:0.50]
1314; HASWELL-NEXT:    vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [11:0.50]
1315; HASWELL-NEXT:    vfmsub231pd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [11:0.50]
1316; HASWELL-NEXT:    #NO_APP
1317; HASWELL-NEXT:    retq # sched: [7:1.00]
1318;
1319; BROADWELL-LABEL: test_vfmsubpd_128:
1320; BROADWELL:       # %bb.0:
1321; BROADWELL-NEXT:    #APP
1322; BROADWELL-NEXT:    vfmsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
1323; BROADWELL-NEXT:    vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
1324; BROADWELL-NEXT:    vfmsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
1325; BROADWELL-NEXT:    vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
1326; BROADWELL-NEXT:    vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
1327; BROADWELL-NEXT:    vfmsub231pd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
1328; BROADWELL-NEXT:    #NO_APP
1329; BROADWELL-NEXT:    retq # sched: [7:1.00]
1330;
1331; SKYLAKE-LABEL: test_vfmsubpd_128:
1332; SKYLAKE:       # %bb.0:
1333; SKYLAKE-NEXT:    #APP
1334; SKYLAKE-NEXT:    vfmsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.50]
1335; SKYLAKE-NEXT:    vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.50]
1336; SKYLAKE-NEXT:    vfmsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.50]
1337; SKYLAKE-NEXT:    vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
1338; SKYLAKE-NEXT:    vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
1339; SKYLAKE-NEXT:    vfmsub231pd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
1340; SKYLAKE-NEXT:    #NO_APP
1341; SKYLAKE-NEXT:    retq # sched: [7:1.00]
1342;
1343; KNL-LABEL: test_vfmsubpd_128:
1344; KNL:       # %bb.0:
1345; KNL-NEXT:    #APP
1346; KNL-NEXT:    vfmsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
1347; KNL-NEXT:    vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
1348; KNL-NEXT:    vfmsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
1349; KNL-NEXT:    vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [11:0.50]
1350; KNL-NEXT:    vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [11:0.50]
1351; KNL-NEXT:    vfmsub231pd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [11:0.50]
1352; KNL-NEXT:    #NO_APP
1353; KNL-NEXT:    retq # sched: [7:1.00]
1354;
1355; SKX-LABEL: test_vfmsubpd_128:
1356; SKX:       # %bb.0:
1357; SKX-NEXT:    #APP
1358; SKX-NEXT:    vfmsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.50]
1359; SKX-NEXT:    vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.50]
1360; SKX-NEXT:    vfmsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.50]
1361; SKX-NEXT:    vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
1362; SKX-NEXT:    vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
1363; SKX-NEXT:    vfmsub231pd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
1364; SKX-NEXT:    #NO_APP
1365; SKX-NEXT:    retq # sched: [7:1.00]
1366;
1367; ZNVER1-LABEL: test_vfmsubpd_128:
1368; ZNVER1:       # %bb.0:
1369; ZNVER1-NEXT:    #APP
1370; ZNVER1-NEXT:    vfmsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
1371; ZNVER1-NEXT:    vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
1372; ZNVER1-NEXT:    vfmsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
1373; ZNVER1-NEXT:    vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [12:0.50]
1374; ZNVER1-NEXT:    vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [12:0.50]
1375; ZNVER1-NEXT:    vfmsub231pd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [12:0.50]
1376; ZNVER1-NEXT:    #NO_APP
1377; ZNVER1-NEXT:    retq # sched: [1:0.50]
1378  tail call void asm "vfmsub132pd $2, $1, $0 \0A\09 vfmsub213pd $2, $1, $0 \0A\09 vfmsub231pd $2, $1, $0 \0A\09 vfmsub132pd $3, $1, $0 \0A\09 vfmsub213pd $3, $1, $0 \0A\09 vfmsub231pd $3, $1, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
1379  ret void
1380}
1381
1382define void @test_vfmsubpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) optsize {
1383; GENERIC-LABEL: test_vfmsubpd_256:
1384; GENERIC:       # %bb.0:
1385; GENERIC-NEXT:    #APP
1386; GENERIC-NEXT:    vfmsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50]
1387; GENERIC-NEXT:    vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50]
1388; GENERIC-NEXT:    vfmsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50]
1389; GENERIC-NEXT:    vfmsub132pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [10:0.50]
1390; GENERIC-NEXT:    vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [10:0.50]
1391; GENERIC-NEXT:    vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [10:0.50]
1392; GENERIC-NEXT:    #NO_APP
1393; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
1394; GENERIC-NEXT:    retq # sched: [1:1.00]
1395;
1396; HASWELL-LABEL: test_vfmsubpd_256:
1397; HASWELL:       # %bb.0:
1398; HASWELL-NEXT:    #APP
1399; HASWELL-NEXT:    vfmsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50]
1400; HASWELL-NEXT:    vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50]
1401; HASWELL-NEXT:    vfmsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50]
1402; HASWELL-NEXT:    vfmsub132pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [12:0.50]
1403; HASWELL-NEXT:    vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [12:0.50]
1404; HASWELL-NEXT:    vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [12:0.50]
1405; HASWELL-NEXT:    #NO_APP
1406; HASWELL-NEXT:    vzeroupper # sched: [4:1.00]
1407; HASWELL-NEXT:    retq # sched: [7:1.00]
1408;
1409; BROADWELL-LABEL: test_vfmsubpd_256:
1410; BROADWELL:       # %bb.0:
1411; BROADWELL-NEXT:    #APP
1412; BROADWELL-NEXT:    vfmsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50]
1413; BROADWELL-NEXT:    vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50]
1414; BROADWELL-NEXT:    vfmsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50]
1415; BROADWELL-NEXT:    vfmsub132pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [11:0.50]
1416; BROADWELL-NEXT:    vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [11:0.50]
1417; BROADWELL-NEXT:    vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [11:0.50]
1418; BROADWELL-NEXT:    #NO_APP
1419; BROADWELL-NEXT:    vzeroupper # sched: [4:1.00]
1420; BROADWELL-NEXT:    retq # sched: [7:1.00]
1421;
1422; SKYLAKE-LABEL: test_vfmsubpd_256:
1423; SKYLAKE:       # %bb.0:
1424; SKYLAKE-NEXT:    #APP
1425; SKYLAKE-NEXT:    vfmsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [4:0.50]
1426; SKYLAKE-NEXT:    vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [4:0.50]
1427; SKYLAKE-NEXT:    vfmsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [4:0.50]
1428; SKYLAKE-NEXT:    vfmsub132pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [11:0.50]
1429; SKYLAKE-NEXT:    vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [11:0.50]
1430; SKYLAKE-NEXT:    vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [11:0.50]
1431; SKYLAKE-NEXT:    #NO_APP
1432; SKYLAKE-NEXT:    vzeroupper # sched: [4:1.00]
1433; SKYLAKE-NEXT:    retq # sched: [7:1.00]
1434;
1435; KNL-LABEL: test_vfmsubpd_256:
1436; KNL:       # %bb.0:
1437; KNL-NEXT:    #APP
1438; KNL-NEXT:    vfmsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50]
1439; KNL-NEXT:    vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50]
1440; KNL-NEXT:    vfmsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50]
1441; KNL-NEXT:    vfmsub132pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [12:0.50]
1442; KNL-NEXT:    vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [12:0.50]
1443; KNL-NEXT:    vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [12:0.50]
1444; KNL-NEXT:    #NO_APP
1445; KNL-NEXT:    retq # sched: [7:1.00]
1446;
1447; SKX-LABEL: test_vfmsubpd_256:
1448; SKX:       # %bb.0:
1449; SKX-NEXT:    #APP
1450; SKX-NEXT:    vfmsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [4:0.50]
1451; SKX-NEXT:    vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [4:0.50]
1452; SKX-NEXT:    vfmsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [4:0.50]
1453; SKX-NEXT:    vfmsub132pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [11:0.50]
1454; SKX-NEXT:    vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [11:0.50]
1455; SKX-NEXT:    vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [11:0.50]
1456; SKX-NEXT:    #NO_APP
1457; SKX-NEXT:    vzeroupper # sched: [4:1.00]
1458; SKX-NEXT:    retq # sched: [7:1.00]
1459;
1460; ZNVER1-LABEL: test_vfmsubpd_256:
1461; ZNVER1:       # %bb.0:
1462; ZNVER1-NEXT:    #APP
1463; ZNVER1-NEXT:    vfmsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50]
1464; ZNVER1-NEXT:    vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50]
1465; ZNVER1-NEXT:    vfmsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50]
1466; ZNVER1-NEXT:    vfmsub132pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [12:0.50]
1467; ZNVER1-NEXT:    vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [12:0.50]
1468; ZNVER1-NEXT:    vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [12:0.50]
1469; ZNVER1-NEXT:    #NO_APP
1470; ZNVER1-NEXT:    vzeroupper # sched: [100:0.25]
1471; ZNVER1-NEXT:    retq # sched: [1:0.50]
1472  tail call void asm "vfmsub132pd $2, $1, $0 \0A\09 vfmsub213pd $2, $1, $0 \0A\09 vfmsub231pd $2, $1, $0 \0A\09 vfmsub132pd $3, $1, $0 \0A\09 vfmsub213pd $3, $1, $0 \0A\09 vfmsub231pd $3, $1, $0", "x,x,x,*m"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) nounwind
1473  ret void
1474}
1475
1476define void @test_vfmsubps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize {
1477; GENERIC-LABEL: test_vfmsubps_128:
1478; GENERIC:       # %bb.0:
1479; GENERIC-NEXT:    #APP
1480; GENERIC-NEXT:    vfmsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
1481; GENERIC-NEXT:    vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
1482; GENERIC-NEXT:    vfmsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
1483; GENERIC-NEXT:    vfmsub132ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
1484; GENERIC-NEXT:    vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
1485; GENERIC-NEXT:    vfmsub231ps {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
1486; GENERIC-NEXT:    #NO_APP
1487; GENERIC-NEXT:    retq # sched: [1:1.00]
1488;
1489; HASWELL-LABEL: test_vfmsubps_128:
1490; HASWELL:       # %bb.0:
1491; HASWELL-NEXT:    #APP
1492; HASWELL-NEXT:    vfmsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
1493; HASWELL-NEXT:    vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
1494; HASWELL-NEXT:    vfmsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
1495; HASWELL-NEXT:    vfmsub132ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [11:0.50]
1496; HASWELL-NEXT:    vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [11:0.50]
1497; HASWELL-NEXT:    vfmsub231ps {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [11:0.50]
1498; HASWELL-NEXT:    #NO_APP
1499; HASWELL-NEXT:    retq # sched: [7:1.00]
1500;
1501; BROADWELL-LABEL: test_vfmsubps_128:
1502; BROADWELL:       # %bb.0:
1503; BROADWELL-NEXT:    #APP
1504; BROADWELL-NEXT:    vfmsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
1505; BROADWELL-NEXT:    vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
1506; BROADWELL-NEXT:    vfmsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
1507; BROADWELL-NEXT:    vfmsub132ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
1508; BROADWELL-NEXT:    vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
1509; BROADWELL-NEXT:    vfmsub231ps {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
1510; BROADWELL-NEXT:    #NO_APP
1511; BROADWELL-NEXT:    retq # sched: [7:1.00]
1512;
1513; SKYLAKE-LABEL: test_vfmsubps_128:
1514; SKYLAKE:       # %bb.0:
1515; SKYLAKE-NEXT:    #APP
1516; SKYLAKE-NEXT:    vfmsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.50]
1517; SKYLAKE-NEXT:    vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.50]
1518; SKYLAKE-NEXT:    vfmsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.50]
1519; SKYLAKE-NEXT:    vfmsub132ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
1520; SKYLAKE-NEXT:    vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
1521; SKYLAKE-NEXT:    vfmsub231ps {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
1522; SKYLAKE-NEXT:    #NO_APP
1523; SKYLAKE-NEXT:    retq # sched: [7:1.00]
1524;
1525; KNL-LABEL: test_vfmsubps_128:
1526; KNL:       # %bb.0:
1527; KNL-NEXT:    #APP
1528; KNL-NEXT:    vfmsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
1529; KNL-NEXT:    vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
1530; KNL-NEXT:    vfmsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
1531; KNL-NEXT:    vfmsub132ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [11:0.50]
1532; KNL-NEXT:    vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [11:0.50]
1533; KNL-NEXT:    vfmsub231ps {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [11:0.50]
1534; KNL-NEXT:    #NO_APP
1535; KNL-NEXT:    retq # sched: [7:1.00]
1536;
1537; SKX-LABEL: test_vfmsubps_128:
1538; SKX:       # %bb.0:
1539; SKX-NEXT:    #APP
1540; SKX-NEXT:    vfmsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.50]
1541; SKX-NEXT:    vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.50]
1542; SKX-NEXT:    vfmsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.50]
1543; SKX-NEXT:    vfmsub132ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
1544; SKX-NEXT:    vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
1545; SKX-NEXT:    vfmsub231ps {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
1546; SKX-NEXT:    #NO_APP
1547; SKX-NEXT:    retq # sched: [7:1.00]
1548;
1549; ZNVER1-LABEL: test_vfmsubps_128:
1550; ZNVER1:       # %bb.0:
1551; ZNVER1-NEXT:    #APP
1552; ZNVER1-NEXT:    vfmsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
1553; ZNVER1-NEXT:    vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
1554; ZNVER1-NEXT:    vfmsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
1555; ZNVER1-NEXT:    vfmsub132ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [12:0.50]
1556; ZNVER1-NEXT:    vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [12:0.50]
1557; ZNVER1-NEXT:    vfmsub231ps {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [12:0.50]
1558; ZNVER1-NEXT:    #NO_APP
1559; ZNVER1-NEXT:    retq # sched: [1:0.50]
1560  tail call void asm "vfmsub132ps $2, $1, $0 \0A\09 vfmsub213ps $2, $1, $0 \0A\09 vfmsub231ps $2, $1, $0 \0A\09 vfmsub132ps $3, $1, $0 \0A\09 vfmsub213ps $3, $1, $0 \0A\09 vfmsub231ps $3, $1, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
1561  ret void
1562}
1563
1564define void @test_vfmsubps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) optsize {
1565; GENERIC-LABEL: test_vfmsubps_256:
1566; GENERIC:       # %bb.0:
1567; GENERIC-NEXT:    #APP
1568; GENERIC-NEXT:    vfmsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50]
1569; GENERIC-NEXT:    vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50]
1570; GENERIC-NEXT:    vfmsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50]
1571; GENERIC-NEXT:    vfmsub132ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [10:0.50]
1572; GENERIC-NEXT:    vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [10:0.50]
1573; GENERIC-NEXT:    vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [10:0.50]
1574; GENERIC-NEXT:    #NO_APP
1575; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
1576; GENERIC-NEXT:    retq # sched: [1:1.00]
1577;
1578; HASWELL-LABEL: test_vfmsubps_256:
1579; HASWELL:       # %bb.0:
1580; HASWELL-NEXT:    #APP
1581; HASWELL-NEXT:    vfmsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50]
1582; HASWELL-NEXT:    vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50]
1583; HASWELL-NEXT:    vfmsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50]
1584; HASWELL-NEXT:    vfmsub132ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [12:0.50]
1585; HASWELL-NEXT:    vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [12:0.50]
1586; HASWELL-NEXT:    vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [12:0.50]
1587; HASWELL-NEXT:    #NO_APP
1588; HASWELL-NEXT:    vzeroupper # sched: [4:1.00]
1589; HASWELL-NEXT:    retq # sched: [7:1.00]
1590;
1591; BROADWELL-LABEL: test_vfmsubps_256:
1592; BROADWELL:       # %bb.0:
1593; BROADWELL-NEXT:    #APP
1594; BROADWELL-NEXT:    vfmsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50]
1595; BROADWELL-NEXT:    vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50]
1596; BROADWELL-NEXT:    vfmsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50]
1597; BROADWELL-NEXT:    vfmsub132ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [11:0.50]
1598; BROADWELL-NEXT:    vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [11:0.50]
1599; BROADWELL-NEXT:    vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [11:0.50]
1600; BROADWELL-NEXT:    #NO_APP
1601; BROADWELL-NEXT:    vzeroupper # sched: [4:1.00]
1602; BROADWELL-NEXT:    retq # sched: [7:1.00]
1603;
1604; SKYLAKE-LABEL: test_vfmsubps_256:
1605; SKYLAKE:       # %bb.0:
1606; SKYLAKE-NEXT:    #APP
1607; SKYLAKE-NEXT:    vfmsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [4:0.50]
1608; SKYLAKE-NEXT:    vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [4:0.50]
1609; SKYLAKE-NEXT:    vfmsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [4:0.50]
1610; SKYLAKE-NEXT:    vfmsub132ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [11:0.50]
1611; SKYLAKE-NEXT:    vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [11:0.50]
1612; SKYLAKE-NEXT:    vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [11:0.50]
1613; SKYLAKE-NEXT:    #NO_APP
1614; SKYLAKE-NEXT:    vzeroupper # sched: [4:1.00]
1615; SKYLAKE-NEXT:    retq # sched: [7:1.00]
1616;
1617; KNL-LABEL: test_vfmsubps_256:
1618; KNL:       # %bb.0:
1619; KNL-NEXT:    #APP
1620; KNL-NEXT:    vfmsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50]
1621; KNL-NEXT:    vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50]
1622; KNL-NEXT:    vfmsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50]
1623; KNL-NEXT:    vfmsub132ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [12:0.50]
1624; KNL-NEXT:    vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [12:0.50]
1625; KNL-NEXT:    vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [12:0.50]
1626; KNL-NEXT:    #NO_APP
1627; KNL-NEXT:    retq # sched: [7:1.00]
1628;
1629; SKX-LABEL: test_vfmsubps_256:
1630; SKX:       # %bb.0:
1631; SKX-NEXT:    #APP
1632; SKX-NEXT:    vfmsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [4:0.50]
1633; SKX-NEXT:    vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [4:0.50]
1634; SKX-NEXT:    vfmsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [4:0.50]
1635; SKX-NEXT:    vfmsub132ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [11:0.50]
1636; SKX-NEXT:    vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [11:0.50]
1637; SKX-NEXT:    vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [11:0.50]
1638; SKX-NEXT:    #NO_APP
1639; SKX-NEXT:    vzeroupper # sched: [4:1.00]
1640; SKX-NEXT:    retq # sched: [7:1.00]
1641;
1642; ZNVER1-LABEL: test_vfmsubps_256:
1643; ZNVER1:       # %bb.0:
1644; ZNVER1-NEXT:    #APP
1645; ZNVER1-NEXT:    vfmsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50]
1646; ZNVER1-NEXT:    vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50]
1647; ZNVER1-NEXT:    vfmsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50]
1648; ZNVER1-NEXT:    vfmsub132ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [12:0.50]
1649; ZNVER1-NEXT:    vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [12:0.50]
1650; ZNVER1-NEXT:    vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [12:0.50]
1651; ZNVER1-NEXT:    #NO_APP
1652; ZNVER1-NEXT:    vzeroupper # sched: [100:0.25]
1653; ZNVER1-NEXT:    retq # sched: [1:0.50]
1654  tail call void asm "vfmsub132ps $2, $1, $0 \0A\09 vfmsub213ps $2, $1, $0 \0A\09 vfmsub231ps $2, $1, $0 \0A\09 vfmsub132ps $3, $1, $0 \0A\09 vfmsub213ps $3, $1, $0 \0A\09 vfmsub231ps $3, $1, $0", "x,x,x,*m"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) nounwind
1655  ret void
1656}
1657
1658define void @test_vfmsubsd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize {
1659; GENERIC-LABEL: test_vfmsubsd_128:
1660; GENERIC:       # %bb.0:
1661; GENERIC-NEXT:    #APP
1662; GENERIC-NEXT:    vfmsub132sd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
1663; GENERIC-NEXT:    vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
1664; GENERIC-NEXT:    vfmsub231sd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
1665; GENERIC-NEXT:    vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
1666; GENERIC-NEXT:    vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
1667; GENERIC-NEXT:    vfmsub231sd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
1668; GENERIC-NEXT:    #NO_APP
1669; GENERIC-NEXT:    retq # sched: [1:1.00]
1670;
1671; HASWELL-LABEL: test_vfmsubsd_128:
1672; HASWELL:       # %bb.0:
1673; HASWELL-NEXT:    #APP
1674; HASWELL-NEXT:    vfmsub132sd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
1675; HASWELL-NEXT:    vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
1676; HASWELL-NEXT:    vfmsub231sd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
1677; HASWELL-NEXT:    vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
1678; HASWELL-NEXT:    vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
1679; HASWELL-NEXT:    vfmsub231sd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
1680; HASWELL-NEXT:    #NO_APP
1681; HASWELL-NEXT:    retq # sched: [7:1.00]
1682;
1683; BROADWELL-LABEL: test_vfmsubsd_128:
1684; BROADWELL:       # %bb.0:
1685; BROADWELL-NEXT:    #APP
1686; BROADWELL-NEXT:    vfmsub132sd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
1687; BROADWELL-NEXT:    vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
1688; BROADWELL-NEXT:    vfmsub231sd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
1689; BROADWELL-NEXT:    vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
1690; BROADWELL-NEXT:    vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
1691; BROADWELL-NEXT:    vfmsub231sd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
1692; BROADWELL-NEXT:    #NO_APP
1693; BROADWELL-NEXT:    retq # sched: [7:1.00]
1694;
1695; SKYLAKE-LABEL: test_vfmsubsd_128:
1696; SKYLAKE:       # %bb.0:
1697; SKYLAKE-NEXT:    #APP
1698; SKYLAKE-NEXT:    vfmsub132sd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.50]
1699; SKYLAKE-NEXT:    vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.50]
1700; SKYLAKE-NEXT:    vfmsub231sd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.50]
1701; SKYLAKE-NEXT:    vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [9:0.50]
1702; SKYLAKE-NEXT:    vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [9:0.50]
1703; SKYLAKE-NEXT:    vfmsub231sd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [9:0.50]
1704; SKYLAKE-NEXT:    #NO_APP
1705; SKYLAKE-NEXT:    retq # sched: [7:1.00]
1706;
1707; KNL-LABEL: test_vfmsubsd_128:
1708; KNL:       # %bb.0:
1709; KNL-NEXT:    #APP
1710; KNL-NEXT:    vfmsub132sd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
1711; KNL-NEXT:    vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
1712; KNL-NEXT:    vfmsub231sd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
1713; KNL-NEXT:    vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
1714; KNL-NEXT:    vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
1715; KNL-NEXT:    vfmsub231sd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
1716; KNL-NEXT:    #NO_APP
1717; KNL-NEXT:    retq # sched: [7:1.00]
1718;
1719; SKX-LABEL: test_vfmsubsd_128:
1720; SKX:       # %bb.0:
1721; SKX-NEXT:    #APP
1722; SKX-NEXT:    vfmsub132sd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.50]
1723; SKX-NEXT:    vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.50]
1724; SKX-NEXT:    vfmsub231sd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.50]
1725; SKX-NEXT:    vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [9:0.50]
1726; SKX-NEXT:    vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [9:0.50]
1727; SKX-NEXT:    vfmsub231sd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [9:0.50]
1728; SKX-NEXT:    #NO_APP
1729; SKX-NEXT:    retq # sched: [7:1.00]
1730;
1731; ZNVER1-LABEL: test_vfmsubsd_128:
1732; ZNVER1:       # %bb.0:
1733; ZNVER1-NEXT:    #APP
1734; ZNVER1-NEXT:    vfmsub132sd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
1735; ZNVER1-NEXT:    vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
1736; ZNVER1-NEXT:    vfmsub231sd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
1737; ZNVER1-NEXT:    vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [12:0.50]
1738; ZNVER1-NEXT:    vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [12:0.50]
1739; ZNVER1-NEXT:    vfmsub231sd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [12:0.50]
1740; ZNVER1-NEXT:    #NO_APP
1741; ZNVER1-NEXT:    retq # sched: [1:0.50]
1742  tail call void asm "vfmsub132sd $2, $1, $0 \0A\09 vfmsub213sd $2, $1, $0 \0A\09 vfmsub231sd $2, $1, $0 \0A\09 vfmsub132sd $3, $1, $0 \0A\09 vfmsub213sd $3, $1, $0 \0A\09 vfmsub231sd $3, $1, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
1743  ret void
1744}
1745
1746define void @test_vfmsubss_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize {
1747; GENERIC-LABEL: test_vfmsubss_128:
1748; GENERIC:       # %bb.0:
1749; GENERIC-NEXT:    #APP
1750; GENERIC-NEXT:    vfmsub132ss {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
1751; GENERIC-NEXT:    vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
1752; GENERIC-NEXT:    vfmsub231ss {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
1753; GENERIC-NEXT:    vfmsub132ss {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
1754; GENERIC-NEXT:    vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
1755; GENERIC-NEXT:    vfmsub231ss {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
1756; GENERIC-NEXT:    #NO_APP
1757; GENERIC-NEXT:    retq # sched: [1:1.00]
1758;
1759; HASWELL-LABEL: test_vfmsubss_128:
1760; HASWELL:       # %bb.0:
1761; HASWELL-NEXT:    #APP
1762; HASWELL-NEXT:    vfmsub132ss {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
1763; HASWELL-NEXT:    vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
1764; HASWELL-NEXT:    vfmsub231ss {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
1765; HASWELL-NEXT:    vfmsub132ss {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
1766; HASWELL-NEXT:    vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
1767; HASWELL-NEXT:    vfmsub231ss {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
1768; HASWELL-NEXT:    #NO_APP
1769; HASWELL-NEXT:    retq # sched: [7:1.00]
1770;
1771; BROADWELL-LABEL: test_vfmsubss_128:
1772; BROADWELL:       # %bb.0:
1773; BROADWELL-NEXT:    #APP
1774; BROADWELL-NEXT:    vfmsub132ss {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
1775; BROADWELL-NEXT:    vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
1776; BROADWELL-NEXT:    vfmsub231ss {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
1777; BROADWELL-NEXT:    vfmsub132ss {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
1778; BROADWELL-NEXT:    vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
1779; BROADWELL-NEXT:    vfmsub231ss {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
1780; BROADWELL-NEXT:    #NO_APP
1781; BROADWELL-NEXT:    retq # sched: [7:1.00]
1782;
1783; SKYLAKE-LABEL: test_vfmsubss_128:
1784; SKYLAKE:       # %bb.0:
1785; SKYLAKE-NEXT:    #APP
1786; SKYLAKE-NEXT:    vfmsub132ss {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.50]
1787; SKYLAKE-NEXT:    vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.50]
1788; SKYLAKE-NEXT:    vfmsub231ss {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.50]
1789; SKYLAKE-NEXT:    vfmsub132ss {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [9:0.50]
1790; SKYLAKE-NEXT:    vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [9:0.50]
1791; SKYLAKE-NEXT:    vfmsub231ss {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [9:0.50]
1792; SKYLAKE-NEXT:    #NO_APP
1793; SKYLAKE-NEXT:    retq # sched: [7:1.00]
1794;
1795; KNL-LABEL: test_vfmsubss_128:
1796; KNL:       # %bb.0:
1797; KNL-NEXT:    #APP
1798; KNL-NEXT:    vfmsub132ss {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
1799; KNL-NEXT:    vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
1800; KNL-NEXT:    vfmsub231ss {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
1801; KNL-NEXT:    vfmsub132ss {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
1802; KNL-NEXT:    vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
1803; KNL-NEXT:    vfmsub231ss {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
1804; KNL-NEXT:    #NO_APP
1805; KNL-NEXT:    retq # sched: [7:1.00]
1806;
1807; SKX-LABEL: test_vfmsubss_128:
1808; SKX:       # %bb.0:
1809; SKX-NEXT:    #APP
1810; SKX-NEXT:    vfmsub132ss {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.50]
1811; SKX-NEXT:    vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.50]
1812; SKX-NEXT:    vfmsub231ss {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.50]
1813; SKX-NEXT:    vfmsub132ss {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [9:0.50]
1814; SKX-NEXT:    vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [9:0.50]
1815; SKX-NEXT:    vfmsub231ss {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [9:0.50]
1816; SKX-NEXT:    #NO_APP
1817; SKX-NEXT:    retq # sched: [7:1.00]
1818;
1819; ZNVER1-LABEL: test_vfmsubss_128:
1820; ZNVER1:       # %bb.0:
1821; ZNVER1-NEXT:    #APP
1822; ZNVER1-NEXT:    vfmsub132ss {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
1823; ZNVER1-NEXT:    vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
1824; ZNVER1-NEXT:    vfmsub231ss {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
1825; ZNVER1-NEXT:    vfmsub132ss {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [12:0.50]
1826; ZNVER1-NEXT:    vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [12:0.50]
1827; ZNVER1-NEXT:    vfmsub231ss {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [12:0.50]
1828; ZNVER1-NEXT:    #NO_APP
1829; ZNVER1-NEXT:    retq # sched: [1:0.50]
1830  tail call void asm "vfmsub132ss $2, $1, $0 \0A\09 vfmsub213ss $2, $1, $0 \0A\09 vfmsub231ss $2, $1, $0 \0A\09 vfmsub132ss $3, $1, $0 \0A\09 vfmsub213ss $3, $1, $0 \0A\09 vfmsub231ss $3, $1, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
1831  ret void
1832}
1833
1834;
1835; VFNMADD
1836;
1837
1838define void @test_vfnmaddpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize {
1839; GENERIC-LABEL: test_vfnmaddpd_128:
1840; GENERIC:       # %bb.0:
1841; GENERIC-NEXT:    #APP
1842; GENERIC-NEXT:    vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
1843; GENERIC-NEXT:    vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
1844; GENERIC-NEXT:    vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
1845; GENERIC-NEXT:    vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
1846; GENERIC-NEXT:    vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
1847; GENERIC-NEXT:    vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
1848; GENERIC-NEXT:    #NO_APP
1849; GENERIC-NEXT:    retq # sched: [1:1.00]
1850;
1851; HASWELL-LABEL: test_vfnmaddpd_128:
1852; HASWELL:       # %bb.0:
1853; HASWELL-NEXT:    #APP
1854; HASWELL-NEXT:    vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
1855; HASWELL-NEXT:    vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
1856; HASWELL-NEXT:    vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
1857; HASWELL-NEXT:    vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [11:0.50]
1858; HASWELL-NEXT:    vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [11:0.50]
1859; HASWELL-NEXT:    vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [11:0.50]
1860; HASWELL-NEXT:    #NO_APP
1861; HASWELL-NEXT:    retq # sched: [7:1.00]
1862;
1863; BROADWELL-LABEL: test_vfnmaddpd_128:
1864; BROADWELL:       # %bb.0:
1865; BROADWELL-NEXT:    #APP
1866; BROADWELL-NEXT:    vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
1867; BROADWELL-NEXT:    vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
1868; BROADWELL-NEXT:    vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
1869; BROADWELL-NEXT:    vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
1870; BROADWELL-NEXT:    vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
1871; BROADWELL-NEXT:    vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
1872; BROADWELL-NEXT:    #NO_APP
1873; BROADWELL-NEXT:    retq # sched: [7:1.00]
1874;
1875; SKYLAKE-LABEL: test_vfnmaddpd_128:
1876; SKYLAKE:       # %bb.0:
1877; SKYLAKE-NEXT:    #APP
1878; SKYLAKE-NEXT:    vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.50]
1879; SKYLAKE-NEXT:    vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.50]
1880; SKYLAKE-NEXT:    vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.50]
1881; SKYLAKE-NEXT:    vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
1882; SKYLAKE-NEXT:    vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
1883; SKYLAKE-NEXT:    vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
1884; SKYLAKE-NEXT:    #NO_APP
1885; SKYLAKE-NEXT:    retq # sched: [7:1.00]
1886;
1887; KNL-LABEL: test_vfnmaddpd_128:
1888; KNL:       # %bb.0:
1889; KNL-NEXT:    #APP
1890; KNL-NEXT:    vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
1891; KNL-NEXT:    vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
1892; KNL-NEXT:    vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
1893; KNL-NEXT:    vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [11:0.50]
1894; KNL-NEXT:    vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [11:0.50]
1895; KNL-NEXT:    vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [11:0.50]
1896; KNL-NEXT:    #NO_APP
1897; KNL-NEXT:    retq # sched: [7:1.00]
1898;
1899; SKX-LABEL: test_vfnmaddpd_128:
1900; SKX:       # %bb.0:
1901; SKX-NEXT:    #APP
1902; SKX-NEXT:    vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.50]
1903; SKX-NEXT:    vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.50]
1904; SKX-NEXT:    vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.50]
1905; SKX-NEXT:    vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
1906; SKX-NEXT:    vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
1907; SKX-NEXT:    vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
1908; SKX-NEXT:    #NO_APP
1909; SKX-NEXT:    retq # sched: [7:1.00]
1910;
1911; ZNVER1-LABEL: test_vfnmaddpd_128:
1912; ZNVER1:       # %bb.0:
1913; ZNVER1-NEXT:    #APP
1914; ZNVER1-NEXT:    vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
1915; ZNVER1-NEXT:    vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
1916; ZNVER1-NEXT:    vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
1917; ZNVER1-NEXT:    vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [12:0.50]
1918; ZNVER1-NEXT:    vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [12:0.50]
1919; ZNVER1-NEXT:    vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [12:0.50]
1920; ZNVER1-NEXT:    #NO_APP
1921; ZNVER1-NEXT:    retq # sched: [1:0.50]
1922  tail call void asm "vfnmadd132pd $2, $1, $0 \0A\09 vfnmadd213pd $2, $1, $0 \0A\09 vfnmadd231pd $2, $1, $0 \0A\09 vfnmadd132pd $3, $1, $0 \0A\09 vfnmadd213pd $3, $1, $0 \0A\09 vfnmadd231pd $3, $1, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
1923  ret void
1924}
1925
1926define void @test_vfnmaddpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) optsize {
1927; GENERIC-LABEL: test_vfnmaddpd_256:
1928; GENERIC:       # %bb.0:
1929; GENERIC-NEXT:    #APP
1930; GENERIC-NEXT:    vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50]
1931; GENERIC-NEXT:    vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
1932; GENERIC-NEXT:    vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50]
1933; GENERIC-NEXT:    vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [10:0.50]
1934; GENERIC-NEXT:    vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [10:0.50]
1935; GENERIC-NEXT:    vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [10:0.50]
1936; GENERIC-NEXT:    #NO_APP
1937; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
1938; GENERIC-NEXT:    retq # sched: [1:1.00]
1939;
1940; HASWELL-LABEL: test_vfnmaddpd_256:
1941; HASWELL:       # %bb.0:
1942; HASWELL-NEXT:    #APP
1943; HASWELL-NEXT:    vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50]
1944; HASWELL-NEXT:    vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
1945; HASWELL-NEXT:    vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50]
1946; HASWELL-NEXT:    vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [12:0.50]
1947; HASWELL-NEXT:    vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [12:0.50]
1948; HASWELL-NEXT:    vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [12:0.50]
1949; HASWELL-NEXT:    #NO_APP
1950; HASWELL-NEXT:    vzeroupper # sched: [4:1.00]
1951; HASWELL-NEXT:    retq # sched: [7:1.00]
1952;
1953; BROADWELL-LABEL: test_vfnmaddpd_256:
1954; BROADWELL:       # %bb.0:
1955; BROADWELL-NEXT:    #APP
1956; BROADWELL-NEXT:    vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50]
1957; BROADWELL-NEXT:    vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
1958; BROADWELL-NEXT:    vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50]
1959; BROADWELL-NEXT:    vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [11:0.50]
1960; BROADWELL-NEXT:    vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50]
1961; BROADWELL-NEXT:    vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [11:0.50]
1962; BROADWELL-NEXT:    #NO_APP
1963; BROADWELL-NEXT:    vzeroupper # sched: [4:1.00]
1964; BROADWELL-NEXT:    retq # sched: [7:1.00]
1965;
1966; SKYLAKE-LABEL: test_vfnmaddpd_256:
1967; SKYLAKE:       # %bb.0:
1968; SKYLAKE-NEXT:    #APP
1969; SKYLAKE-NEXT:    vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [4:0.50]
1970; SKYLAKE-NEXT:    vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [4:0.50]
1971; SKYLAKE-NEXT:    vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [4:0.50]
1972; SKYLAKE-NEXT:    vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [11:0.50]
1973; SKYLAKE-NEXT:    vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50]
1974; SKYLAKE-NEXT:    vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [11:0.50]
1975; SKYLAKE-NEXT:    #NO_APP
1976; SKYLAKE-NEXT:    vzeroupper # sched: [4:1.00]
1977; SKYLAKE-NEXT:    retq # sched: [7:1.00]
1978;
1979; KNL-LABEL: test_vfnmaddpd_256:
1980; KNL:       # %bb.0:
1981; KNL-NEXT:    #APP
1982; KNL-NEXT:    vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50]
1983; KNL-NEXT:    vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
1984; KNL-NEXT:    vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50]
1985; KNL-NEXT:    vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [12:0.50]
1986; KNL-NEXT:    vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [12:0.50]
1987; KNL-NEXT:    vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [12:0.50]
1988; KNL-NEXT:    #NO_APP
1989; KNL-NEXT:    retq # sched: [7:1.00]
1990;
1991; SKX-LABEL: test_vfnmaddpd_256:
1992; SKX:       # %bb.0:
1993; SKX-NEXT:    #APP
1994; SKX-NEXT:    vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [4:0.50]
1995; SKX-NEXT:    vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [4:0.50]
1996; SKX-NEXT:    vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [4:0.50]
1997; SKX-NEXT:    vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [11:0.50]
1998; SKX-NEXT:    vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50]
1999; SKX-NEXT:    vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [11:0.50]
2000; SKX-NEXT:    #NO_APP
2001; SKX-NEXT:    vzeroupper # sched: [4:1.00]
2002; SKX-NEXT:    retq # sched: [7:1.00]
2003;
2004; ZNVER1-LABEL: test_vfnmaddpd_256:
2005; ZNVER1:       # %bb.0:
2006; ZNVER1-NEXT:    #APP
2007; ZNVER1-NEXT:    vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50]
2008; ZNVER1-NEXT:    vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
2009; ZNVER1-NEXT:    vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50]
2010; ZNVER1-NEXT:    vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [12:0.50]
2011; ZNVER1-NEXT:    vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [12:0.50]
2012; ZNVER1-NEXT:    vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [12:0.50]
2013; ZNVER1-NEXT:    #NO_APP
2014; ZNVER1-NEXT:    vzeroupper # sched: [100:0.25]
2015; ZNVER1-NEXT:    retq # sched: [1:0.50]
2016  tail call void asm "vfnmadd132pd $2, $1, $0 \0A\09 vfnmadd213pd $2, $1, $0 \0A\09 vfnmadd231pd $2, $1, $0 \0A\09 vfnmadd132pd $3, $1, $0 \0A\09 vfnmadd213pd $3, $1, $0 \0A\09 vfnmadd231pd $3, $1, $0", "x,x,x,*m"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) nounwind
2017  ret void
2018}
2019
2020define void @test_vfnmaddps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize {
2021; GENERIC-LABEL: test_vfnmaddps_128:
2022; GENERIC:       # %bb.0:
2023; GENERIC-NEXT:    #APP
2024; GENERIC-NEXT:    vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
2025; GENERIC-NEXT:    vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
2026; GENERIC-NEXT:    vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
2027; GENERIC-NEXT:    vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
2028; GENERIC-NEXT:    vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
2029; GENERIC-NEXT:    vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
2030; GENERIC-NEXT:    #NO_APP
2031; GENERIC-NEXT:    retq # sched: [1:1.00]
2032;
2033; HASWELL-LABEL: test_vfnmaddps_128:
2034; HASWELL:       # %bb.0:
2035; HASWELL-NEXT:    #APP
2036; HASWELL-NEXT:    vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
2037; HASWELL-NEXT:    vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
2038; HASWELL-NEXT:    vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
2039; HASWELL-NEXT:    vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [11:0.50]
2040; HASWELL-NEXT:    vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [11:0.50]
2041; HASWELL-NEXT:    vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [11:0.50]
2042; HASWELL-NEXT:    #NO_APP
2043; HASWELL-NEXT:    retq # sched: [7:1.00]
2044;
2045; BROADWELL-LABEL: test_vfnmaddps_128:
2046; BROADWELL:       # %bb.0:
2047; BROADWELL-NEXT:    #APP
2048; BROADWELL-NEXT:    vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
2049; BROADWELL-NEXT:    vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
2050; BROADWELL-NEXT:    vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
2051; BROADWELL-NEXT:    vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
2052; BROADWELL-NEXT:    vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
2053; BROADWELL-NEXT:    vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
2054; BROADWELL-NEXT:    #NO_APP
2055; BROADWELL-NEXT:    retq # sched: [7:1.00]
2056;
2057; SKYLAKE-LABEL: test_vfnmaddps_128:
2058; SKYLAKE:       # %bb.0:
2059; SKYLAKE-NEXT:    #APP
2060; SKYLAKE-NEXT:    vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.50]
2061; SKYLAKE-NEXT:    vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.50]
2062; SKYLAKE-NEXT:    vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.50]
2063; SKYLAKE-NEXT:    vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
2064; SKYLAKE-NEXT:    vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
2065; SKYLAKE-NEXT:    vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
2066; SKYLAKE-NEXT:    #NO_APP
2067; SKYLAKE-NEXT:    retq # sched: [7:1.00]
2068;
2069; KNL-LABEL: test_vfnmaddps_128:
2070; KNL:       # %bb.0:
2071; KNL-NEXT:    #APP
2072; KNL-NEXT:    vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
2073; KNL-NEXT:    vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
2074; KNL-NEXT:    vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
2075; KNL-NEXT:    vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [11:0.50]
2076; KNL-NEXT:    vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [11:0.50]
2077; KNL-NEXT:    vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [11:0.50]
2078; KNL-NEXT:    #NO_APP
2079; KNL-NEXT:    retq # sched: [7:1.00]
2080;
2081; SKX-LABEL: test_vfnmaddps_128:
2082; SKX:       # %bb.0:
2083; SKX-NEXT:    #APP
2084; SKX-NEXT:    vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.50]
2085; SKX-NEXT:    vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.50]
2086; SKX-NEXT:    vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.50]
2087; SKX-NEXT:    vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
2088; SKX-NEXT:    vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
2089; SKX-NEXT:    vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
2090; SKX-NEXT:    #NO_APP
2091; SKX-NEXT:    retq # sched: [7:1.00]
2092;
2093; ZNVER1-LABEL: test_vfnmaddps_128:
2094; ZNVER1:       # %bb.0:
2095; ZNVER1-NEXT:    #APP
2096; ZNVER1-NEXT:    vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
2097; ZNVER1-NEXT:    vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
2098; ZNVER1-NEXT:    vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
2099; ZNVER1-NEXT:    vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [12:0.50]
2100; ZNVER1-NEXT:    vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [12:0.50]
2101; ZNVER1-NEXT:    vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [12:0.50]
2102; ZNVER1-NEXT:    #NO_APP
2103; ZNVER1-NEXT:    retq # sched: [1:0.50]
2104  tail call void asm "vfnmadd132ps $2, $1, $0 \0A\09 vfnmadd213ps $2, $1, $0 \0A\09 vfnmadd231ps $2, $1, $0 \0A\09 vfnmadd132ps $3, $1, $0 \0A\09 vfnmadd213ps $3, $1, $0 \0A\09 vfnmadd231ps $3, $1, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
2105  ret void
2106}
2107
2108define void @test_vfnmaddps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) optsize {
2109; GENERIC-LABEL: test_vfnmaddps_256:
2110; GENERIC:       # %bb.0:
2111; GENERIC-NEXT:    #APP
2112; GENERIC-NEXT:    vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50]
2113; GENERIC-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
2114; GENERIC-NEXT:    vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50]
2115; GENERIC-NEXT:    vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [10:0.50]
2116; GENERIC-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [10:0.50]
2117; GENERIC-NEXT:    vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [10:0.50]
2118; GENERIC-NEXT:    #NO_APP
2119; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
2120; GENERIC-NEXT:    retq # sched: [1:1.00]
2121;
2122; HASWELL-LABEL: test_vfnmaddps_256:
2123; HASWELL:       # %bb.0:
2124; HASWELL-NEXT:    #APP
2125; HASWELL-NEXT:    vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50]
2126; HASWELL-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
2127; HASWELL-NEXT:    vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50]
2128; HASWELL-NEXT:    vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [12:0.50]
2129; HASWELL-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [12:0.50]
2130; HASWELL-NEXT:    vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [12:0.50]
2131; HASWELL-NEXT:    #NO_APP
2132; HASWELL-NEXT:    vzeroupper # sched: [4:1.00]
2133; HASWELL-NEXT:    retq # sched: [7:1.00]
2134;
2135; BROADWELL-LABEL: test_vfnmaddps_256:
2136; BROADWELL:       # %bb.0:
2137; BROADWELL-NEXT:    #APP
2138; BROADWELL-NEXT:    vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50]
2139; BROADWELL-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
2140; BROADWELL-NEXT:    vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50]
2141; BROADWELL-NEXT:    vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [11:0.50]
2142; BROADWELL-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50]
2143; BROADWELL-NEXT:    vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [11:0.50]
2144; BROADWELL-NEXT:    #NO_APP
2145; BROADWELL-NEXT:    vzeroupper # sched: [4:1.00]
2146; BROADWELL-NEXT:    retq # sched: [7:1.00]
2147;
2148; SKYLAKE-LABEL: test_vfnmaddps_256:
2149; SKYLAKE:       # %bb.0:
2150; SKYLAKE-NEXT:    #APP
2151; SKYLAKE-NEXT:    vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [4:0.50]
2152; SKYLAKE-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [4:0.50]
2153; SKYLAKE-NEXT:    vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [4:0.50]
2154; SKYLAKE-NEXT:    vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [11:0.50]
2155; SKYLAKE-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50]
2156; SKYLAKE-NEXT:    vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [11:0.50]
2157; SKYLAKE-NEXT:    #NO_APP
2158; SKYLAKE-NEXT:    vzeroupper # sched: [4:1.00]
2159; SKYLAKE-NEXT:    retq # sched: [7:1.00]
2160;
2161; KNL-LABEL: test_vfnmaddps_256:
2162; KNL:       # %bb.0:
2163; KNL-NEXT:    #APP
2164; KNL-NEXT:    vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50]
2165; KNL-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
2166; KNL-NEXT:    vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50]
2167; KNL-NEXT:    vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [12:0.50]
2168; KNL-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [12:0.50]
2169; KNL-NEXT:    vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [12:0.50]
2170; KNL-NEXT:    #NO_APP
2171; KNL-NEXT:    retq # sched: [7:1.00]
2172;
2173; SKX-LABEL: test_vfnmaddps_256:
2174; SKX:       # %bb.0:
2175; SKX-NEXT:    #APP
2176; SKX-NEXT:    vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [4:0.50]
2177; SKX-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [4:0.50]
2178; SKX-NEXT:    vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [4:0.50]
2179; SKX-NEXT:    vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [11:0.50]
2180; SKX-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50]
2181; SKX-NEXT:    vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [11:0.50]
2182; SKX-NEXT:    #NO_APP
2183; SKX-NEXT:    vzeroupper # sched: [4:1.00]
2184; SKX-NEXT:    retq # sched: [7:1.00]
2185;
2186; ZNVER1-LABEL: test_vfnmaddps_256:
2187; ZNVER1:       # %bb.0:
2188; ZNVER1-NEXT:    #APP
2189; ZNVER1-NEXT:    vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50]
2190; ZNVER1-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
2191; ZNVER1-NEXT:    vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50]
2192; ZNVER1-NEXT:    vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [12:0.50]
2193; ZNVER1-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [12:0.50]
2194; ZNVER1-NEXT:    vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [12:0.50]
2195; ZNVER1-NEXT:    #NO_APP
2196; ZNVER1-NEXT:    vzeroupper # sched: [100:0.25]
2197; ZNVER1-NEXT:    retq # sched: [1:0.50]
2198  tail call void asm "vfnmadd132ps $2, $1, $0 \0A\09 vfnmadd213ps $2, $1, $0 \0A\09 vfnmadd231ps $2, $1, $0 \0A\09 vfnmadd132ps $3, $1, $0 \0A\09 vfnmadd213ps $3, $1, $0 \0A\09 vfnmadd231ps $3, $1, $0", "x,x,x,*m"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) nounwind
2199  ret void
2200}
2201
2202define void @test_vfnmaddsd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize {
2203; GENERIC-LABEL: test_vfnmaddsd_128:
2204; GENERIC:       # %bb.0:
2205; GENERIC-NEXT:    #APP
2206; GENERIC-NEXT:    vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
2207; GENERIC-NEXT:    vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
2208; GENERIC-NEXT:    vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
2209; GENERIC-NEXT:    vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
2210; GENERIC-NEXT:    vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
2211; GENERIC-NEXT:    vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
2212; GENERIC-NEXT:    #NO_APP
2213; GENERIC-NEXT:    retq # sched: [1:1.00]
2214;
2215; HASWELL-LABEL: test_vfnmaddsd_128:
2216; HASWELL:       # %bb.0:
2217; HASWELL-NEXT:    #APP
2218; HASWELL-NEXT:    vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
2219; HASWELL-NEXT:    vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
2220; HASWELL-NEXT:    vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
2221; HASWELL-NEXT:    vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
2222; HASWELL-NEXT:    vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
2223; HASWELL-NEXT:    vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
2224; HASWELL-NEXT:    #NO_APP
2225; HASWELL-NEXT:    retq # sched: [7:1.00]
2226;
2227; BROADWELL-LABEL: test_vfnmaddsd_128:
2228; BROADWELL:       # %bb.0:
2229; BROADWELL-NEXT:    #APP
2230; BROADWELL-NEXT:    vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
2231; BROADWELL-NEXT:    vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
2232; BROADWELL-NEXT:    vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
2233; BROADWELL-NEXT:    vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
2234; BROADWELL-NEXT:    vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
2235; BROADWELL-NEXT:    vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
2236; BROADWELL-NEXT:    #NO_APP
2237; BROADWELL-NEXT:    retq # sched: [7:1.00]
2238;
2239; SKYLAKE-LABEL: test_vfnmaddsd_128:
2240; SKYLAKE:       # %bb.0:
2241; SKYLAKE-NEXT:    #APP
2242; SKYLAKE-NEXT:    vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.50]
2243; SKYLAKE-NEXT:    vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.50]
2244; SKYLAKE-NEXT:    vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.50]
2245; SKYLAKE-NEXT:    vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [9:0.50]
2246; SKYLAKE-NEXT:    vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [9:0.50]
2247; SKYLAKE-NEXT:    vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [9:0.50]
2248; SKYLAKE-NEXT:    #NO_APP
2249; SKYLAKE-NEXT:    retq # sched: [7:1.00]
2250;
2251; KNL-LABEL: test_vfnmaddsd_128:
2252; KNL:       # %bb.0:
2253; KNL-NEXT:    #APP
2254; KNL-NEXT:    vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
2255; KNL-NEXT:    vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
2256; KNL-NEXT:    vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
2257; KNL-NEXT:    vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
2258; KNL-NEXT:    vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
2259; KNL-NEXT:    vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
2260; KNL-NEXT:    #NO_APP
2261; KNL-NEXT:    retq # sched: [7:1.00]
2262;
2263; SKX-LABEL: test_vfnmaddsd_128:
2264; SKX:       # %bb.0:
2265; SKX-NEXT:    #APP
2266; SKX-NEXT:    vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.50]
2267; SKX-NEXT:    vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.50]
2268; SKX-NEXT:    vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.50]
2269; SKX-NEXT:    vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [9:0.50]
2270; SKX-NEXT:    vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [9:0.50]
2271; SKX-NEXT:    vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [9:0.50]
2272; SKX-NEXT:    #NO_APP
2273; SKX-NEXT:    retq # sched: [7:1.00]
2274;
2275; ZNVER1-LABEL: test_vfnmaddsd_128:
2276; ZNVER1:       # %bb.0:
2277; ZNVER1-NEXT:    #APP
2278; ZNVER1-NEXT:    vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
2279; ZNVER1-NEXT:    vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
2280; ZNVER1-NEXT:    vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
2281; ZNVER1-NEXT:    vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [12:0.50]
2282; ZNVER1-NEXT:    vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [12:0.50]
2283; ZNVER1-NEXT:    vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [12:0.50]
2284; ZNVER1-NEXT:    #NO_APP
2285; ZNVER1-NEXT:    retq # sched: [1:0.50]
2286  tail call void asm "vfnmadd132sd $2, $1, $0 \0A\09 vfnmadd213sd $2, $1, $0 \0A\09 vfnmadd231sd $2, $1, $0 \0A\09 vfnmadd132sd $3, $1, $0 \0A\09 vfnmadd213sd $3, $1, $0 \0A\09 vfnmadd231sd $3, $1, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
2287  ret void
2288}
2289
2290define void @test_vfnmaddss_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize {
2291; GENERIC-LABEL: test_vfnmaddss_128:
2292; GENERIC:       # %bb.0:
2293; GENERIC-NEXT:    #APP
2294; GENERIC-NEXT:    vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
2295; GENERIC-NEXT:    vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
2296; GENERIC-NEXT:    vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
2297; GENERIC-NEXT:    vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
2298; GENERIC-NEXT:    vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
2299; GENERIC-NEXT:    vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
2300; GENERIC-NEXT:    #NO_APP
2301; GENERIC-NEXT:    retq # sched: [1:1.00]
2302;
2303; HASWELL-LABEL: test_vfnmaddss_128:
2304; HASWELL:       # %bb.0:
2305; HASWELL-NEXT:    #APP
2306; HASWELL-NEXT:    vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
2307; HASWELL-NEXT:    vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
2308; HASWELL-NEXT:    vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
2309; HASWELL-NEXT:    vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
2310; HASWELL-NEXT:    vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
2311; HASWELL-NEXT:    vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
2312; HASWELL-NEXT:    #NO_APP
2313; HASWELL-NEXT:    retq # sched: [7:1.00]
2314;
2315; BROADWELL-LABEL: test_vfnmaddss_128:
2316; BROADWELL:       # %bb.0:
2317; BROADWELL-NEXT:    #APP
2318; BROADWELL-NEXT:    vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
2319; BROADWELL-NEXT:    vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
2320; BROADWELL-NEXT:    vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
2321; BROADWELL-NEXT:    vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
2322; BROADWELL-NEXT:    vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
2323; BROADWELL-NEXT:    vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
2324; BROADWELL-NEXT:    #NO_APP
2325; BROADWELL-NEXT:    retq # sched: [7:1.00]
2326;
2327; SKYLAKE-LABEL: test_vfnmaddss_128:
2328; SKYLAKE:       # %bb.0:
2329; SKYLAKE-NEXT:    #APP
2330; SKYLAKE-NEXT:    vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.50]
2331; SKYLAKE-NEXT:    vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.50]
2332; SKYLAKE-NEXT:    vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.50]
2333; SKYLAKE-NEXT:    vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [9:0.50]
2334; SKYLAKE-NEXT:    vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [9:0.50]
2335; SKYLAKE-NEXT:    vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [9:0.50]
2336; SKYLAKE-NEXT:    #NO_APP
2337; SKYLAKE-NEXT:    retq # sched: [7:1.00]
2338;
2339; KNL-LABEL: test_vfnmaddss_128:
2340; KNL:       # %bb.0:
2341; KNL-NEXT:    #APP
2342; KNL-NEXT:    vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
2343; KNL-NEXT:    vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
2344; KNL-NEXT:    vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
2345; KNL-NEXT:    vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
2346; KNL-NEXT:    vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
2347; KNL-NEXT:    vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
2348; KNL-NEXT:    #NO_APP
2349; KNL-NEXT:    retq # sched: [7:1.00]
2350;
2351; SKX-LABEL: test_vfnmaddss_128:
2352; SKX:       # %bb.0:
2353; SKX-NEXT:    #APP
2354; SKX-NEXT:    vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.50]
2355; SKX-NEXT:    vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.50]
2356; SKX-NEXT:    vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.50]
2357; SKX-NEXT:    vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [9:0.50]
2358; SKX-NEXT:    vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [9:0.50]
2359; SKX-NEXT:    vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [9:0.50]
2360; SKX-NEXT:    #NO_APP
2361; SKX-NEXT:    retq # sched: [7:1.00]
2362;
2363; ZNVER1-LABEL: test_vfnmaddss_128:
2364; ZNVER1:       # %bb.0:
2365; ZNVER1-NEXT:    #APP
2366; ZNVER1-NEXT:    vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
2367; ZNVER1-NEXT:    vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
2368; ZNVER1-NEXT:    vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
2369; ZNVER1-NEXT:    vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [12:0.50]
2370; ZNVER1-NEXT:    vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [12:0.50]
2371; ZNVER1-NEXT:    vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [12:0.50]
2372; ZNVER1-NEXT:    #NO_APP
2373; ZNVER1-NEXT:    retq # sched: [1:0.50]
2374  tail call void asm "vfnmadd132ss $2, $1, $0 \0A\09 vfnmadd213ss $2, $1, $0 \0A\09 vfnmadd231ss $2, $1, $0 \0A\09 vfnmadd132ss $3, $1, $0 \0A\09 vfnmadd213ss $3, $1, $0 \0A\09 vfnmadd231ss $3, $1, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
2375  ret void
2376}
2377
2378;
2379; VFNMSUB
2380;
2381
2382define void @test_vfnmsubpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize {
2383; GENERIC-LABEL: test_vfnmsubpd_128:
2384; GENERIC:       # %bb.0:
2385; GENERIC-NEXT:    #APP
2386; GENERIC-NEXT:    vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
2387; GENERIC-NEXT:    vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
2388; GENERIC-NEXT:    vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
2389; GENERIC-NEXT:    vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
2390; GENERIC-NEXT:    vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
2391; GENERIC-NEXT:    vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
2392; GENERIC-NEXT:    #NO_APP
2393; GENERIC-NEXT:    retq # sched: [1:1.00]
2394;
2395; HASWELL-LABEL: test_vfnmsubpd_128:
2396; HASWELL:       # %bb.0:
2397; HASWELL-NEXT:    #APP
2398; HASWELL-NEXT:    vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
2399; HASWELL-NEXT:    vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
2400; HASWELL-NEXT:    vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
2401; HASWELL-NEXT:    vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [11:0.50]
2402; HASWELL-NEXT:    vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [11:0.50]
2403; HASWELL-NEXT:    vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [11:0.50]
2404; HASWELL-NEXT:    #NO_APP
2405; HASWELL-NEXT:    retq # sched: [7:1.00]
2406;
2407; BROADWELL-LABEL: test_vfnmsubpd_128:
2408; BROADWELL:       # %bb.0:
2409; BROADWELL-NEXT:    #APP
2410; BROADWELL-NEXT:    vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
2411; BROADWELL-NEXT:    vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
2412; BROADWELL-NEXT:    vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
2413; BROADWELL-NEXT:    vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
2414; BROADWELL-NEXT:    vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
2415; BROADWELL-NEXT:    vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
2416; BROADWELL-NEXT:    #NO_APP
2417; BROADWELL-NEXT:    retq # sched: [7:1.00]
2418;
2419; SKYLAKE-LABEL: test_vfnmsubpd_128:
2420; SKYLAKE:       # %bb.0:
2421; SKYLAKE-NEXT:    #APP
2422; SKYLAKE-NEXT:    vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.50]
2423; SKYLAKE-NEXT:    vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.50]
2424; SKYLAKE-NEXT:    vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.50]
2425; SKYLAKE-NEXT:    vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
2426; SKYLAKE-NEXT:    vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
2427; SKYLAKE-NEXT:    vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
2428; SKYLAKE-NEXT:    #NO_APP
2429; SKYLAKE-NEXT:    retq # sched: [7:1.00]
2430;
2431; KNL-LABEL: test_vfnmsubpd_128:
2432; KNL:       # %bb.0:
2433; KNL-NEXT:    #APP
2434; KNL-NEXT:    vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
2435; KNL-NEXT:    vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
2436; KNL-NEXT:    vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
2437; KNL-NEXT:    vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [11:0.50]
2438; KNL-NEXT:    vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [11:0.50]
2439; KNL-NEXT:    vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [11:0.50]
2440; KNL-NEXT:    #NO_APP
2441; KNL-NEXT:    retq # sched: [7:1.00]
2442;
2443; SKX-LABEL: test_vfnmsubpd_128:
2444; SKX:       # %bb.0:
2445; SKX-NEXT:    #APP
2446; SKX-NEXT:    vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.50]
2447; SKX-NEXT:    vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.50]
2448; SKX-NEXT:    vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.50]
2449; SKX-NEXT:    vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
2450; SKX-NEXT:    vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
2451; SKX-NEXT:    vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
2452; SKX-NEXT:    #NO_APP
2453; SKX-NEXT:    retq # sched: [7:1.00]
2454;
2455; ZNVER1-LABEL: test_vfnmsubpd_128:
2456; ZNVER1:       # %bb.0:
2457; ZNVER1-NEXT:    #APP
2458; ZNVER1-NEXT:    vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
2459; ZNVER1-NEXT:    vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
2460; ZNVER1-NEXT:    vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
2461; ZNVER1-NEXT:    vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [12:0.50]
2462; ZNVER1-NEXT:    vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [12:0.50]
2463; ZNVER1-NEXT:    vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [12:0.50]
2464; ZNVER1-NEXT:    #NO_APP
2465; ZNVER1-NEXT:    retq # sched: [1:0.50]
2466  tail call void asm "vfnmsub132pd $2, $1, $0 \0A\09 vfnmsub213pd $2, $1, $0 \0A\09 vfnmsub231pd $2, $1, $0 \0A\09 vfnmsub132pd $3, $1, $0 \0A\09 vfnmsub213pd $3, $1, $0 \0A\09 vfnmsub231pd $3, $1, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
2467  ret void
2468}
2469
2470define void @test_vfnmsubpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) optsize {
2471; GENERIC-LABEL: test_vfnmsubpd_256:
2472; GENERIC:       # %bb.0:
2473; GENERIC-NEXT:    #APP
2474; GENERIC-NEXT:    vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50]
2475; GENERIC-NEXT:    vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50]
2476; GENERIC-NEXT:    vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50]
2477; GENERIC-NEXT:    vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [10:0.50]
2478; GENERIC-NEXT:    vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [10:0.50]
2479; GENERIC-NEXT:    vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [10:0.50]
2480; GENERIC-NEXT:    #NO_APP
2481; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
2482; GENERIC-NEXT:    retq # sched: [1:1.00]
2483;
2484; HASWELL-LABEL: test_vfnmsubpd_256:
2485; HASWELL:       # %bb.0:
2486; HASWELL-NEXT:    #APP
2487; HASWELL-NEXT:    vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50]
2488; HASWELL-NEXT:    vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50]
2489; HASWELL-NEXT:    vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50]
2490; HASWELL-NEXT:    vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [12:0.50]
2491; HASWELL-NEXT:    vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [12:0.50]
2492; HASWELL-NEXT:    vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [12:0.50]
2493; HASWELL-NEXT:    #NO_APP
2494; HASWELL-NEXT:    vzeroupper # sched: [4:1.00]
2495; HASWELL-NEXT:    retq # sched: [7:1.00]
2496;
2497; BROADWELL-LABEL: test_vfnmsubpd_256:
2498; BROADWELL:       # %bb.0:
2499; BROADWELL-NEXT:    #APP
2500; BROADWELL-NEXT:    vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50]
2501; BROADWELL-NEXT:    vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50]
2502; BROADWELL-NEXT:    vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50]
2503; BROADWELL-NEXT:    vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [11:0.50]
2504; BROADWELL-NEXT:    vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [11:0.50]
2505; BROADWELL-NEXT:    vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [11:0.50]
2506; BROADWELL-NEXT:    #NO_APP
2507; BROADWELL-NEXT:    vzeroupper # sched: [4:1.00]
2508; BROADWELL-NEXT:    retq # sched: [7:1.00]
2509;
2510; SKYLAKE-LABEL: test_vfnmsubpd_256:
2511; SKYLAKE:       # %bb.0:
2512; SKYLAKE-NEXT:    #APP
2513; SKYLAKE-NEXT:    vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [4:0.50]
2514; SKYLAKE-NEXT:    vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [4:0.50]
2515; SKYLAKE-NEXT:    vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [4:0.50]
2516; SKYLAKE-NEXT:    vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [11:0.50]
2517; SKYLAKE-NEXT:    vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [11:0.50]
2518; SKYLAKE-NEXT:    vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [11:0.50]
2519; SKYLAKE-NEXT:    #NO_APP
2520; SKYLAKE-NEXT:    vzeroupper # sched: [4:1.00]
2521; SKYLAKE-NEXT:    retq # sched: [7:1.00]
2522;
2523; KNL-LABEL: test_vfnmsubpd_256:
2524; KNL:       # %bb.0:
2525; KNL-NEXT:    #APP
2526; KNL-NEXT:    vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50]
2527; KNL-NEXT:    vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50]
2528; KNL-NEXT:    vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50]
2529; KNL-NEXT:    vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [12:0.50]
2530; KNL-NEXT:    vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [12:0.50]
2531; KNL-NEXT:    vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [12:0.50]
2532; KNL-NEXT:    #NO_APP
2533; KNL-NEXT:    retq # sched: [7:1.00]
2534;
2535; SKX-LABEL: test_vfnmsubpd_256:
2536; SKX:       # %bb.0:
2537; SKX-NEXT:    #APP
2538; SKX-NEXT:    vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [4:0.50]
2539; SKX-NEXT:    vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [4:0.50]
2540; SKX-NEXT:    vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [4:0.50]
2541; SKX-NEXT:    vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [11:0.50]
2542; SKX-NEXT:    vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [11:0.50]
2543; SKX-NEXT:    vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [11:0.50]
2544; SKX-NEXT:    #NO_APP
2545; SKX-NEXT:    vzeroupper # sched: [4:1.00]
2546; SKX-NEXT:    retq # sched: [7:1.00]
2547;
2548; ZNVER1-LABEL: test_vfnmsubpd_256:
2549; ZNVER1:       # %bb.0:
2550; ZNVER1-NEXT:    #APP
2551; ZNVER1-NEXT:    vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50]
2552; ZNVER1-NEXT:    vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50]
2553; ZNVER1-NEXT:    vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50]
2554; ZNVER1-NEXT:    vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [12:0.50]
2555; ZNVER1-NEXT:    vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [12:0.50]
2556; ZNVER1-NEXT:    vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [12:0.50]
2557; ZNVER1-NEXT:    #NO_APP
2558; ZNVER1-NEXT:    vzeroupper # sched: [100:0.25]
2559; ZNVER1-NEXT:    retq # sched: [1:0.50]
2560  tail call void asm "vfnmsub132pd $2, $1, $0 \0A\09 vfnmsub213pd $2, $1, $0 \0A\09 vfnmsub231pd $2, $1, $0 \0A\09 vfnmsub132pd $3, $1, $0 \0A\09 vfnmsub213pd $3, $1, $0 \0A\09 vfnmsub231pd $3, $1, $0", "x,x,x,*m"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) nounwind
2561  ret void
2562}
2563
2564define void @test_vfnmsubps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize {
2565; GENERIC-LABEL: test_vfnmsubps_128:
2566; GENERIC:       # %bb.0:
2567; GENERIC-NEXT:    #APP
2568; GENERIC-NEXT:    vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
2569; GENERIC-NEXT:    vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
2570; GENERIC-NEXT:    vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
2571; GENERIC-NEXT:    vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
2572; GENERIC-NEXT:    vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
2573; GENERIC-NEXT:    vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
2574; GENERIC-NEXT:    #NO_APP
2575; GENERIC-NEXT:    retq # sched: [1:1.00]
2576;
2577; HASWELL-LABEL: test_vfnmsubps_128:
2578; HASWELL:       # %bb.0:
2579; HASWELL-NEXT:    #APP
2580; HASWELL-NEXT:    vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
2581; HASWELL-NEXT:    vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
2582; HASWELL-NEXT:    vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
2583; HASWELL-NEXT:    vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [11:0.50]
2584; HASWELL-NEXT:    vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [11:0.50]
2585; HASWELL-NEXT:    vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [11:0.50]
2586; HASWELL-NEXT:    #NO_APP
2587; HASWELL-NEXT:    retq # sched: [7:1.00]
2588;
2589; BROADWELL-LABEL: test_vfnmsubps_128:
2590; BROADWELL:       # %bb.0:
2591; BROADWELL-NEXT:    #APP
2592; BROADWELL-NEXT:    vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
2593; BROADWELL-NEXT:    vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
2594; BROADWELL-NEXT:    vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
2595; BROADWELL-NEXT:    vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
2596; BROADWELL-NEXT:    vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
2597; BROADWELL-NEXT:    vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
2598; BROADWELL-NEXT:    #NO_APP
2599; BROADWELL-NEXT:    retq # sched: [7:1.00]
2600;
2601; SKYLAKE-LABEL: test_vfnmsubps_128:
2602; SKYLAKE:       # %bb.0:
2603; SKYLAKE-NEXT:    #APP
2604; SKYLAKE-NEXT:    vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.50]
2605; SKYLAKE-NEXT:    vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.50]
2606; SKYLAKE-NEXT:    vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.50]
2607; SKYLAKE-NEXT:    vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
2608; SKYLAKE-NEXT:    vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
2609; SKYLAKE-NEXT:    vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
2610; SKYLAKE-NEXT:    #NO_APP
2611; SKYLAKE-NEXT:    retq # sched: [7:1.00]
2612;
2613; KNL-LABEL: test_vfnmsubps_128:
2614; KNL:       # %bb.0:
2615; KNL-NEXT:    #APP
2616; KNL-NEXT:    vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
2617; KNL-NEXT:    vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
2618; KNL-NEXT:    vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
2619; KNL-NEXT:    vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [11:0.50]
2620; KNL-NEXT:    vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [11:0.50]
2621; KNL-NEXT:    vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [11:0.50]
2622; KNL-NEXT:    #NO_APP
2623; KNL-NEXT:    retq # sched: [7:1.00]
2624;
2625; SKX-LABEL: test_vfnmsubps_128:
2626; SKX:       # %bb.0:
2627; SKX-NEXT:    #APP
2628; SKX-NEXT:    vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.50]
2629; SKX-NEXT:    vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.50]
2630; SKX-NEXT:    vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.50]
2631; SKX-NEXT:    vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
2632; SKX-NEXT:    vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
2633; SKX-NEXT:    vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
2634; SKX-NEXT:    #NO_APP
2635; SKX-NEXT:    retq # sched: [7:1.00]
2636;
2637; ZNVER1-LABEL: test_vfnmsubps_128:
2638; ZNVER1:       # %bb.0:
2639; ZNVER1-NEXT:    #APP
2640; ZNVER1-NEXT:    vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
2641; ZNVER1-NEXT:    vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
2642; ZNVER1-NEXT:    vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
2643; ZNVER1-NEXT:    vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [12:0.50]
2644; ZNVER1-NEXT:    vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [12:0.50]
2645; ZNVER1-NEXT:    vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [12:0.50]
2646; ZNVER1-NEXT:    #NO_APP
2647; ZNVER1-NEXT:    retq # sched: [1:0.50]
2648  tail call void asm "vfnmsub132ps $2, $1, $0 \0A\09 vfnmsub213ps $2, $1, $0 \0A\09 vfnmsub231ps $2, $1, $0 \0A\09 vfnmsub132ps $3, $1, $0 \0A\09 vfnmsub213ps $3, $1, $0 \0A\09 vfnmsub231ps $3, $1, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
2649  ret void
2650}
2651
2652define void @test_vfnmsubps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) optsize {
2653; GENERIC-LABEL: test_vfnmsubps_256:
2654; GENERIC:       # %bb.0:
2655; GENERIC-NEXT:    #APP
2656; GENERIC-NEXT:    vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50]
2657; GENERIC-NEXT:    vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50]
2658; GENERIC-NEXT:    vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50]
2659; GENERIC-NEXT:    vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [10:0.50]
2660; GENERIC-NEXT:    vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [10:0.50]
2661; GENERIC-NEXT:    vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [10:0.50]
2662; GENERIC-NEXT:    #NO_APP
2663; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
2664; GENERIC-NEXT:    retq # sched: [1:1.00]
2665;
2666; HASWELL-LABEL: test_vfnmsubps_256:
2667; HASWELL:       # %bb.0:
2668; HASWELL-NEXT:    #APP
2669; HASWELL-NEXT:    vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50]
2670; HASWELL-NEXT:    vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50]
2671; HASWELL-NEXT:    vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50]
2672; HASWELL-NEXT:    vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [12:0.50]
2673; HASWELL-NEXT:    vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [12:0.50]
2674; HASWELL-NEXT:    vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [12:0.50]
2675; HASWELL-NEXT:    #NO_APP
2676; HASWELL-NEXT:    vzeroupper # sched: [4:1.00]
2677; HASWELL-NEXT:    retq # sched: [7:1.00]
2678;
2679; BROADWELL-LABEL: test_vfnmsubps_256:
2680; BROADWELL:       # %bb.0:
2681; BROADWELL-NEXT:    #APP
2682; BROADWELL-NEXT:    vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50]
2683; BROADWELL-NEXT:    vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50]
2684; BROADWELL-NEXT:    vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50]
2685; BROADWELL-NEXT:    vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [11:0.50]
2686; BROADWELL-NEXT:    vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [11:0.50]
2687; BROADWELL-NEXT:    vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [11:0.50]
2688; BROADWELL-NEXT:    #NO_APP
2689; BROADWELL-NEXT:    vzeroupper # sched: [4:1.00]
2690; BROADWELL-NEXT:    retq # sched: [7:1.00]
2691;
2692; SKYLAKE-LABEL: test_vfnmsubps_256:
2693; SKYLAKE:       # %bb.0:
2694; SKYLAKE-NEXT:    #APP
2695; SKYLAKE-NEXT:    vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [4:0.50]
2696; SKYLAKE-NEXT:    vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [4:0.50]
2697; SKYLAKE-NEXT:    vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [4:0.50]
2698; SKYLAKE-NEXT:    vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [11:0.50]
2699; SKYLAKE-NEXT:    vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [11:0.50]
2700; SKYLAKE-NEXT:    vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [11:0.50]
2701; SKYLAKE-NEXT:    #NO_APP
2702; SKYLAKE-NEXT:    vzeroupper # sched: [4:1.00]
2703; SKYLAKE-NEXT:    retq # sched: [7:1.00]
2704;
2705; KNL-LABEL: test_vfnmsubps_256:
2706; KNL:       # %bb.0:
2707; KNL-NEXT:    #APP
2708; KNL-NEXT:    vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50]
2709; KNL-NEXT:    vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50]
2710; KNL-NEXT:    vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50]
2711; KNL-NEXT:    vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [12:0.50]
2712; KNL-NEXT:    vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [12:0.50]
2713; KNL-NEXT:    vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [12:0.50]
2714; KNL-NEXT:    #NO_APP
2715; KNL-NEXT:    retq # sched: [7:1.00]
2716;
2717; SKX-LABEL: test_vfnmsubps_256:
2718; SKX:       # %bb.0:
2719; SKX-NEXT:    #APP
2720; SKX-NEXT:    vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [4:0.50]
2721; SKX-NEXT:    vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [4:0.50]
2722; SKX-NEXT:    vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [4:0.50]
2723; SKX-NEXT:    vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [11:0.50]
2724; SKX-NEXT:    vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [11:0.50]
2725; SKX-NEXT:    vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [11:0.50]
2726; SKX-NEXT:    #NO_APP
2727; SKX-NEXT:    vzeroupper # sched: [4:1.00]
2728; SKX-NEXT:    retq # sched: [7:1.00]
2729;
2730; ZNVER1-LABEL: test_vfnmsubps_256:
2731; ZNVER1:       # %bb.0:
2732; ZNVER1-NEXT:    #APP
2733; ZNVER1-NEXT:    vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50]
2734; ZNVER1-NEXT:    vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50]
2735; ZNVER1-NEXT:    vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50]
2736; ZNVER1-NEXT:    vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [12:0.50]
2737; ZNVER1-NEXT:    vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [12:0.50]
2738; ZNVER1-NEXT:    vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [12:0.50]
2739; ZNVER1-NEXT:    #NO_APP
2740; ZNVER1-NEXT:    vzeroupper # sched: [100:0.25]
2741; ZNVER1-NEXT:    retq # sched: [1:0.50]
2742  tail call void asm "vfnmsub132ps $2, $1, $0 \0A\09 vfnmsub213ps $2, $1, $0 \0A\09 vfnmsub231ps $2, $1, $0 \0A\09 vfnmsub132ps $3, $1, $0 \0A\09 vfnmsub213ps $3, $1, $0 \0A\09 vfnmsub231ps $3, $1, $0", "x,x,x,*m"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) nounwind
2743  ret void
2744}
2745
2746define void @test_vfnmsubsd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize {
2747; GENERIC-LABEL: test_vfnmsubsd_128:
2748; GENERIC:       # %bb.0:
2749; GENERIC-NEXT:    #APP
2750; GENERIC-NEXT:    vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
2751; GENERIC-NEXT:    vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
2752; GENERIC-NEXT:    vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
2753; GENERIC-NEXT:    vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
2754; GENERIC-NEXT:    vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
2755; GENERIC-NEXT:    vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
2756; GENERIC-NEXT:    #NO_APP
2757; GENERIC-NEXT:    retq # sched: [1:1.00]
2758;
2759; HASWELL-LABEL: test_vfnmsubsd_128:
2760; HASWELL:       # %bb.0:
2761; HASWELL-NEXT:    #APP
2762; HASWELL-NEXT:    vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
2763; HASWELL-NEXT:    vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
2764; HASWELL-NEXT:    vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
2765; HASWELL-NEXT:    vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
2766; HASWELL-NEXT:    vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
2767; HASWELL-NEXT:    vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
2768; HASWELL-NEXT:    #NO_APP
2769; HASWELL-NEXT:    retq # sched: [7:1.00]
2770;
2771; BROADWELL-LABEL: test_vfnmsubsd_128:
2772; BROADWELL:       # %bb.0:
2773; BROADWELL-NEXT:    #APP
2774; BROADWELL-NEXT:    vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
2775; BROADWELL-NEXT:    vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
2776; BROADWELL-NEXT:    vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
2777; BROADWELL-NEXT:    vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
2778; BROADWELL-NEXT:    vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
2779; BROADWELL-NEXT:    vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
2780; BROADWELL-NEXT:    #NO_APP
2781; BROADWELL-NEXT:    retq # sched: [7:1.00]
2782;
2783; SKYLAKE-LABEL: test_vfnmsubsd_128:
2784; SKYLAKE:       # %bb.0:
2785; SKYLAKE-NEXT:    #APP
2786; SKYLAKE-NEXT:    vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.50]
2787; SKYLAKE-NEXT:    vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.50]
2788; SKYLAKE-NEXT:    vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.50]
2789; SKYLAKE-NEXT:    vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [9:0.50]
2790; SKYLAKE-NEXT:    vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [9:0.50]
2791; SKYLAKE-NEXT:    vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [9:0.50]
2792; SKYLAKE-NEXT:    #NO_APP
2793; SKYLAKE-NEXT:    retq # sched: [7:1.00]
2794;
2795; KNL-LABEL: test_vfnmsubsd_128:
2796; KNL:       # %bb.0:
2797; KNL-NEXT:    #APP
2798; KNL-NEXT:    vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
2799; KNL-NEXT:    vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
2800; KNL-NEXT:    vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
2801; KNL-NEXT:    vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
2802; KNL-NEXT:    vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
2803; KNL-NEXT:    vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
2804; KNL-NEXT:    #NO_APP
2805; KNL-NEXT:    retq # sched: [7:1.00]
2806;
2807; SKX-LABEL: test_vfnmsubsd_128:
2808; SKX:       # %bb.0:
2809; SKX-NEXT:    #APP
2810; SKX-NEXT:    vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.50]
2811; SKX-NEXT:    vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.50]
2812; SKX-NEXT:    vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.50]
2813; SKX-NEXT:    vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [9:0.50]
2814; SKX-NEXT:    vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [9:0.50]
2815; SKX-NEXT:    vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [9:0.50]
2816; SKX-NEXT:    #NO_APP
2817; SKX-NEXT:    retq # sched: [7:1.00]
2818;
2819; ZNVER1-LABEL: test_vfnmsubsd_128:
2820; ZNVER1:       # %bb.0:
2821; ZNVER1-NEXT:    #APP
2822; ZNVER1-NEXT:    vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
2823; ZNVER1-NEXT:    vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
2824; ZNVER1-NEXT:    vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
2825; ZNVER1-NEXT:    vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [12:0.50]
2826; ZNVER1-NEXT:    vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [12:0.50]
2827; ZNVER1-NEXT:    vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [12:0.50]
2828; ZNVER1-NEXT:    #NO_APP
2829; ZNVER1-NEXT:    retq # sched: [1:0.50]
2830  tail call void asm "vfnmsub132sd $2, $1, $0 \0A\09 vfnmsub213sd $2, $1, $0 \0A\09 vfnmsub231sd $2, $1, $0 \0A\09 vfnmsub132sd $3, $1, $0 \0A\09 vfnmsub213sd $3, $1, $0 \0A\09 vfnmsub231sd $3, $1, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
2831  ret void
2832}
2833
2834define void @test_vfnmsubss_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize {
2835; GENERIC-LABEL: test_vfnmsubss_128:
2836; GENERIC:       # %bb.0:
2837; GENERIC-NEXT:    #APP
2838; GENERIC-NEXT:    vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
2839; GENERIC-NEXT:    vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
2840; GENERIC-NEXT:    vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
2841; GENERIC-NEXT:    vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
2842; GENERIC-NEXT:    vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
2843; GENERIC-NEXT:    vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
2844; GENERIC-NEXT:    #NO_APP
2845; GENERIC-NEXT:    retq # sched: [1:1.00]
2846;
2847; HASWELL-LABEL: test_vfnmsubss_128:
2848; HASWELL:       # %bb.0:
2849; HASWELL-NEXT:    #APP
2850; HASWELL-NEXT:    vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
2851; HASWELL-NEXT:    vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
2852; HASWELL-NEXT:    vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
2853; HASWELL-NEXT:    vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
2854; HASWELL-NEXT:    vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
2855; HASWELL-NEXT:    vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
2856; HASWELL-NEXT:    #NO_APP
2857; HASWELL-NEXT:    retq # sched: [7:1.00]
2858;
2859; BROADWELL-LABEL: test_vfnmsubss_128:
2860; BROADWELL:       # %bb.0:
2861; BROADWELL-NEXT:    #APP
2862; BROADWELL-NEXT:    vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
2863; BROADWELL-NEXT:    vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
2864; BROADWELL-NEXT:    vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
2865; BROADWELL-NEXT:    vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
2866; BROADWELL-NEXT:    vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
2867; BROADWELL-NEXT:    vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
2868; BROADWELL-NEXT:    #NO_APP
2869; BROADWELL-NEXT:    retq # sched: [7:1.00]
2870;
2871; SKYLAKE-LABEL: test_vfnmsubss_128:
2872; SKYLAKE:       # %bb.0:
2873; SKYLAKE-NEXT:    #APP
2874; SKYLAKE-NEXT:    vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.50]
2875; SKYLAKE-NEXT:    vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.50]
2876; SKYLAKE-NEXT:    vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.50]
2877; SKYLAKE-NEXT:    vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [9:0.50]
2878; SKYLAKE-NEXT:    vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [9:0.50]
2879; SKYLAKE-NEXT:    vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [9:0.50]
2880; SKYLAKE-NEXT:    #NO_APP
2881; SKYLAKE-NEXT:    retq # sched: [7:1.00]
2882;
2883; KNL-LABEL: test_vfnmsubss_128:
2884; KNL:       # %bb.0:
2885; KNL-NEXT:    #APP
2886; KNL-NEXT:    vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
2887; KNL-NEXT:    vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
2888; KNL-NEXT:    vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
2889; KNL-NEXT:    vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
2890; KNL-NEXT:    vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
2891; KNL-NEXT:    vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
2892; KNL-NEXT:    #NO_APP
2893; KNL-NEXT:    retq # sched: [7:1.00]
2894;
2895; SKX-LABEL: test_vfnmsubss_128:
2896; SKX:       # %bb.0:
2897; SKX-NEXT:    #APP
2898; SKX-NEXT:    vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.50]
2899; SKX-NEXT:    vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.50]
2900; SKX-NEXT:    vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.50]
2901; SKX-NEXT:    vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [9:0.50]
2902; SKX-NEXT:    vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [9:0.50]
2903; SKX-NEXT:    vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [9:0.50]
2904; SKX-NEXT:    #NO_APP
2905; SKX-NEXT:    retq # sched: [7:1.00]
2906;
2907; ZNVER1-LABEL: test_vfnmsubss_128:
2908; ZNVER1:       # %bb.0:
2909; ZNVER1-NEXT:    #APP
2910; ZNVER1-NEXT:    vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
2911; ZNVER1-NEXT:    vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
2912; ZNVER1-NEXT:    vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
2913; ZNVER1-NEXT:    vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [12:0.50]
2914; ZNVER1-NEXT:    vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [12:0.50]
2915; ZNVER1-NEXT:    vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [12:0.50]
2916; ZNVER1-NEXT:    #NO_APP
2917; ZNVER1-NEXT:    retq # sched: [1:0.50]
2918  tail call void asm "vfnmsub132ss $2, $1, $0 \0A\09 vfnmsub213ss $2, $1, $0 \0A\09 vfnmsub231ss $2, $1, $0 \0A\09 vfnmsub132ss $3, $1, $0 \0A\09 vfnmsub213ss $3, $1, $0 \0A\09 vfnmsub231ss $3, $1, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
2919  ret void
2920}
2921