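; Machine-combiner reassociation tests: with unsafe FP math enabled, chains of
; dependent scalar and vector FP adds, multiplies, mins, and maxes should be
; rebalanced so that independent operations can execute in parallel.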
; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse -enable-unsafe-fp-math -machine-combiner-verify-pattern-order=true < %s | FileCheck %s --check-prefix=SSE
; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx -enable-unsafe-fp-math -machine-combiner-verify-pattern-order=true < %s | FileCheck %s --check-prefix=AVX

; Incremental updates of the instruction depths should be enough for this test
; case.
; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse -enable-unsafe-fp-math -machine-combiner-inc-threshold=0 < %s | FileCheck %s --check-prefix=SSE
; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx -enable-unsafe-fp-math -machine-combiner-inc-threshold=0 < %s | FileCheck %s --check-prefix=AVX

; Verify that the first two adds are independent regardless of how the inputs are
; commuted. The destination registers are used as source registers for the third add.

define float @reassociate_adds1(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds1:
; SSE:       # %bb.0:
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    addss %xmm3, %xmm2
; SSE-NEXT:    addss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds1:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %t0, %x2
  %t2 = fadd float %t1, %x3
  ret float %t2
}

define float @reassociate_adds2(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds2:
; SSE:       # %bb.0:
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    addss %xmm3, %xmm2
; SSE-NEXT:    addss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds2:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %x2, %t0
  %t2 = fadd float %t1, %x3
  ret float %t2
}

define float @reassociate_adds3(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds3:
; SSE:       # %bb.0:
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    addss %xmm3, %xmm2
; SSE-NEXT:    addss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds3:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %t0, %x2
  %t2 = fadd float %x3, %t1
  ret float %t2
}

define float @reassociate_adds4(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds4:
; SSE:       # %bb.0:
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    addss %xmm3, %xmm2
; SSE-NEXT:    addss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds4:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %x2, %t0
  %t2 = fadd float %x3, %t1
  ret float %t2
}

; Verify that we reassociate some of these ops. The optimal balanced tree of adds is not
; produced because that would cost more compile time.
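; In the checks below, %x0..%x3 are summed as two independent pairs, while the
; tail operands %x6 and %x7 are still folded in serially rather than being
; paired off as a fully balanced tree would do.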

define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, float %x4, float %x5, float %x6, float %x7) {
; SSE-LABEL: reassociate_adds5:
; SSE:       # %bb.0:
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    addss %xmm3, %xmm2
; SSE-NEXT:    addss %xmm2, %xmm0
; SSE-NEXT:    addss %xmm5, %xmm4
; SSE-NEXT:    addss %xmm6, %xmm4
; SSE-NEXT:    addss %xmm4, %xmm0
; SSE-NEXT:    addss %xmm7, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds5:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm5, %xmm4, %xmm1
; AVX-NEXT:    vaddss %xmm6, %xmm1, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm7, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %t0, %x2
  %t2 = fadd float %t1, %x3
  %t3 = fadd float %t2, %x4
  %t4 = fadd float %t3, %x5
  %t5 = fadd float %t4, %x6
  %t6 = fadd float %t5, %x7
  ret float %t6
}

; Verify that we only need two associative operations to reassociate the operands.
; Also, we should reassociate such that the result of the high latency division
; is used by the final 'add' rather than reassociating the %x3 operand with the
; division. The latter reassociation would not improve anything.

define float @reassociate_adds6(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds6:
; SSE:       # %bb.0:
; SSE-NEXT:    divss %xmm1, %xmm0
; SSE-NEXT:    addss %xmm3, %xmm2
; SSE-NEXT:    addss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds6:
; AVX:       # %bb.0:
; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fdiv float %x0, %x1
  %t1 = fadd float %x2, %t0
  %t2 = fadd float %x3, %t1
  ret float %t2
}

; Verify that SSE and AVX scalar single-precision multiplies are reassociated.

define float @reassociate_muls1(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_muls1:
; SSE:       # %bb.0:
; SSE-NEXT:    divss %xmm1, %xmm0
; SSE-NEXT:    mulss %xmm3, %xmm2
; SSE-NEXT:    mulss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_muls1:
; AVX:       # %bb.0:
; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmulss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fdiv float %x0, %x1
  %t1 = fmul float %x2, %t0
  %t2 = fmul float %x3, %t1
  ret float %t2
}

; Verify that SSE and AVX scalar double-precision adds are reassociated.

define double @reassociate_adds_double(double %x0, double %x1, double %x2, double %x3) {
; SSE-LABEL: reassociate_adds_double:
; SSE:       # %bb.0:
; SSE-NEXT:    divsd %xmm1, %xmm0
; SSE-NEXT:    addsd %xmm3, %xmm2
; SSE-NEXT:    addsd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds_double:
; AVX:       # %bb.0:
; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddsd %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fdiv double %x0, %x1
  %t1 = fadd double %x2, %t0
  %t2 = fadd double %x3, %t1
  ret double %t2
}

; Verify that SSE and AVX scalar double-precision multiplies are reassociated.

define double @reassociate_muls_double(double %x0, double %x1, double %x2, double %x3) {
; SSE-LABEL: reassociate_muls_double:
; SSE:       # %bb.0:
; SSE-NEXT:    divsd %xmm1, %xmm0
; SSE-NEXT:    mulsd %xmm3, %xmm2
; SSE-NEXT:    mulsd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_muls_double:
; AVX:       # %bb.0:
; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmulsd %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fdiv double %x0, %x1
  %t1 = fmul double %x2, %t0
  %t2 = fmul double %x3, %t1
  ret double %t2
}

; Verify that SSE and AVX 128-bit vector single-precision adds are reassociated.

define <4 x float> @reassociate_adds_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; SSE-LABEL: reassociate_adds_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    mulps %xmm1, %xmm0
; SSE-NEXT:    addps %xmm3, %xmm2
; SSE-NEXT:    addps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmulps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddps %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fmul <4 x float> %x0, %x1
  %t1 = fadd <4 x float> %x2, %t0
  %t2 = fadd <4 x float> %x3, %t1
  ret <4 x float> %t2
}

; Verify that SSE and AVX 128-bit vector double-precision adds are reassociated.

define <2 x double> @reassociate_adds_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
; SSE-LABEL: reassociate_adds_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    mulpd %xmm1, %xmm0
; SSE-NEXT:    addpd %xmm3, %xmm2
; SSE-NEXT:    addpd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmulpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddpd %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fmul <2 x double> %x0, %x1
  %t1 = fadd <2 x double> %x2, %t0
  %t2 = fadd <2 x double> %x3, %t1
  ret <2 x double> %t2
}

; Verify that SSE and AVX 128-bit vector single-precision multiplies are reassociated.

define <4 x float> @reassociate_muls_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; SSE-LABEL: reassociate_muls_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    addps %xmm1, %xmm0
; SSE-NEXT:    mulps %xmm3, %xmm2
; SSE-NEXT:    mulps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_muls_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmulps %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vmulps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd <4 x float> %x0, %x1
  %t1 = fmul <4 x float> %x2, %t0
  %t2 = fmul <4 x float> %x3, %t1
  ret <4 x float> %t2
}

; Verify that SSE and AVX 128-bit vector double-precision multiplies are reassociated.

define <2 x double> @reassociate_muls_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
; SSE-LABEL: reassociate_muls_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    addpd %xmm1, %xmm0
; SSE-NEXT:    mulpd %xmm3, %xmm2
; SSE-NEXT:    mulpd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_muls_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmulpd %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vmulpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd <2 x double> %x0, %x1
  %t1 = fmul <2 x double> %x2, %t0
  %t2 = fmul <2 x double> %x3, %t1
  ret <2 x double> %t2
}

; Verify that AVX 256-bit vector single-precision adds are reassociated.

define <8 x float> @reassociate_adds_v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, <8 x float> %x3) {
; AVX-LABEL: reassociate_adds_v8f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmulps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vaddps %ymm3, %ymm2, %ymm1
; AVX-NEXT:    vaddps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t0 = fmul <8 x float> %x0, %x1
  %t1 = fadd <8 x float> %x2, %t0
  %t2 = fadd <8 x float> %x3, %t1
  ret <8 x float> %t2
}

; Verify that AVX 256-bit vector double-precision adds are reassociated.

define <4 x double> @reassociate_adds_v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, <4 x double> %x3) {
; AVX-LABEL: reassociate_adds_v4f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vaddpd %ymm3, %ymm2, %ymm1
; AVX-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t0 = fmul <4 x double> %x0, %x1
  %t1 = fadd <4 x double> %x2, %t0
  %t2 = fadd <4 x double> %x3, %t1
  ret <4 x double> %t2
}

; Verify that AVX 256-bit vector single-precision multiplies are reassociated.

define <8 x float> @reassociate_muls_v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, <8 x float> %x3) {
; AVX-LABEL: reassociate_muls_v8f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vmulps %ymm3, %ymm2, %ymm1
; AVX-NEXT:    vmulps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t0 = fadd <8 x float> %x0, %x1
  %t1 = fmul <8 x float> %x2, %t0
  %t2 = fmul <8 x float> %x3, %t1
  ret <8 x float> %t2
}

; Verify that AVX 256-bit vector double-precision multiplies are reassociated.

define <4 x double> @reassociate_muls_v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, <4 x double> %x3) {
; AVX-LABEL: reassociate_muls_v4f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vmulpd %ymm3, %ymm2, %ymm1
; AVX-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t0 = fadd <4 x double> %x0, %x1
  %t1 = fmul <4 x double> %x2, %t0
  %t2 = fmul <4 x double> %x3, %t1
  ret <4 x double> %t2
}

; Verify that SSE and AVX scalar single-precision minimum ops are reassociated.
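; The IR below expresses each min as an 'fcmp olt' + 'select' pair, which is the
; pattern that lowers to the minss/vminss instructions being checked.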

define float @reassociate_mins_single(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_mins_single:
; SSE:       # %bb.0:
; SSE-NEXT:    divss %xmm1, %xmm0
; SSE-NEXT:    minss %xmm3, %xmm2
; SSE-NEXT:    minss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_mins_single:
; AVX:       # %bb.0:
; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vminss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vminss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fdiv float %x0, %x1
  %cmp1 = fcmp olt float %x2, %t0
  %sel1 = select i1 %cmp1, float %x2, float %t0
  %cmp2 = fcmp olt float %x3, %sel1
  %sel2 = select i1 %cmp2, float %x3, float %sel1
  ret float %sel2
}

; Verify that SSE and AVX scalar single-precision maximum ops are reassociated.

define float @reassociate_maxs_single(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_maxs_single:
; SSE:       # %bb.0:
; SSE-NEXT:    divss %xmm1, %xmm0
; SSE-NEXT:    maxss %xmm3, %xmm2
; SSE-NEXT:    maxss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_maxs_single:
; AVX:       # %bb.0:
; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmaxss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vmaxss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fdiv float %x0, %x1
  %cmp1 = fcmp ogt float %x2, %t0
  %sel1 = select i1 %cmp1, float %x2, float %t0
  %cmp2 = fcmp ogt float %x3, %sel1
  %sel2 = select i1 %cmp2, float %x3, float %sel1
  ret float %sel2
}

; Verify that SSE and AVX scalar double-precision minimum ops are reassociated.

define double @reassociate_mins_double(double %x0, double %x1, double %x2, double %x3) {
; SSE-LABEL: reassociate_mins_double:
; SSE:       # %bb.0:
; SSE-NEXT:    divsd %xmm1, %xmm0
; SSE-NEXT:    minsd %xmm3, %xmm2
; SSE-NEXT:    minsd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_mins_double:
; AVX:       # %bb.0:
; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vminsd %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vminsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fdiv double %x0, %x1
  %cmp1 = fcmp olt double %x2, %t0
  %sel1 = select i1 %cmp1, double %x2, double %t0
  %cmp2 = fcmp olt double %x3, %sel1
  %sel2 = select i1 %cmp2, double %x3, double %sel1
  ret double %sel2
}

; Verify that SSE and AVX scalar double-precision maximum ops are reassociated.

define double @reassociate_maxs_double(double %x0, double %x1, double %x2, double %x3) {
; SSE-LABEL: reassociate_maxs_double:
; SSE:       # %bb.0:
; SSE-NEXT:    divsd %xmm1, %xmm0
; SSE-NEXT:    maxsd %xmm3, %xmm2
; SSE-NEXT:    maxsd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_maxs_double:
; AVX:       # %bb.0:
; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmaxsd %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vmaxsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fdiv double %x0, %x1
  %cmp1 = fcmp ogt double %x2, %t0
  %sel1 = select i1 %cmp1, double %x2, double %t0
  %cmp2 = fcmp ogt double %x3, %sel1
  %sel2 = select i1 %cmp2, double %x3, double %sel1
  ret double %sel2
}

; Verify that SSE and AVX 128-bit vector single-precision minimum ops are reassociated.

define <4 x float> @reassociate_mins_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; SSE-LABEL: reassociate_mins_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    addps %xmm1, %xmm0
; SSE-NEXT:    minps %xmm3, %xmm2
; SSE-NEXT:    minps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_mins_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vminps %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vminps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd <4 x float> %x0, %x1
  %cmp1 = fcmp olt <4 x float> %x2, %t0
  %sel1 = select <4 x i1> %cmp1, <4 x float> %x2, <4 x float> %t0
  %cmp2 = fcmp olt <4 x float> %x3, %sel1
  %sel2 = select <4 x i1> %cmp2, <4 x float> %x3, <4 x float> %sel1
  ret <4 x float> %sel2
}

; Verify that SSE and AVX 128-bit vector single-precision maximum ops are reassociated.

define <4 x float> @reassociate_maxs_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; SSE-LABEL: reassociate_maxs_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    addps %xmm1, %xmm0
; SSE-NEXT:    maxps %xmm3, %xmm2
; SSE-NEXT:    maxps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_maxs_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmaxps %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vmaxps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd <4 x float> %x0, %x1
  %cmp1 = fcmp ogt <4 x float> %x2, %t0
  %sel1 = select <4 x i1> %cmp1, <4 x float> %x2, <4 x float> %t0
  %cmp2 = fcmp ogt <4 x float> %x3, %sel1
  %sel2 = select <4 x i1> %cmp2, <4 x float> %x3, <4 x float> %sel1
  ret <4 x float> %sel2
}

; Verify that SSE and AVX 128-bit vector double-precision minimum ops are reassociated.

define <2 x double> @reassociate_mins_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
; SSE-LABEL: reassociate_mins_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    addpd %xmm1, %xmm0
; SSE-NEXT:    minpd %xmm3, %xmm2
; SSE-NEXT:    minpd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_mins_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vminpd %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vminpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd <2 x double> %x0, %x1
  %cmp1 = fcmp olt <2 x double> %x2, %t0
  %sel1 = select <2 x i1> %cmp1, <2 x double> %x2, <2 x double> %t0
  %cmp2 = fcmp olt <2 x double> %x3, %sel1
  %sel2 = select <2 x i1> %cmp2, <2 x double> %x3, <2 x double> %sel1
  ret <2 x double> %sel2
}

; Verify that SSE and AVX 128-bit vector double-precision maximum ops are reassociated.

define <2 x double> @reassociate_maxs_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
; SSE-LABEL: reassociate_maxs_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    addpd %xmm1, %xmm0
; SSE-NEXT:    maxpd %xmm3, %xmm2
; SSE-NEXT:    maxpd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_maxs_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmaxpd %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vmaxpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd <2 x double> %x0, %x1
  %cmp1 = fcmp ogt <2 x double> %x2, %t0
  %sel1 = select <2 x i1> %cmp1, <2 x double> %x2, <2 x double> %t0
  %cmp2 = fcmp ogt <2 x double> %x3, %sel1
  %sel2 = select <2 x i1> %cmp2, <2 x double> %x3, <2 x double> %sel1
  ret <2 x double> %sel2
}

; Verify that AVX 256-bit vector single-precision minimum ops are reassociated.

define <8 x float> @reassociate_mins_v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, <8 x float> %x3) {
; AVX-LABEL: reassociate_mins_v8f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vminps %ymm3, %ymm2, %ymm1
; AVX-NEXT:    vminps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t0 = fadd <8 x float> %x0, %x1
  %cmp1 = fcmp olt <8 x float> %x2, %t0
  %sel1 = select <8 x i1> %cmp1, <8 x float> %x2, <8 x float> %t0
  %cmp2 = fcmp olt <8 x float> %x3, %sel1
  %sel2 = select <8 x i1> %cmp2, <8 x float> %x3, <8 x float> %sel1
  ret <8 x float> %sel2
}

; Verify that AVX 256-bit vector single-precision maximum ops are reassociated.

define <8 x float> @reassociate_maxs_v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, <8 x float> %x3) {
; AVX-LABEL: reassociate_maxs_v8f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vmaxps %ymm3, %ymm2, %ymm1
; AVX-NEXT:    vmaxps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t0 = fadd <8 x float> %x0, %x1
  %cmp1 = fcmp ogt <8 x float> %x2, %t0
  %sel1 = select <8 x i1> %cmp1, <8 x float> %x2, <8 x float> %t0
  %cmp2 = fcmp ogt <8 x float> %x3, %sel1
  %sel2 = select <8 x i1> %cmp2, <8 x float> %x3, <8 x float> %sel1
  ret <8 x float> %sel2
}

; Verify that AVX 256-bit vector double-precision minimum ops are reassociated.

define <4 x double> @reassociate_mins_v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, <4 x double> %x3) {
; AVX-LABEL: reassociate_mins_v4f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vminpd %ymm3, %ymm2, %ymm1
; AVX-NEXT:    vminpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t0 = fadd <4 x double> %x0, %x1
  %cmp1 = fcmp olt <4 x double> %x2, %t0
  %sel1 = select <4 x i1> %cmp1, <4 x double> %x2, <4 x double> %t0
  %cmp2 = fcmp olt <4 x double> %x3, %sel1
  %sel2 = select <4 x i1> %cmp2, <4 x double> %x3, <4 x double> %sel1
  ret <4 x double> %sel2
}

; Verify that AVX 256-bit vector double-precision maximum ops are reassociated.

define <4 x double> @reassociate_maxs_v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, <4 x double> %x3) {
; AVX-LABEL: reassociate_maxs_v4f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vmaxpd %ymm3, %ymm2, %ymm1
; AVX-NEXT:    vmaxpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t0 = fadd <4 x double> %x0, %x1
  %cmp1 = fcmp ogt <4 x double> %x2, %t0
  %sel1 = select <4 x i1> %cmp1, <4 x double> %x2, <4 x double> %t0
  %cmp2 = fcmp ogt <4 x double> %x3, %sel1
  %sel2 = select <4 x i1> %cmp2, <4 x double> %x3, <4 x double> %sel1
  ret <4 x double> %sel2
}

; PR25016: https://llvm.org/bugs/show_bug.cgi?id=25016
; Verify that reassociation is not happening needlessly or wrongly.
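; Both functions below should end up computing (x0 + x1) + (x2 + x3) from the
; calls' spill slots: the chained form is reassociated once, and the form that
; is already balanced is left untouched.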

declare double @bar()

define double @reassociate_adds_from_calls() {
; AVX-LABEL: reassociate_adds_from_calls:
; AVX:       callq   bar
; AVX-NEXT:  vmovsd  %xmm0, 16(%rsp)
; AVX-NEXT:  callq   bar
; AVX-NEXT:  vmovsd  %xmm0, 8(%rsp)
; AVX-NEXT:  callq   bar
; AVX-NEXT:  vmovsd  %xmm0, (%rsp)
; AVX-NEXT:  callq   bar
; AVX-NEXT:  vmovsd  8(%rsp), %xmm1
; AVX:       vaddsd  16(%rsp), %xmm1, %xmm1
; AVX-NEXT:  vaddsd  (%rsp), %xmm0, %xmm0
; AVX-NEXT:  vaddsd  %xmm0, %xmm1, %xmm0

  %x0 = call double @bar()
  %x1 = call double @bar()
  %x2 = call double @bar()
  %x3 = call double @bar()
  %t0 = fadd double %x0, %x1
  %t1 = fadd double %t0, %x2
  %t2 = fadd double %t1, %x3
  ret double %t2
}

define double @already_reassociated() {
; AVX-LABEL: already_reassociated:
; AVX:       callq   bar
; AVX-NEXT:  vmovsd  %xmm0, 16(%rsp)
; AVX-NEXT:  callq   bar
; AVX-NEXT:  vmovsd  %xmm0, 8(%rsp)
; AVX-NEXT:  callq   bar
; AVX-NEXT:  vmovsd  %xmm0, (%rsp)
; AVX-NEXT:  callq   bar
; AVX-NEXT:  vmovsd  8(%rsp), %xmm1
; AVX:       vaddsd  16(%rsp), %xmm1, %xmm1
; AVX-NEXT:  vaddsd  (%rsp), %xmm0, %xmm0
; AVX-NEXT:  vaddsd  %xmm0, %xmm1, %xmm0

  %x0 = call double @bar()
  %x1 = call double @bar()
  %x2 = call double @bar()
  %x3 = call double @bar()
  %t0 = fadd double %x0, %x1
  %t1 = fadd double %x2, %x3
  %t2 = fadd double %t0, %t1
  ret double %t2
}