; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse -enable-unsafe-fp-math < %s | FileCheck %s --check-prefix=SSE
; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx -enable-unsafe-fp-math < %s | FileCheck %s --check-prefix=AVX

; Verify that the first two adds are independent regardless of how the inputs are
; commuted. The destination registers are used as source registers for the third add.
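; As an illustration (not checked by FileCheck; %a, %b, %r are illustrative names),
; the serial chain in the IR:
;   %t0 = fadd float %x0, %x1
;   %t1 = fadd float %t0, %x2
;   %t2 = fadd float %t1, %x3
; is expected to be emitted as if it were the shorter-dependency form:
;   %a = fadd float %x0, %x1
;   %b = fadd float %x2, %x3
;   %r = fadd float %a, %b
; so that the first two adds can execute in parallel.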

define float @reassociate_adds1(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds1:
; SSE:       # BB#0:
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    addss %xmm3, %xmm2
; SSE-NEXT:    addss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds1:
; AVX:       # BB#0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %t0, %x2
  %t2 = fadd float %t1, %x3
  ret float %t2
}

define float @reassociate_adds2(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds2:
; SSE:       # BB#0:
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    addss %xmm3, %xmm2
; SSE-NEXT:    addss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds2:
; AVX:       # BB#0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %x2, %t0
  %t2 = fadd float %t1, %x3
  ret float %t2
}

define float @reassociate_adds3(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds3:
; SSE:       # BB#0:
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    addss %xmm3, %xmm2
; SSE-NEXT:    addss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds3:
; AVX:       # BB#0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %t0, %x2
  %t2 = fadd float %x3, %t1
  ret float %t2
}

define float @reassociate_adds4(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds4:
; SSE:       # BB#0:
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    addss %xmm3, %xmm2
; SSE-NEXT:    addss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds4:
; AVX:       # BB#0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %x2, %t0
  %t2 = fadd float %x3, %t1
  ret float %t2
}

; Verify that we reassociate some of these ops. The optimal balanced tree of adds is not
; produced because that would cost more compile time.
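; Per the checks below, the eight-operand chain is expected to take the shape
;   (((x0 + x1) + (x2 + x3)) + ((x4 + x5) + x6)) + x7
; which is only partially balanced.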

define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, float %x4, float %x5, float %x6, float %x7) {
; SSE-LABEL: reassociate_adds5:
; SSE:       # BB#0:
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    addss %xmm3, %xmm2
; SSE-NEXT:    addss %xmm2, %xmm0
; SSE-NEXT:    addss %xmm5, %xmm4
; SSE-NEXT:    addss %xmm6, %xmm4
; SSE-NEXT:    addss %xmm4, %xmm0
; SSE-NEXT:    addss %xmm7, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds5:
; AVX:       # BB#0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm5, %xmm4, %xmm1
; AVX-NEXT:    vaddss %xmm6, %xmm1, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm7, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %t0, %x2
  %t2 = fadd float %t1, %x3
  %t3 = fadd float %t2, %x4
  %t4 = fadd float %t3, %x5
  %t5 = fadd float %t4, %x6
  %t6 = fadd float %t5, %x7
  ret float %t6
}

; Verify that we only need two associative operations to reassociate the operands.
; Also, we should reassociate such that the result of the high latency division
; is used by the final 'add' rather than reassociating the %x3 operand with the
; division. The latter reassociation would not improve anything.
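; That is, the expected shape is (x0 / x1) + (x2 + x3): the (x2 + x3) add can
; execute in parallel with the division, and the division result feeds only the
; final add.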

define float @reassociate_adds6(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds6:
; SSE:       # BB#0:
; SSE-NEXT:    divss %xmm1, %xmm0
; SSE-NEXT:    addss %xmm3, %xmm2
; SSE-NEXT:    addss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds6:
; AVX:       # BB#0:
; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fdiv float %x0, %x1
  %t1 = fadd float %x2, %t0
  %t2 = fadd float %x3, %t1
  ret float %t2
}

; Verify that SSE and AVX scalar single-precision multiplies are reassociated.

define float @reassociate_muls1(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_muls1:
; SSE:       # BB#0:
; SSE-NEXT:    divss %xmm1, %xmm0
; SSE-NEXT:    mulss %xmm3, %xmm2
; SSE-NEXT:    mulss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_muls1:
; AVX:       # BB#0:
; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmulss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fdiv float %x0, %x1
  %t1 = fmul float %x2, %t0
  %t2 = fmul float %x3, %t1
  ret float %t2
}

; Verify that SSE and AVX scalar double-precision adds are reassociated.

define double @reassociate_adds_double(double %x0, double %x1, double %x2, double %x3) {
; SSE-LABEL: reassociate_adds_double:
; SSE:       # BB#0:
; SSE-NEXT:    divsd %xmm1, %xmm0
; SSE-NEXT:    addsd %xmm3, %xmm2
; SSE-NEXT:    addsd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds_double:
; AVX:       # BB#0:
; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddsd %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fdiv double %x0, %x1
  %t1 = fadd double %x2, %t0
  %t2 = fadd double %x3, %t1
  ret double %t2
}

; Verify that SSE and AVX scalar double-precision multiplies are reassociated.

define double @reassociate_muls_double(double %x0, double %x1, double %x2, double %x3) {
; SSE-LABEL: reassociate_muls_double:
; SSE:       # BB#0:
; SSE-NEXT:    divsd %xmm1, %xmm0
; SSE-NEXT:    mulsd %xmm3, %xmm2
; SSE-NEXT:    mulsd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_muls_double:
; AVX:       # BB#0:
; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmulsd %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fdiv double %x0, %x1
  %t1 = fmul double %x2, %t0
  %t2 = fmul double %x3, %t1
  ret double %t2
}

; Verify that SSE and AVX 128-bit vector single-precision adds are reassociated.

define <4 x float> @reassociate_adds_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; SSE-LABEL: reassociate_adds_v4f32:
; SSE:       # BB#0:
; SSE-NEXT:    mulps %xmm1, %xmm0
; SSE-NEXT:    addps %xmm3, %xmm2
; SSE-NEXT:    addps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds_v4f32:
; AVX:       # BB#0:
; AVX-NEXT:    vmulps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddps %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fmul <4 x float> %x0, %x1
  %t1 = fadd <4 x float> %x2, %t0
  %t2 = fadd <4 x float> %x3, %t1
  ret <4 x float> %t2
}

; Verify that SSE and AVX 128-bit vector double-precision adds are reassociated.

define <2 x double> @reassociate_adds_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
; SSE-LABEL: reassociate_adds_v2f64:
; SSE:       # BB#0:
; SSE-NEXT:    mulpd %xmm1, %xmm0
; SSE-NEXT:    addpd %xmm3, %xmm2
; SSE-NEXT:    addpd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds_v2f64:
; AVX:       # BB#0:
; AVX-NEXT:    vmulpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddpd %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fmul <2 x double> %x0, %x1
  %t1 = fadd <2 x double> %x2, %t0
  %t2 = fadd <2 x double> %x3, %t1
  ret <2 x double> %t2
}

; Verify that SSE and AVX 128-bit vector single-precision multiplies are reassociated.

define <4 x float> @reassociate_muls_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; SSE-LABEL: reassociate_muls_v4f32:
; SSE:       # BB#0:
; SSE-NEXT:    addps %xmm1, %xmm0
; SSE-NEXT:    mulps %xmm3, %xmm2
; SSE-NEXT:    mulps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_muls_v4f32:
; AVX:       # BB#0:
; AVX-NEXT:    vaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmulps %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vmulps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd <4 x float> %x0, %x1
  %t1 = fmul <4 x float> %x2, %t0
  %t2 = fmul <4 x float> %x3, %t1
  ret <4 x float> %t2
}

; Verify that SSE and AVX 128-bit vector double-precision multiplies are reassociated.

define <2 x double> @reassociate_muls_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
; SSE-LABEL: reassociate_muls_v2f64:
; SSE:       # BB#0:
; SSE-NEXT:    addpd %xmm1, %xmm0
; SSE-NEXT:    mulpd %xmm3, %xmm2
; SSE-NEXT:    mulpd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_muls_v2f64:
; AVX:       # BB#0:
; AVX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmulpd %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vmulpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd <2 x double> %x0, %x1
  %t1 = fmul <2 x double> %x2, %t0
  %t2 = fmul <2 x double> %x3, %t1
  ret <2 x double> %t2
}

; Verify that AVX 256-bit vector single-precision adds are reassociated.

define <8 x float> @reassociate_adds_v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, <8 x float> %x3) {
; AVX-LABEL: reassociate_adds_v8f32:
; AVX:       # BB#0:
; AVX-NEXT:    vmulps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vaddps %ymm3, %ymm2, %ymm1
; AVX-NEXT:    vaddps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t0 = fmul <8 x float> %x0, %x1
  %t1 = fadd <8 x float> %x2, %t0
  %t2 = fadd <8 x float> %x3, %t1
  ret <8 x float> %t2
}

; Verify that AVX 256-bit vector double-precision adds are reassociated.

define <4 x double> @reassociate_adds_v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, <4 x double> %x3) {
; AVX-LABEL: reassociate_adds_v4f64:
; AVX:       # BB#0:
; AVX-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vaddpd %ymm3, %ymm2, %ymm1
; AVX-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t0 = fmul <4 x double> %x0, %x1
  %t1 = fadd <4 x double> %x2, %t0
  %t2 = fadd <4 x double> %x3, %t1
  ret <4 x double> %t2
}

; Verify that AVX 256-bit vector single-precision multiplies are reassociated.

define <8 x float> @reassociate_muls_v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, <8 x float> %x3) {
; AVX-LABEL: reassociate_muls_v8f32:
; AVX:       # BB#0:
; AVX-NEXT:    vaddps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vmulps %ymm3, %ymm2, %ymm1
; AVX-NEXT:    vmulps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t0 = fadd <8 x float> %x0, %x1
  %t1 = fmul <8 x float> %x2, %t0
  %t2 = fmul <8 x float> %x3, %t1
  ret <8 x float> %t2
}

; Verify that AVX 256-bit vector double-precision multiplies are reassociated.

define <4 x double> @reassociate_muls_v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, <4 x double> %x3) {
; AVX-LABEL: reassociate_muls_v4f64:
; AVX:       # BB#0:
; AVX-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vmulpd %ymm3, %ymm2, %ymm1
; AVX-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t0 = fadd <4 x double> %x0, %x1
  %t1 = fmul <4 x double> %x2, %t0
  %t2 = fmul <4 x double> %x3, %t1
  ret <4 x double> %t2
}

; Verify that SSE and AVX scalar single-precision minimum ops are reassociated.
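; Note: the min below is written as the (fcmp olt + select) idiom that lowers to
; minss/minsd. Reassociating these chains presumably relies on the
; -enable-unsafe-fp-math flag in the RUN lines above, since the x86 min/max
; instructions do not treat NaN and signed-zero operands symmetrically.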

define float @reassociate_mins_single(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_mins_single:
; SSE:       # BB#0:
; SSE-NEXT:    divss %xmm1, %xmm0
; SSE-NEXT:    minss %xmm3, %xmm2
; SSE-NEXT:    minss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_mins_single:
; AVX:       # BB#0:
; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vminss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vminss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fdiv float %x0, %x1
  %cmp1 = fcmp olt float %x2, %t0
  %sel1 = select i1 %cmp1, float %x2, float %t0
  %cmp2 = fcmp olt float %x3, %sel1
  %sel2 = select i1 %cmp2, float %x3, float %sel1
  ret float %sel2
}

; Verify that SSE and AVX scalar single-precision maximum ops are reassociated.

define float @reassociate_maxs_single(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_maxs_single:
; SSE:       # BB#0:
; SSE-NEXT:    divss %xmm1, %xmm0
; SSE-NEXT:    maxss %xmm3, %xmm2
; SSE-NEXT:    maxss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_maxs_single:
; AVX:       # BB#0:
; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmaxss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vmaxss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fdiv float %x0, %x1
  %cmp1 = fcmp ogt float %x2, %t0
  %sel1 = select i1 %cmp1, float %x2, float %t0
  %cmp2 = fcmp ogt float %x3, %sel1
  %sel2 = select i1 %cmp2, float %x3, float %sel1
  ret float %sel2
}

; Verify that SSE and AVX scalar double-precision minimum ops are reassociated.

define double @reassociate_mins_double(double %x0, double %x1, double %x2, double %x3) {
; SSE-LABEL: reassociate_mins_double:
; SSE:       # BB#0:
; SSE-NEXT:    divsd %xmm1, %xmm0
; SSE-NEXT:    minsd %xmm3, %xmm2
; SSE-NEXT:    minsd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_mins_double:
; AVX:       # BB#0:
; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vminsd %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vminsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fdiv double %x0, %x1
  %cmp1 = fcmp olt double %x2, %t0
  %sel1 = select i1 %cmp1, double %x2, double %t0
  %cmp2 = fcmp olt double %x3, %sel1
  %sel2 = select i1 %cmp2, double %x3, double %sel1
  ret double %sel2
}

; Verify that SSE and AVX scalar double-precision maximum ops are reassociated.

define double @reassociate_maxs_double(double %x0, double %x1, double %x2, double %x3) {
; SSE-LABEL: reassociate_maxs_double:
; SSE:       # BB#0:
; SSE-NEXT:    divsd %xmm1, %xmm0
; SSE-NEXT:    maxsd %xmm3, %xmm2
; SSE-NEXT:    maxsd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_maxs_double:
; AVX:       # BB#0:
; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmaxsd %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vmaxsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fdiv double %x0, %x1
  %cmp1 = fcmp ogt double %x2, %t0
  %sel1 = select i1 %cmp1, double %x2, double %t0
  %cmp2 = fcmp ogt double %x3, %sel1
  %sel2 = select i1 %cmp2, double %x3, double %sel1
  ret double %sel2
}

; Verify that SSE and AVX 128-bit vector single-precision minimum ops are reassociated.

define <4 x float> @reassociate_mins_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; SSE-LABEL: reassociate_mins_v4f32:
; SSE:       # BB#0:
; SSE-NEXT:    addps %xmm1, %xmm0
; SSE-NEXT:    minps %xmm3, %xmm2
; SSE-NEXT:    minps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_mins_v4f32:
; AVX:       # BB#0:
; AVX-NEXT:    vaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vminps %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vminps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd <4 x float> %x0, %x1
  %cmp1 = fcmp olt <4 x float> %x2, %t0
  %sel1 = select <4 x i1> %cmp1, <4 x float> %x2, <4 x float> %t0
  %cmp2 = fcmp olt <4 x float> %x3, %sel1
  %sel2 = select <4 x i1> %cmp2, <4 x float> %x3, <4 x float> %sel1
  ret <4 x float> %sel2
}

; Verify that SSE and AVX 128-bit vector single-precision maximum ops are reassociated.

define <4 x float> @reassociate_maxs_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; SSE-LABEL: reassociate_maxs_v4f32:
; SSE:       # BB#0:
; SSE-NEXT:    addps %xmm1, %xmm0
; SSE-NEXT:    maxps %xmm3, %xmm2
; SSE-NEXT:    maxps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_maxs_v4f32:
; AVX:       # BB#0:
; AVX-NEXT:    vaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmaxps %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vmaxps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd <4 x float> %x0, %x1
  %cmp1 = fcmp ogt <4 x float> %x2, %t0
  %sel1 = select <4 x i1> %cmp1, <4 x float> %x2, <4 x float> %t0
  %cmp2 = fcmp ogt <4 x float> %x3, %sel1
  %sel2 = select <4 x i1> %cmp2, <4 x float> %x3, <4 x float> %sel1
  ret <4 x float> %sel2
}

; Verify that SSE and AVX 128-bit vector double-precision minimum ops are reassociated.

define <2 x double> @reassociate_mins_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
; SSE-LABEL: reassociate_mins_v2f64:
; SSE:       # BB#0:
; SSE-NEXT:    addpd %xmm1, %xmm0
; SSE-NEXT:    minpd %xmm3, %xmm2
; SSE-NEXT:    minpd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_mins_v2f64:
; AVX:       # BB#0:
; AVX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vminpd %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vminpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd <2 x double> %x0, %x1
  %cmp1 = fcmp olt <2 x double> %x2, %t0
  %sel1 = select <2 x i1> %cmp1, <2 x double> %x2, <2 x double> %t0
  %cmp2 = fcmp olt <2 x double> %x3, %sel1
  %sel2 = select <2 x i1> %cmp2, <2 x double> %x3, <2 x double> %sel1
  ret <2 x double> %sel2
}

; Verify that SSE and AVX 128-bit vector double-precision maximum ops are reassociated.

define <2 x double> @reassociate_maxs_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
; SSE-LABEL: reassociate_maxs_v2f64:
; SSE:       # BB#0:
; SSE-NEXT:    addpd %xmm1, %xmm0
; SSE-NEXT:    maxpd %xmm3, %xmm2
; SSE-NEXT:    maxpd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_maxs_v2f64:
; AVX:       # BB#0:
; AVX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmaxpd %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vmaxpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd <2 x double> %x0, %x1
  %cmp1 = fcmp ogt <2 x double> %x2, %t0
  %sel1 = select <2 x i1> %cmp1, <2 x double> %x2, <2 x double> %t0
  %cmp2 = fcmp ogt <2 x double> %x3, %sel1
  %sel2 = select <2 x i1> %cmp2, <2 x double> %x3, <2 x double> %sel1
  ret <2 x double> %sel2
}

; Verify that AVX 256-bit vector single-precision minimum ops are reassociated.

define <8 x float> @reassociate_mins_v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, <8 x float> %x3) {
; AVX-LABEL: reassociate_mins_v8f32:
; AVX:       # BB#0:
; AVX-NEXT:    vaddps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vminps %ymm3, %ymm2, %ymm1
; AVX-NEXT:    vminps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t0 = fadd <8 x float> %x0, %x1
  %cmp1 = fcmp olt <8 x float> %x2, %t0
  %sel1 = select <8 x i1> %cmp1, <8 x float> %x2, <8 x float> %t0
  %cmp2 = fcmp olt <8 x float> %x3, %sel1
  %sel2 = select <8 x i1> %cmp2, <8 x float> %x3, <8 x float> %sel1
  ret <8 x float> %sel2
}

; Verify that AVX 256-bit vector single-precision maximum ops are reassociated.

define <8 x float> @reassociate_maxs_v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, <8 x float> %x3) {
; AVX-LABEL: reassociate_maxs_v8f32:
; AVX:       # BB#0:
; AVX-NEXT:    vaddps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vmaxps %ymm3, %ymm2, %ymm1
; AVX-NEXT:    vmaxps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t0 = fadd <8 x float> %x0, %x1
  %cmp1 = fcmp ogt <8 x float> %x2, %t0
  %sel1 = select <8 x i1> %cmp1, <8 x float> %x2, <8 x float> %t0
  %cmp2 = fcmp ogt <8 x float> %x3, %sel1
  %sel2 = select <8 x i1> %cmp2, <8 x float> %x3, <8 x float> %sel1
  ret <8 x float> %sel2
}

; Verify that AVX 256-bit vector double-precision minimum ops are reassociated.

define <4 x double> @reassociate_mins_v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, <4 x double> %x3) {
; AVX-LABEL: reassociate_mins_v4f64:
; AVX:       # BB#0:
; AVX-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vminpd %ymm3, %ymm2, %ymm1
; AVX-NEXT:    vminpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t0 = fadd <4 x double> %x0, %x1
  %cmp1 = fcmp olt <4 x double> %x2, %t0
  %sel1 = select <4 x i1> %cmp1, <4 x double> %x2, <4 x double> %t0
  %cmp2 = fcmp olt <4 x double> %x3, %sel1
  %sel2 = select <4 x i1> %cmp2, <4 x double> %x3, <4 x double> %sel1
  ret <4 x double> %sel2
}

; Verify that AVX 256-bit vector double-precision maximum ops are reassociated.

define <4 x double> @reassociate_maxs_v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, <4 x double> %x3) {
; AVX-LABEL: reassociate_maxs_v4f64:
; AVX:       # BB#0:
; AVX-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vmaxpd %ymm3, %ymm2, %ymm1
; AVX-NEXT:    vmaxpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t0 = fadd <4 x double> %x0, %x1
  %cmp1 = fcmp ogt <4 x double> %x2, %t0
  %sel1 = select <4 x i1> %cmp1, <4 x double> %x2, <4 x double> %t0
  %cmp2 = fcmp ogt <4 x double> %x3, %sel1
  %sel2 = select <4 x i1> %cmp2, <4 x double> %x3, <4 x double> %sel1
  ret <4 x double> %sel2
}

; PR25016: https://llvm.org/bugs/show_bug.cgi?id=25016
; Verify that reassociation does not happen needlessly or incorrectly.
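; The two functions below should compile to the same code (note the matching check
; lines): the serial chain in the first is reassociated into (x0 + x1) + (x2 + x3),
; while the second, which is already written in that form, is left as-is.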

declare double @bar()

define double @reassociate_adds_from_calls() {
; AVX-LABEL: reassociate_adds_from_calls:
; AVX:       callq   bar
; AVX-NEXT:  vmovsd  %xmm0, 16(%rsp)
; AVX-NEXT:  callq   bar
; AVX-NEXT:  vmovsd  %xmm0, 8(%rsp)
; AVX-NEXT:  callq   bar
; AVX-NEXT:  vmovsd  %xmm0, (%rsp)
; AVX-NEXT:  callq   bar
; AVX-NEXT:  vmovsd  8(%rsp), %xmm1
; AVX:       vaddsd  16(%rsp), %xmm1, %xmm1
; AVX-NEXT:  vaddsd  (%rsp), %xmm0, %xmm0
; AVX-NEXT:  vaddsd  %xmm0, %xmm1, %xmm0

  %x0 = call double @bar()
  %x1 = call double @bar()
  %x2 = call double @bar()
  %x3 = call double @bar()
  %t0 = fadd double %x0, %x1
  %t1 = fadd double %t0, %x2
  %t2 = fadd double %t1, %x3
  ret double %t2
}

define double @already_reassociated() {
; AVX-LABEL: already_reassociated:
; AVX:       callq   bar
; AVX-NEXT:  vmovsd  %xmm0, 16(%rsp)
; AVX-NEXT:  callq   bar
; AVX-NEXT:  vmovsd  %xmm0, 8(%rsp)
; AVX-NEXT:  callq   bar
; AVX-NEXT:  vmovsd  %xmm0, (%rsp)
; AVX-NEXT:  callq   bar
; AVX-NEXT:  vmovsd  8(%rsp), %xmm1
; AVX:       vaddsd  16(%rsp), %xmm1, %xmm1
; AVX-NEXT:  vaddsd  (%rsp), %xmm0, %xmm0
; AVX-NEXT:  vaddsd  %xmm0, %xmm1, %xmm0

  %x0 = call double @bar()
  %x1 = call double @bar()
  %x2 = call double @bar()
  %x3 = call double @bar()
  %t0 = fadd double %x0, %x1
  %t1 = fadd double %x2, %x3
  %t2 = fadd double %t0, %t1
  ret double %t2
}