• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; RUN: opt < %s -instcombine -S | FileCheck %s
2
3; Test case: "float fold(float a) { return 1.2f * a * 2.3f; }"
4; The constants 1.2f and 2.3f are expected to be folded into one multiplier.
5define float @fold(float %a) {
6  %mul = fmul fast float %a, 0x3FF3333340000000
7  %mul1 = fmul fast float %mul, 0x4002666660000000
8  ret float %mul1
9; CHECK-LABEL: @fold(
10; CHECK: fmul fast float %a, 0x4006147AE0000000
11}
12
13; Same test case as the one used in fold(), except that the second fmul
14; carries no fast-math flags; the first multiply is expected to stay as written.
15define float @notfold(float %a) {
16; CHECK-LABEL: @notfold(
17; CHECK: %mul = fmul fast float %a, 0x3FF3333340000000
18  %mul = fmul fast float %a, 0x3FF3333340000000
19  %mul1 = fmul float %mul, 0x4002666660000000
20  ret float %mul1
21}
22
23define float @fold2(float %a) { ; only the outer fmul is 'fast'; the two constants are still expected to fold
24; CHECK-LABEL: @fold2(
25; CHECK: fmul fast float %a, 0x4006147AE0000000
26  %mul = fmul float %a, 0x3FF3333340000000
27  %mul1 = fmul fast float %mul, 0x4002666660000000
28  ret float %mul1
29}
30
31; C * f1 + f1 => (C+1) * f1
32define double @fold3(double %f1) {
33  %t1 = fmul fast double 2.000000e+00, %f1
34  %t2 = fadd fast double %f1, %t1 ; f1 + 2.0*f1, expected to become 3.0*f1
35  ret double %t2
36; CHECK-LABEL: @fold3(
37; CHECK: fmul fast double %f1, 3.000000e+00
38}
39
40; (C1 - X) + (C2 - Y) => (C1+C2) - (X + Y)
41define float @fold4(float %f1, float %f2) {
42  %sub = fsub float 4.000000e+00, %f1 ; C1 = 4.0, no fast-math flags
43  %sub1 = fsub float 5.000000e+00, %f2 ; C2 = 5.0, no fast-math flags
44  %add = fadd fast float %sub, %sub1 ; only the outer fadd is 'fast'
45  ret float %add
46; CHECK-LABEL: @fold4(
47; CHECK: %1 = fadd fast float %f1, %f2
48; CHECK: fsub fast float 9.000000e+00, %1
49}
50
51; (X + C1) + C2 => X + (C1 + C2)
52define float @fold5(float %f1, float %f2) { ; %f2 is unused
53  %add = fadd float %f1, 4.000000e+00 ; C1 = 4.0, no fast-math flags
54  %add1 = fadd fast float %add, 5.000000e+00 ; C2 = 5.0; only this fadd is 'fast'
55  ret float %add1
56; CHECK-LABEL: @fold5(
57; CHECK: fadd fast float %f1, 9.000000e+00
58}
59
60; (X + X) + X => 3.0 * X
61define float @fold6(float %f1) {
62  %t1 = fadd fast float %f1, %f1
63  %t2 = fadd fast float %f1, %t1 ; written here as X + (X + X)
64  ret float %t2
65; CHECK-LABEL: @fold6(
66; CHECK: fmul fast float %f1, 3.000000e+00
67}
68
69; C1 * X + (X + X) => (C1 + 2) * X
70define float @fold7(float %f1) {
71  %t1 = fmul fast float %f1, 5.000000e+00 ; C1 = 5.0
72  %t2 = fadd fast float %f1, %f1
73  %t3 = fadd fast float %t1, %t2 ; 5.0*X + 2*X, expected to become 7.0*X
74  ret float %t3
75; CHECK-LABEL: @fold7(
76; CHECK: fmul fast float %f1, 7.000000e+00
77}
78
79; (X + X) + (X + X) => 4.0 * X
80define float @fold8(float %f1) {
81  %t1 = fadd fast float %f1, %f1
82  %t2 = fadd fast float %f1, %f1
83  %t3 = fadd fast float %t1, %t2 ; sum of two identical X + X terms
84  ret float %t3
85; CHECK-LABEL: @fold8(
86; CHECK: fmul fast float %f1, 4.000000e+00
87}
88
89; X - (X + Y) => -0.0 - Y
90define float @fold9(float %f1, float %f2) {
91  %t1 = fadd float %f1, %f2 ; inner fadd has no fast-math flags
92  %t3 = fsub fast float %f1, %t1 ; only the outer fsub is 'fast'
93  ret float %t3
94
95; CHECK-LABEL: @fold9(
96; CHECK: fsub fast float -0.000000e+00, %f2
97}
98
99; Let C3 = C1 + C2. (f1 + C1) + (f2 + C2) => (f1 + f2) + C3 instead of
100; "(f1 + C3) + f2" or "(f2 + C3) + f1". Placing the constant addend at the
101; top of the resulting simplified expression tree may reveal further
102; optimization opportunities in the super-expression trees.
103;
104define float @fold10(float %f1, float %f2) {
105  %t1 = fadd fast float 2.000000e+00, %f1 ; C1 = 2.0
106  %t2 = fsub fast float %f2, 3.000000e+00 ; C2 = -3.0, written as a subtraction
107  %t3 = fadd fast float %t1, %t2 ; the expected result adds C3 = -1.0 last
108  ret float %t3
109; CHECK-LABEL: @fold10(
110; CHECK: %t3 = fadd fast float %t2, -1.000000e+00
111; CHECK: ret float %t3
112}
113
114; This input once caused a crash/miscompilation; the checks only look for a 'ret'.
115define float @fail1(float %f1, float %f2) {
116  %conv3 = fadd fast float %f1, -1.000000e+00
117  %add = fadd fast float %conv3, %conv3
118  %add2 = fadd fast float %add, %conv3 ; (c + c) + c with c = f1 - 1.0
119  ret float %add2
120; CHECK-LABEL: @fail1(
121; CHECK: ret
122}
123
124define double @fail2(double %f1, double %f2) { ; the checks only look for a 'ret'; no specific folded form is required
125  %t1 = fsub fast double %f1, %f2
126  %t2 = fadd fast double %f1, %f2
127  %t3 = fsub fast double %t1, %t2 ; (f1 - f2) - (f1 + f2)
128  ret double %t3
129; CHECK-LABEL: @fail2(
130; CHECK: ret
131}
132
133; c1 * x - x => (c1 - 1.0) * x
134define float @fold13(float %x) {
135  %mul = fmul fast float %x, 7.000000e+00 ; c1 = 7.0
136  %sub = fsub fast float %mul, %x ; 7.0*x - x, expected to become 6.0*x
137  ret float %sub
138; CHECK-LABEL: @fold13(
139; CHECK: fmul fast float %x, 6.000000e+00
140; CHECK: ret
141}
142
143; -x + y => y - x
144define float @fold14(float %x, float %y) {
145  %neg = fsub fast float -0.0, %x ; negation of x written as -0.0 - x
146  %add = fadd fast float %neg, %y
147  ret float %add
148; CHECK-LABEL: @fold14(
149; CHECK: fsub fast float %y, %x
150; CHECK: ret
151}
152
153; x + -y => x - y
154define float @fold15(float %x, float %y) {
155  %neg = fsub fast float -0.0, %y ; negation of y written as -0.0 - y
156  %add = fadd fast float %x, %neg
157  ret float %add
158; CHECK-LABEL: @fold15(
159; CHECK: fsub fast float %x, %y
160; CHECK: ret
161}
162
163; (select X+Y, X-Y) => X + (select Y, -Y)
164define float @fold16(float %x, float %y) {
165  %cmp = fcmp ogt float %x, %y
166  %plus = fadd fast float %x, %y
167  %minus = fsub fast float %x, %y
168  %r = select i1 %cmp, float %plus, float %minus ; picks x+y when x > y, else x-y
169  ret float %r
170; CHECK-LABEL: @fold16(
171; CHECK: fsub fast float
172; CHECK: select
173; CHECK: fadd fast float
174; CHECK: ret
175}
176
177
178
179; =========================================================================
180;
181;   Testing-cases about fmul begin
182;
183; =========================================================================
184
185; ((X*C1) + C2) * C3 => (X * (C1*C3)) + (C2*C3) (i.e. distribution)
186define float @fmul_distribute1(float %f1) {
187  %t1 = fmul float %f1, 6.0e+3 ; C1 = 6.0e+3
188  %t2 = fadd float %t1, 2.0e+3 ; C2 = 2.0e+3
189  %t3 = fmul fast float %t2, 5.0e+3 ; C3 = 5.0e+3; only this fmul is 'fast'
190  ret float %t3
191; CHECK-LABEL: @fmul_distribute1(
192; CHECK: %1 = fmul fast float %f1, 3.000000e+07
193; CHECK: %t3 = fadd fast float %1, 1.000000e+07
194}
195
196; (X/C1 + C2) * C3 => X/(C1/C3) + C2*C3
197define double @fmul_distribute2(double %f1, double %f2) { ; %f2 is unused
198  %t1 = fdiv double %f1, 3.0e+0 ; C1 = 3.0
199  %t2 = fadd double %t1, 5.0e+1 ; C2 = 50.0
200  ; 0x10000000000000 = DBL_MIN
201  %t3 = fmul fast double %t2, 0x10000000000000 ; C3 = DBL_MIN; only this fmul is 'fast'
202  ret double %t3
203
204; CHECK-LABEL: @fmul_distribute2(
205; CHECK: %1 = fdiv fast double %f1, 0x7FE8000000000000
206; CHECK: fadd fast double %1, 0x69000000000000
207}
208
209; 5.0e-1 * DBL_MIN yields a denormal, so "(f1/3.0 + 5.0e-1) * DBL_MIN" cannot
210; be simplified into f1/(3.0/DBL_MIN) + (5.0e-1*DBL_MIN)
211define double @fmul_distribute3(double %f1) {
212  %t1 = fdiv double %f1, 3.0e+0
213  %t2 = fadd double %t1, 5.0e-1
214  %t3 = fmul fast double %t2, 0x10000000000000 ; expected to be left as is
215  ret double %t3
216
217; CHECK-LABEL: @fmul_distribute3(
218; CHECK: fmul fast double %t2, 0x10000000000000
219}
220
221; (C2 - (X*C1)) * C3 => (C2*C3) - (X * (C1*C3)) (i.e. distribution)
222define float @fmul_distribute4(float %f1) {
223  %t1 = fmul float %f1, 6.0e+3 ; C1 = 6.0e+3
224  %t2 = fsub float 2.0e+3, %t1 ; C2 = 2.0e+3, subtraction form
225  %t3 = fmul fast float %t2, 5.0e+3 ; C3 = 5.0e+3; only this fmul is 'fast'
226  ret float %t3
227; CHECK-LABEL: @fmul_distribute4(
228; CHECK: %1 = fmul fast float %f1, 3.000000e+07
229; CHECK: %t3 = fsub fast float 1.000000e+07, %1
230}
231
232; C1/X * C2 => (C1*C2) / X
233define float @fmul2(float %f1) {
234  %t1 = fdiv float 2.0e+3, %f1 ; C1 = 2.0e+3
235  %t3 = fmul fast float %t1, 6.0e+3 ; C2 = 6.0e+3; only this fmul is 'fast'
236  ret float %t3
237; CHECK-LABEL: @fmul2(
238; CHECK: fdiv fast float 1.200000e+07, %f1
239}
240
241; C1/X * C2 => (C1*C2) / X is disabled if C1/X has multiple uses
242@fmul2_external = external global float
243define float @fmul2_disable(float %f1) {
244  %div = fdiv fast float 1.000000e+00, %f1
245  store float %div, float* @fmul2_external ; second use of %div, besides the fmul below
246  %mul = fmul fast float %div, 2.000000e+00
247  ret float %mul
248; CHECK-LABEL: @fmul2_disable
249; CHECK: store
250; CHECK: fmul fast
251}
252
253; X/C1 * C2 => X * (C2/C1) (if C2/C1 is a normal FP value)
254define float @fmul3(float %f1, float %f2) { ; %f2 is unused
255  %t1 = fdiv float %f1, 2.0e+3 ; C1 = 2.0e+3
256  %t3 = fmul fast float %t1, 6.0e+3 ; C2 = 6.0e+3, so C2/C1 = 3.0
257  ret float %t3
258; CHECK-LABEL: @fmul3(
259; CHECK: fmul fast float %f1, 3.000000e+00
260}
261
262define <4 x float> @fmul3_vec(<4 x float> %f1, <4 x float> %f2) { ; vector form of fmul3; each lane's C2/C1 is expected to fold; %f2 is unused
263  %t1 = fdiv <4 x float> %f1, <float 2.0e+3, float 3.0e+3, float 2.0e+3, float 1.0e+3>
264  %t3 = fmul fast <4 x float> %t1, <float 6.0e+3, float 6.0e+3, float 2.0e+3, float 1.0e+3>
265  ret <4 x float> %t3
266; CHECK-LABEL: @fmul3_vec(
267; CHECK: fmul fast <4 x float> %f1, <float 3.000000e+00, float 2.000000e+00, float 1.000000e+00, float 1.000000e+00>
268}
269
270; Make sure an fmul whose operand is a constant expression doesn't assert.
271define <4 x float> @fmul3_vec_constexpr(<4 x float> %f1, <4 x float> %f2) { ; no output pattern is checked for this function
272  %constExprMul = bitcast i128 trunc (i160 bitcast (<5 x float> <float 6.0e+3, float 6.0e+3, float 2.0e+3, float 1.0e+3, float undef> to i160) to i128) to <4 x float>
273  %t1 = fdiv <4 x float> %f1, <float 2.0e+3, float 3.0e+3, float 2.0e+3, float 1.0e+3>
274  %t3 = fmul fast <4 x float> %t1, %constExprMul
275  ret <4 x float> %t3
276}
277
278; Rule "X/C1 * C2 => X * (C2/C1)" is not applicable if C2/C1 is either a special
279; value or a denormal. The 0x3810000000000000 here is FLT_MIN.
280;
281define float @fmul4(float %f1, float %f2) { ; %f2 is unused
282  %t1 = fdiv float %f1, 2.0e+3
283  %t3 = fmul fast float %t1, 0x3810000000000000 ; expected to be left as is
284  ret float %t3
285; CHECK-LABEL: @fmul4(
286; CHECK: fmul fast float %t1, 0x3810000000000000
287}
288
289; X / C1 * C2 => X / (C1/C2) if C2/C1 is either a special value or a denormal,
290;  and C1/C2 is a normal value.
291;
292define float @fmul5(float %f1, float %f2) { ; %f2 is unused
293  %t1 = fdiv float %f1, 3.0e+0 ; C1 = 3.0
294  %t3 = fmul fast float %t1, 0x3810000000000000 ; C2 = 0x3810000000000000
295  ret float %t3
296; CHECK-LABEL: @fmul5(
297; CHECK: fdiv fast float %f1, 0x47E8000000000000
298}
299
300; (X*Y) * X => (X*X) * Y
301define float @fmul6(float %f1, float %f2) {
302  %mul = fmul float %f1, %f2 ; inner fmul has no fast-math flags
303  %mul1 = fmul fast float %mul, %f1 ; only the outer fmul is 'fast'
304  ret float %mul1
305; CHECK-LABEL: @fmul6(
306; CHECK: fmul fast float %f1, %f1
307}
308
309; "(X*Y) * X => (X*X) * Y" is disabled if "X*Y" has multiple uses
310define float @fmul7(float %f1, float %f2) {
311  %mul = fmul float %f1, %f2
312  %mul1 = fmul fast float %mul, %f1
313  %add = fadd float %mul1, %mul ; second use of %mul
314  ret float %add
315; CHECK-LABEL: @fmul7(
316; CHECK: fmul fast float %mul, %f1
317}
318
319; =========================================================================
320;
321;   Testing-cases about negation
322;
323; =========================================================================
324define float @fneg1(float %f1, float %f2) { ; product of two negations is expected to become f1 * f2
325  %sub = fsub float -0.000000e+00, %f1 ; -0.0 - f1
326  %sub1 = fsub nsz float 0.000000e+00, %f2 ; 0.0 - f2 with 'nsz'
327  %mul = fmul float %sub, %sub1
328  ret float %mul
329; CHECK-LABEL: @fneg1(
330; CHECK: fmul float %f1, %f2
331}
332
333define float @fneg2(float %x) { ; 'nsz' subtraction from 0.0 is expected to be rewritten as subtraction from -0.0
334  %sub = fsub nsz float 0.0, %x
335  ret float %sub
336; CHECK-LABEL: @fneg2(
337; CHECK-NEXT: fsub nsz float -0.000000e+00, %x
338; CHECK-NEXT: ret float
339}
340
341; =========================================================================
342;
343;   Testing-cases about div
344;
345; =========================================================================
346
347; X/C1 / C2 => X * (1/(C2*C1))
348define float @fdiv1(float %x) {
349  %div = fdiv float %x, 0x3FF3333340000000 ; inner fdiv has no fast-math flags
350  %div1 = fdiv fast float %div, 0x4002666660000000 ; only the outer fdiv is 'fast'
351  ret float %div1
352; 0x3FF3333340000000 = 1.2f
353; 0x4002666660000000 = 2.3f
354; 0x3FD7303B60000000 = 0.36231884057971014492
355; CHECK-LABEL: @fdiv1(
356; CHECK: fmul fast float %x, 0x3FD7303B60000000
357}
358
359; X*C1 / C2 => X * (C1/C2)
360define float @fdiv2(float %x) {
361  %mul = fmul float %x, 0x3FF3333340000000 ; fmul has no fast-math flags
362  %div1 = fdiv fast float %mul, 0x4002666660000000 ; only the fdiv is 'fast'
363  ret float %div1
364
365; 0x3FF3333340000000 = 1.2f
366; 0x4002666660000000 = 2.3f
367; 0x3FE0B21660000000 = 0.52173918485641479492
368; CHECK-LABEL: @fdiv2(
369; CHECK: fmul fast float %x, 0x3FE0B21660000000
370}
371
372define <2 x float> @fdiv2_vec(<2 x float> %x) { ; vector form of fdiv2; both lanes give C1/C2 = 3.0
373  %mul = fmul <2 x float> %x, <float 6.0, float 9.0>
374  %div1 = fdiv fast <2 x float> %mul, <float 2.0, float 3.0>
375  ret <2 x float> %div1
376
377; CHECK-LABEL: @fdiv2_vec(
378; CHECK: fmul fast <2 x float> %x, <float 3.000000e+00, float 3.000000e+00>
379}
380
381; "X/C1 / C2 => X * (1/(C2*C1))" is disabled (for now) if C2/C1 is a denormal
382;
383define float @fdiv3(float %x) {
384  %div = fdiv float %x, 0x47EFFFFFE0000000 ; expected to be left as is
385  %div1 = fdiv fast float %div, 0x4002666660000000
386  ret float %div1
387; CHECK-LABEL: @fdiv3(
388; CHECK: fdiv float %x, 0x47EFFFFFE0000000
389}
390
391; "X*C1 / C2 => X * (C1/C2)" is disabled if C1/C2 is a denormal
392; NOTE(review): unlike the neighbouring tests, neither instruction below
392; carries the 'fast' flag -- confirm this is intended.
392define float @fdiv4(float %x) {
393  %mul = fmul float %x, 0x47EFFFFFE0000000 ; expected to be left as is
394  %div = fdiv float %mul, 0x3FC99999A0000000
395  ret float %div
396; CHECK-LABEL: @fdiv4(
397; CHECK: fmul float %x, 0x47EFFFFFE0000000
398}
399
400; (X/Y)/Z => X/(Y*Z)
401define float @fdiv5(float %f1, float %f2, float %f3) {
402  %t1 = fdiv float %f1, %f2 ; inner fdiv has no fast-math flags
403  %t2 = fdiv fast float %t1, %f3 ; only the outer fdiv is 'fast'
404  ret float %t2
405; CHECK-LABEL: @fdiv5(
406; CHECK: fmul float %f2, %f3
407}
408
409; Z/(X/Y) => (Z*Y)/X
410define float @fdiv6(float %f1, float %f2, float %f3) {
411  %t1 = fdiv float %f1, %f2 ; X/Y, no fast-math flags
412  %t2 = fdiv fast float %f3, %t1 ; Z/(X/Y); only this fdiv is 'fast'
413  ret float %t2
414; CHECK-LABEL: @fdiv6(
415; CHECK: fmul float %f3, %f2
416}
417
418; C1/(X*C2) => (C1/C2) / X
419define float @fdiv7(float %x) {
420  %t1 = fmul float %x, 3.0e0 ; C2 = 3.0
421  %t2 = fdiv fast float 15.0e0, %t1 ; C1 = 15.0, so C1/C2 = 5.0
422  ret float %t2
423; CHECK-LABEL: @fdiv7(
424; CHECK: fdiv fast float 5.000000e+00, %x
425}
426
427; C1/(X/C2) => (C1*C2) / X
428define float @fdiv8(float %x) {
429  %t1 = fdiv float %x, 3.0e0 ; C2 = 3.0
430  %t2 = fdiv fast float 15.0e0, %t1 ; C1 = 15.0, so C1*C2 = 45.0
431  ret float %t2
432; CHECK-LABEL: @fdiv8(
433; CHECK: fdiv fast float 4.500000e+01, %x
434}
435
436; C1/(C2/X) => (C1/C2) * X
437define float @fdiv9(float %x) {
438  %t1 = fdiv float 3.0e0, %x ; C2 = 3.0
439  %t2 = fdiv fast float 15.0e0, %t1 ; C1 = 15.0, so C1/C2 = 5.0
440  ret float %t2
441; CHECK-LABEL: @fdiv9(
442; CHECK: fmul fast float %x, 5.000000e+00
443}
444
445; =========================================================================
446;
447;   Testing-cases about factorization
448;
449; =========================================================================
450; x*z + y*z => (x+y) * z
451define float @fact_mul1(float %x, float %y, float %z) {
452  %t1 = fmul fast float %x, %z
453  %t2 = fmul fast float %y, %z
454  %t3 = fadd fast float %t1, %t2 ; common factor z appears on the right of both products
455  ret float %t3
456; CHECK-LABEL: @fact_mul1(
457; CHECK: fmul fast float %1, %z
458}
459
460; z*x - y*z => (x-y) * z
461define float @fact_mul2(float %x, float %y, float %z) {
462  %t1 = fmul fast float %z, %x
463  %t2 = fmul fast float %y, %z
464  %t3 = fsub fast float %t1, %t2 ; common factor z is on the left in %t1 and on the right in %t2
465  ret float %t3
466; CHECK-LABEL: @fact_mul2(
467; CHECK: fmul fast float %1, %z
468}
469
470; z*x - z*y => (x-y) * z
471define float @fact_mul3(float %x, float %y, float %z) {
472  %t2 = fmul fast float %z, %y ; note: %t2 is defined before %t1 in this test
473  %t1 = fmul fast float %z, %x
474  %t3 = fsub fast float %t1, %t2
475  ret float %t3
476; CHECK-LABEL: @fact_mul3(
477; CHECK: fmul fast float %1, %z
478}
479
480; x*z - z*y => (x-y) * z
481define float @fact_mul4(float %x, float %y, float %z) {
482  %t1 = fmul fast float %x, %z
483  %t2 = fmul fast float %z, %y
484  %t3 = fsub fast float %t1, %t2 ; common factor z is on the right in %t1 and on the left in %t2
485  ret float %t3
486; CHECK-LABEL: @fact_mul4(
487; CHECK: fmul fast float %1, %z
488}
489
490; x/y + x/z, no xform
491define float @fact_div1(float %x, float %y, float %z) {
492  %t1 = fdiv fast float %x, %y
493  %t2 = fdiv fast float %x, %z
494  %t3 = fadd fast float %t1, %t2 ; shared numerator, different denominators; expected to be left as is
495  ret float %t3
496; CHECK-LABEL: @fact_div1(
497; CHECK: fadd fast float %t1, %t2
498}
499
500; x/y + z/x; no xform
501define float @fact_div2(float %x, float %y, float %z) {
502  %t1 = fdiv fast float %x, %y
503  %t2 = fdiv fast float %z, %x
504  %t3 = fadd fast float %t1, %t2 ; x is a numerator in %t1 but a denominator in %t2; expected to be left as is
505  ret float %t3
506; CHECK-LABEL: @fact_div2(
507; CHECK: fadd fast float %t1, %t2
508}
509
510; y/x + z/x => (y+z)/x
511define float @fact_div3(float %x, float %y, float %z) {
512  %t1 = fdiv fast float %y, %x
513  %t2 = fdiv fast float %z, %x
514  %t3 = fadd fast float %t1, %t2 ; both quotients share the denominator x
515  ret float %t3
516; CHECK-LABEL: @fact_div3(
517; CHECK: fdiv fast float %1, %x
518}
519
520; y/x - z/x => (y-z)/x
521define float @fact_div4(float %x, float %y, float %z) {
522  %t1 = fdiv fast float %y, %x
523  %t2 = fdiv fast float %z, %x
524  %t3 = fsub fast float %t1, %t2 ; both quotients share the denominator x
525  ret float %t3
526; CHECK-LABEL: @fact_div4(
527; CHECK: fdiv fast float %1, %x
528}
529
530; y/x + z/x => (y+z)/x still applies here: the constant sum y+z is folded.
531define float @fact_div5(float %x) {
532  %t1 = fdiv fast float 0x3810000000000000, %x
533  %t2 = fdiv fast float 0x3800000000000000, %x
534  %t3 = fadd fast float %t1, %t2 ; the two constant numerators are expected to be summed
535  ret float %t3
536; CHECK-LABEL: @fact_div5(
537; CHECK: fdiv fast float 0x3818000000000000, %x
538}
539
540; y/x - z/x => (y-z)/x is disabled if y-z is denormal.
541define float @fact_div6(float %x) {
542  %t1 = fdiv fast float 0x3810000000000000, %x
543  %t2 = fdiv fast float 0x3800000000000000, %x
544  %t3 = fsub fast float %t1, %t2 ; expected to be left as is
545  ret float %t3
546; CHECK-LABEL: @fact_div6(
547; CHECK: %t3 = fsub fast float %t1, %t2
548}
549
550; =========================================================================
551;
552;   Test-cases for square root
553;
554; =========================================================================
555
556; A squared factor fed into a square root intrinsic should be hoisted out
557; as a fabs() value.
558; We have to rely on a function-level attribute to enable this optimization
559; because intrinsics don't currently have access to IR-level fast-math
560; flags. If that changes, we can relax the requirement on all of these
561; tests to just specify 'fast' on the sqrt.
562
563attributes #0 = { "unsafe-fp-math" = "true" } ; attribute group attached as '#0' to functions in this file
564
565declare double @llvm.sqrt.f64(double) ; square-root intrinsic called by the sqrt_intrinsic_* tests
566
567define double @sqrt_intrinsic_arg_squared(double %x) #0 { ; sqrt(x*x) is expected to become fabs(x)
568  %mul = fmul fast double %x, %x
569  %sqrt = call double @llvm.sqrt.f64(double %mul)
570  ret double %sqrt
571
572; CHECK-LABEL: sqrt_intrinsic_arg_squared(
573; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %x)
574; CHECK-NEXT: ret double %fabs
575}
576
577; Check all 6 combinations of a 3-way multiplication tree where
578; one factor is repeated.
579
580define double @sqrt_intrinsic_three_args1(double %x, double %y) #0 { ; sqrt((y*x)*x) is expected to become fabs(x)*sqrt(y)
581  %mul = fmul fast double %y, %x
582  %mul2 = fmul fast double %mul, %x
583  %sqrt = call double @llvm.sqrt.f64(double %mul2)
584  ret double %sqrt
585
586; CHECK-LABEL: sqrt_intrinsic_three_args1(
587; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %x)
588; CHECK-NEXT: %sqrt1 = call fast double @llvm.sqrt.f64(double %y)
589; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
590; CHECK-NEXT: ret double %1
591}
592
593define double @sqrt_intrinsic_three_args2(double %x, double %y) #0 { ; sqrt((x*y)*x) is expected to become fabs(x)*sqrt(y)
594  %mul = fmul fast double %x, %y
595  %mul2 = fmul fast double %mul, %x
596  %sqrt = call double @llvm.sqrt.f64(double %mul2)
597  ret double %sqrt
598
599; CHECK-LABEL: sqrt_intrinsic_three_args2(
600; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %x)
601; CHECK-NEXT: %sqrt1 = call fast double @llvm.sqrt.f64(double %y)
602; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
603; CHECK-NEXT: ret double %1
604}
605
606define double @sqrt_intrinsic_three_args3(double %x, double %y) #0 { ; sqrt((x*x)*y) is expected to become fabs(x)*sqrt(y)
607  %mul = fmul fast double %x, %x
608  %mul2 = fmul fast double %mul, %y
609  %sqrt = call double @llvm.sqrt.f64(double %mul2)
610  ret double %sqrt
611
612; CHECK-LABEL: sqrt_intrinsic_three_args3(
613; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %x)
614; CHECK-NEXT: %sqrt1 = call fast double @llvm.sqrt.f64(double %y)
615; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
616; CHECK-NEXT: ret double %1
617}
618
619define double @sqrt_intrinsic_three_args4(double %x, double %y) #0 { ; sqrt(x*(y*x)) is expected to become fabs(x)*sqrt(y)
620  %mul = fmul fast double %y, %x
621  %mul2 = fmul fast double %x, %mul
622  %sqrt = call double @llvm.sqrt.f64(double %mul2)
623  ret double %sqrt
624
625; CHECK-LABEL: sqrt_intrinsic_three_args4(
626; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %x)
627; CHECK-NEXT: %sqrt1 = call fast double @llvm.sqrt.f64(double %y)
628; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
629; CHECK-NEXT: ret double %1
630}
631
632define double @sqrt_intrinsic_three_args5(double %x, double %y) #0 { ; sqrt(x*(x*y)) is expected to become fabs(x)*sqrt(y)
633  %mul = fmul fast double %x, %y
634  %mul2 = fmul fast double %x, %mul
635  %sqrt = call double @llvm.sqrt.f64(double %mul2)
636  ret double %sqrt
637
638; CHECK-LABEL: sqrt_intrinsic_three_args5(
639; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %x)
640; CHECK-NEXT: %sqrt1 = call fast double @llvm.sqrt.f64(double %y)
641; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
642; CHECK-NEXT: ret double %1
643}
644
645define double @sqrt_intrinsic_three_args6(double %x, double %y) #0 { ; sqrt(y*(x*x)) is expected to become fabs(x)*sqrt(y)
646  %mul = fmul fast double %x, %x
647  %mul2 = fmul fast double %y, %mul
648  %sqrt = call double @llvm.sqrt.f64(double %mul2)
649  ret double %sqrt
650
651; CHECK-LABEL: sqrt_intrinsic_three_args6(
652; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %x)
653; CHECK-NEXT: %sqrt1 = call fast double @llvm.sqrt.f64(double %y)
654; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
655; CHECK-NEXT: ret double %1
656}
657
658define double @sqrt_intrinsic_arg_4th(double %x) #0 { ; sqrt((x*x)*(x*x)) is expected to become x*x
659  %mul = fmul fast double %x, %x
660  %mul2 = fmul fast double %mul, %mul
661  %sqrt = call double @llvm.sqrt.f64(double %mul2)
662  ret double %sqrt
663
664; CHECK-LABEL: sqrt_intrinsic_arg_4th(
665; CHECK-NEXT: %mul = fmul fast double %x, %x
666; CHECK-NEXT: ret double %mul
667}
668
669define double @sqrt_intrinsic_arg_5th(double %x) #0 { ; sqrt(((x*x)*x)*(x*x)) is expected to become (x*x)*sqrt(x)
670  %mul = fmul fast double %x, %x
671  %mul2 = fmul fast double %mul, %x
672  %mul3 = fmul fast double %mul2, %mul
673  %sqrt = call double @llvm.sqrt.f64(double %mul3)
674  ret double %sqrt
675
676; CHECK-LABEL: sqrt_intrinsic_arg_5th(
677; CHECK-NEXT: %mul = fmul fast double %x, %x
678; CHECK-NEXT: %sqrt1 = call fast double @llvm.sqrt.f64(double %x)
679; CHECK-NEXT: %1 = fmul fast double %mul, %sqrt1
680; CHECK-NEXT: ret double %1
681}
682
683; Check that calls to the square-root functions declared below get the same simplification.
684
685declare float @sqrtf(float)
686declare double @sqrt(double)
687declare fp128 @sqrtl(fp128)
688
689define float @sqrt_call_squared_f32(float %x) #0 { ; sqrtf(x*x) is expected to become fabs(x)
690  %mul = fmul fast float %x, %x
691  %sqrt = call float @sqrtf(float %mul)
692  ret float %sqrt
693
694; CHECK-LABEL: sqrt_call_squared_f32(
695; CHECK-NEXT: %fabs = call fast float @llvm.fabs.f32(float %x)
696; CHECK-NEXT: ret float %fabs
697}
698
699define double @sqrt_call_squared_f64(double %x) #0 { ; sqrt(x*x) is expected to become fabs(x)
700  %mul = fmul fast double %x, %x
701  %sqrt = call double @sqrt(double %mul)
702  ret double %sqrt
703
704; CHECK-LABEL: sqrt_call_squared_f64(
705; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %x)
706; CHECK-NEXT: ret double %fabs
707}
708
709define fp128 @sqrt_call_squared_f128(fp128 %x) #0 { ; sqrtl(x*x) is expected to become fabs(x)
710  %mul = fmul fast fp128 %x, %x
711  %sqrt = call fp128 @sqrtl(fp128 %mul)
712  ret fp128 %sqrt
713
714; CHECK-LABEL: sqrt_call_squared_f128(
715; CHECK-NEXT: %fabs = call fast fp128 @llvm.fabs.f128(fp128 %x)
716; CHECK-NEXT: ret fp128 %fabs
717}
718
719; =========================================================================
720;
721;   Test-cases for fmin / fmax
722;
723; =========================================================================
724
725declare double @fmax(double, double)
726declare double @fmin(double, double)
727declare float @fmaxf(float, float)
728declare float @fminf(float, float)
729declare fp128 @fmaxl(fp128, fp128)
730declare fp128 @fminl(fp128, fp128)
731
732; No-NaNs is the minimum requirement to replace these calls.
733; It should always be set when unsafe-fp-math is true, but the tests
734; alternate between the two attribute groups for additional coverage.
735; 'nsz' is implied by the definition of fmax or fmin itself.
736attributes #1 = { "no-nans-fp-math" = "true" }
737
738; Shrink and remove the call: fmax on two extended floats, truncated back to float.
739define float @max1(float %a, float %b) #0 {
740  %c = fpext float %a to double
741  %d = fpext float %b to double
742  %e = call double @fmax(double %c, double %d)
743  %f = fptrunc double %e to float
744  ret float %f
745
746; CHECK-LABEL: max1(
747; CHECK-NEXT:  fcmp fast ogt float %a, %b
748; CHECK-NEXT:  select {{.*}} float %a, float %b
749; CHECK-NEXT:  ret
750}
751
752define float @max2(float %a, float %b) #1 { ; fmaxf call is expected to become a compare and select
753  %c = call float @fmaxf(float %a, float %b)
754  ret float %c
755
756; CHECK-LABEL: max2(
757; CHECK-NEXT:  fcmp nnan nsz ogt float %a, %b
758; CHECK-NEXT:  select {{.*}} float %a, float %b
759; CHECK-NEXT:  ret
760}
761
762
763define double @max3(double %a, double %b) #0 { ; fmax call is expected to become a compare and select
764  %c = call double @fmax(double %a, double %b)
765  ret double %c
766
767; CHECK-LABEL: max3(
768; CHECK-NEXT:  fcmp fast ogt double %a, %b
769; CHECK-NEXT:  select {{.*}} double %a, double %b
770; CHECK-NEXT:  ret
771}
772
773define fp128 @max4(fp128 %a, fp128 %b) #1 { ; fmaxl call is expected to become a compare and select
774  %c = call fp128 @fmaxl(fp128 %a, fp128 %b)
775  ret fp128 %c
776
777; CHECK-LABEL: max4(
778; CHECK-NEXT:  fcmp nnan nsz ogt fp128 %a, %b
779; CHECK-NEXT:  select {{.*}} fp128 %a, fp128 %b
780; CHECK-NEXT:  ret
781}
782
783; Shrink and remove the call: fmin on two extended floats, truncated back to float.
784define float @min1(float %a, float %b) #1 {
785  %c = fpext float %a to double
786  %d = fpext float %b to double
787  %e = call double @fmin(double %c, double %d)
788  %f = fptrunc double %e to float
789  ret float %f
790
791; CHECK-LABEL: min1(
792; CHECK-NEXT:  fcmp nnan nsz olt float %a, %b
793; CHECK-NEXT:  select {{.*}} float %a, float %b
794; CHECK-NEXT:  ret
795}
796
797define float @min2(float %a, float %b) #0 { ; fminf call is expected to become a compare and select
798  %c = call float @fminf(float %a, float %b)
799  ret float %c
800
801; CHECK-LABEL: min2(
802; CHECK-NEXT:  fcmp fast olt float %a, %b
803; CHECK-NEXT:  select {{.*}} float %a, float %b
804; CHECK-NEXT:  ret
805}
806
807define double @min3(double %a, double %b) #1 { ; fmin call is expected to become a compare and select
808  %c = call double @fmin(double %a, double %b)
809  ret double %c
810
811; CHECK-LABEL: min3(
812; CHECK-NEXT:  fcmp nnan nsz olt double %a, %b
813; CHECK-NEXT:  select {{.*}} double %a, double %b
814; CHECK-NEXT:  ret
815}
816
817define fp128 @min4(fp128 %a, fp128 %b) #0 { ; fminl call is expected to become a compare and select
818  %c = call fp128 @fminl(fp128 %a, fp128 %b)
819  ret fp128 %c
820
821; CHECK-LABEL: min4(
822; CHECK-NEXT:  fcmp fast olt fp128 %a, %b
823; CHECK-NEXT:  select {{.*}} fp128 %a, fp128 %b
824; CHECK-NEXT:  ret
825}
826