• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse2 -recip=!divf,!vec-divf | FileCheck %s --check-prefix=NORECIP
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx -recip=divf,vec-divf | FileCheck %s --check-prefix=RECIP
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx -recip=divf:2,vec-divf:2 | FileCheck %s --check-prefix=REFINE
4
5; If the target's divss/divps instructions are substantially
6; slower than rcpss/rcpps with a Newton-Raphson refinement,
7; we should generate the estimate sequence.
8
9; See PR21385 ( http://llvm.org/bugs/show_bug.cgi?id=21385 )
10; for details about the accuracy, speed, and implementation
11; differences of x86 reciprocal estimates.
12
; Scalar case: computes 1.0 / %x on a single float using an fdiv that carries
; the `fast` flag. Each check group below belongs to one of the RUN lines at
; the top of the file and is selected by its check prefix.
13define float @reciprocal_estimate(float %x) #0 {
14  %div = fdiv fast float 1.0, %x
15  ret float %div
16
; NORECIP group (the sse2 run with both estimate kinds negated in -recip):
; a real divss is required, on consecutive lines between a movss and the
; final movaps/retq.
17; NORECIP-LABEL: reciprocal_estimate:
18; NORECIP: movss
19; NORECIP-NEXT: divss
20; NORECIP-NEXT: movaps
21; NORECIP-NEXT: retq
22
; RECIP group (the avx run with estimates enabled and no step count given):
; vrcpss followed by one mul/sub/mul/add group, presumably a single
; refinement step. Only the last vaddss -> retq adjacency is strict; the
; other patterns may have unrelated lines between them.
23; RECIP-LABEL: reciprocal_estimate:
24; RECIP: vrcpss
25; RECIP: vmulss
26; RECIP: vsubss
27; RECIP: vmulss
28; RECIP: vaddss
29; RECIP-NEXT: retq
30
; REFINE group (the avx run with `:2` appended to each -recip entry):
; the same mul/sub/mul/add group is expected twice after vrcpss.
31; REFINE-LABEL: reciprocal_estimate:
32; REFINE: vrcpss
33; REFINE: vmulss
34; REFINE: vsubss
35; REFINE: vmulss
36; REFINE: vaddss
37; REFINE: vmulss
38; REFINE: vsubss
39; REFINE: vmulss
40; REFINE: vaddss
41; REFINE-NEXT: retq
42}
43
; 4-wide vector case: divides a splat of 1.0 by %x element-wise, again with
; the `fast` flag on the fdiv. The check groups mirror the scalar test but
; expect the packed (ps) forms of the instructions.
44define <4 x float> @reciprocal_estimate_v4f32(<4 x float> %x) #0 {
45  %div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
46  ret <4 x float> %div
47
; NORECIP group: a single divps on consecutive lines between two movaps,
; then retq. No estimate instruction is expected in this run.
48; NORECIP-LABEL: reciprocal_estimate_v4f32:
49; NORECIP: movaps
50; NORECIP-NEXT: divps
51; NORECIP-NEXT: movaps
52; NORECIP-NEXT: retq
53
; RECIP group: vrcpps followed by one mul/sub/mul/add group; retq must
; directly follow the vaddps.
54; RECIP-LABEL: reciprocal_estimate_v4f32:
55; RECIP: vrcpps
56; RECIP: vmulps
57; RECIP: vsubps
58; RECIP: vmulps
59; RECIP: vaddps
60; RECIP-NEXT: retq
61
; REFINE group: the mul/sub/mul/add group is expected twice after vrcpps,
; matching the `:2` suffix used on that RUN line.
62; REFINE-LABEL: reciprocal_estimate_v4f32:
63; REFINE: vrcpps
64; REFINE: vmulps
65; REFINE: vsubps
66; REFINE: vmulps
67; REFINE: vaddps
68; REFINE: vmulps
69; REFINE: vsubps
70; REFINE: vmulps
71; REFINE: vaddps
72; REFINE-NEXT: retq
73}
74
; 8-wide vector case: divides a splat of 1.0 by %x element-wise with the
; `fast` flag on the fdiv.
75define <8 x float> @reciprocal_estimate_v8f32(<8 x float> %x) #0 {
76  %div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
77  ret <8 x float> %div
78
; NORECIP group (the sse2 run): two divps are expected back to back, with a
; pair of movaps before and after. Presumably the 8-element vector is
; handled as two 4-element halves on this target - TODO confirm.
79; NORECIP-LABEL: reciprocal_estimate_v8f32:
80; NORECIP: movaps
81; NORECIP: movaps
82; NORECIP-NEXT: divps
83; NORECIP-NEXT: divps
84; NORECIP-NEXT: movaps
85; NORECIP-NEXT: movaps
86; NORECIP-NEXT: retq
87
; RECIP group: the same mnemonic sequence as the 4-wide test. These patterns
; match the mnemonic only, so they do not verify which register width the
; instructions operate on.
88; RECIP-LABEL: reciprocal_estimate_v8f32:
89; RECIP: vrcpps
90; RECIP: vmulps
91; RECIP: vsubps
92; RECIP: vmulps
93; RECIP: vaddps
94; RECIP-NEXT: retq
95
; REFINE group: vrcpps followed by the mul/sub/mul/add group twice, with
; retq directly after the last vaddps.
96; REFINE-LABEL: reciprocal_estimate_v8f32:
97; REFINE: vrcpps
98; REFINE: vmulps
99; REFINE: vsubps
100; REFINE: vmulps
101; REFINE: vaddps
102; REFINE: vmulps
103; REFINE: vsubps
104; REFINE: vmulps
105; REFINE: vaddps
106; REFINE-NEXT: retq
107}
108
; Attribute group #0, referenced by every function defined in this file:
; sets the string attribute "unsafe-fp-math" to "true".
109attributes #0 = { "unsafe-fp-math"="true" }
110