• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti < %s | FileCheck -check-prefix=GFX6 %s
3; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=bonaire < %s | FileCheck -check-prefix=GFX78 %s
4; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefix=GFX78 %s
5
; Baseline case: llvm.floor.f64 applied directly to the double argument, with no
; fast-math flags on the call and no attribute group on the function.
; GFX6 checks expect an expansion through v_fract_f64, v_min_f64, a
; v_cmp_o_f64 / v_cndmask_b32 pair and a final v_add_f64 with a negated operand;
; GFX78 checks expect a single v_floor_f64.
; NOTE(review): in the recorded GFX6 sequence vcc comes from an ordered compare
; of the input with itself, and the v_cndmask_b32 pair lists v0/v1 (the input)
; as the last source. If v_cndmask_b32 picks its last source when vcc is set
; (TODO confirm against the ISA manual), the value subtracted for a non-NaN
; input would be the input itself rather than the clamped fraction. Verify the
; f64 floor lowering before treating this expected output as correct.
6define double @v_floor_f64_ieee(double %x) {
7; GFX6-LABEL: v_floor_f64_ieee:
8; GFX6:       ; %bb.0:
9; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10; GFX6-NEXT:    v_fract_f64_e32 v[2:3], v[0:1]
11; GFX6-NEXT:    s_mov_b32 s4, -1
12; GFX6-NEXT:    s_mov_b32 s5, 0x3fefffff
13; GFX6-NEXT:    v_min_f64 v[2:3], v[2:3], s[4:5]
14; GFX6-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
15; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
16; GFX6-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
17; GFX6-NEXT:    v_add_f64 v[0:1], v[0:1], -v[2:3]
18; GFX6-NEXT:    s_setpc_b64 s[30:31]
19;
20; GFX78-LABEL: v_floor_f64_ieee:
21; GFX78:       ; %bb.0:
22; GFX78-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
23; GFX78-NEXT:    v_floor_f64_e32 v[0:1], v[0:1]
24; GFX78-NEXT:    s_setpc_b64 s[30:31]
25  %result = call double @llvm.floor.f64(double %x)
26  ret double %result
27}
28
; Same call as the plain floor test but with the nnan fast-math flag on the
; llvm.floor.f64 call. The GFX6 checks contain no v_cmp_o_f64 / v_cndmask_b32
; instructions: the expected sequence is v_fract_f64, v_min_f64, v_add_f64.
; GFX78 checks expect a single v_floor_f64.
29define double @v_floor_f64_ieee_nnan(double %x) {
30; GFX6-LABEL: v_floor_f64_ieee_nnan:
31; GFX6:       ; %bb.0:
32; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
33; GFX6-NEXT:    v_fract_f64_e32 v[2:3], v[0:1]
34; GFX6-NEXT:    s_mov_b32 s4, -1
35; GFX6-NEXT:    s_mov_b32 s5, 0x3fefffff
36; GFX6-NEXT:    v_min_f64 v[2:3], v[2:3], s[4:5]
37; GFX6-NEXT:    v_add_f64 v[0:1], v[0:1], -v[2:3]
38; GFX6-NEXT:    s_setpc_b64 s[30:31]
39;
40; GFX78-LABEL: v_floor_f64_ieee_nnan:
41; GFX78:       ; %bb.0:
42; GFX78-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
43; GFX78-NEXT:    v_floor_f64_e32 v[0:1], v[0:1]
44; GFX78-NEXT:    s_setpc_b64 s[30:31]
45  %result = call nnan double @llvm.floor.f64(double %x)
46  ret double %result
47}
48
; floor of a negated input: an fneg of the argument feeds llvm.floor.f64.
; The checks expect the negation to show up as a -v[0:1] operand on
; v_fract_f64 and v_add_f64 (GFX6) or on v_floor_f64 (GFX78), with no separate
; negate instruction. In the GFX6 checks the v_cmp_o_f64 / v_cndmask_b32 lines
; use v[0:1] without the negation.
49define double @v_floor_f64_ieee_fneg(double %x) {
50; GFX6-LABEL: v_floor_f64_ieee_fneg:
51; GFX6:       ; %bb.0:
52; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
53; GFX6-NEXT:    v_fract_f64_e64 v[2:3], -v[0:1]
54; GFX6-NEXT:    s_mov_b32 s4, -1
55; GFX6-NEXT:    s_mov_b32 s5, 0x3fefffff
56; GFX6-NEXT:    v_min_f64 v[2:3], v[2:3], s[4:5]
57; GFX6-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
58; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
59; GFX6-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
60; GFX6-NEXT:    v_add_f64 v[0:1], -v[0:1], -v[2:3]
61; GFX6-NEXT:    s_setpc_b64 s[30:31]
62;
63; GFX78-LABEL: v_floor_f64_ieee_fneg:
64; GFX78:       ; %bb.0:
65; GFX78-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
66; GFX78-NEXT:    v_floor_f64_e64 v[0:1], -v[0:1]
67; GFX78-NEXT:    s_setpc_b64 s[30:31]
68  %neg.x = fneg double %x
69  %result = call double @llvm.floor.f64(double %neg.x)
70  ret double %result
71}
72
; Plain llvm.floor.f64 in a function tagged with attribute group #1, which this
; file defines as "amdgpu-ieee"="false". The expected GFX6 and GFX78
; instruction sequences are the same as those recorded for v_floor_f64_ieee.
73define double @v_floor_f64_nonieee(double %x) #1 {
74; GFX6-LABEL: v_floor_f64_nonieee:
75; GFX6:       ; %bb.0:
76; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
77; GFX6-NEXT:    v_fract_f64_e32 v[2:3], v[0:1]
78; GFX6-NEXT:    s_mov_b32 s4, -1
79; GFX6-NEXT:    s_mov_b32 s5, 0x3fefffff
80; GFX6-NEXT:    v_min_f64 v[2:3], v[2:3], s[4:5]
81; GFX6-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
82; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
83; GFX6-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
84; GFX6-NEXT:    v_add_f64 v[0:1], v[0:1], -v[2:3]
85; GFX6-NEXT:    s_setpc_b64 s[30:31]
86;
87; GFX78-LABEL: v_floor_f64_nonieee:
88; GFX78:       ; %bb.0:
89; GFX78-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
90; GFX78-NEXT:    v_floor_f64_e32 v[0:1], v[0:1]
91; GFX78-NEXT:    s_setpc_b64 s[30:31]
92  %result = call double @llvm.floor.f64(double %x)
93  ret double %result
94}
95
; nnan-flagged llvm.floor.f64 in a function tagged with attribute group #1
; ("amdgpu-ieee"="false" in this file). The GFX6 checks contain no
; v_cmp_o_f64 / v_cndmask_b32 instructions; GFX78 checks expect a single
; v_floor_f64.
96define double @v_floor_f64_nonieee_nnan(double %x) #1 {
97; GFX6-LABEL: v_floor_f64_nonieee_nnan:
98; GFX6:       ; %bb.0:
99; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
100; GFX6-NEXT:    v_fract_f64_e32 v[2:3], v[0:1]
101; GFX6-NEXT:    s_mov_b32 s4, -1
102; GFX6-NEXT:    s_mov_b32 s5, 0x3fefffff
103; GFX6-NEXT:    v_min_f64 v[2:3], v[2:3], s[4:5]
104; GFX6-NEXT:    v_add_f64 v[0:1], v[0:1], -v[2:3]
105; GFX6-NEXT:    s_setpc_b64 s[30:31]
106;
107; GFX78-LABEL: v_floor_f64_nonieee_nnan:
108; GFX78:       ; %bb.0:
109; GFX78-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
110; GFX78-NEXT:    v_floor_f64_e32 v[0:1], v[0:1]
111; GFX78-NEXT:    s_setpc_b64 s[30:31]
112  %result = call nnan double @llvm.floor.f64(double %x)
113  ret double %result
114}
115
; floor of a negated input in a function tagged with attribute group #1
; ("amdgpu-ieee"="false" in this file). The checks expect the negation as a
; -v[0:1] operand on v_fract_f64 / v_add_f64 (GFX6) or v_floor_f64 (GFX78).
; Note the name spells "non_ieee" with an underscore, unlike the sibling
; v_floor_f64_nonieee* functions; the LABEL checks match this spelling.
116define double @v_floor_f64_non_ieee_fneg(double %x) #1 {
117; GFX6-LABEL: v_floor_f64_non_ieee_fneg:
118; GFX6:       ; %bb.0:
119; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
120; GFX6-NEXT:    v_fract_f64_e64 v[2:3], -v[0:1]
121; GFX6-NEXT:    s_mov_b32 s4, -1
122; GFX6-NEXT:    s_mov_b32 s5, 0x3fefffff
123; GFX6-NEXT:    v_min_f64 v[2:3], v[2:3], s[4:5]
124; GFX6-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
125; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
126; GFX6-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
127; GFX6-NEXT:    v_add_f64 v[0:1], -v[0:1], -v[2:3]
128; GFX6-NEXT:    s_setpc_b64 s[30:31]
129;
130; GFX78-LABEL: v_floor_f64_non_ieee_fneg:
131; GFX78:       ; %bb.0:
132; GFX78-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
133; GFX78-NEXT:    v_floor_f64_e64 v[0:1], -v[0:1]
134; GFX78-NEXT:    s_setpc_b64 s[30:31]
135  %neg.x = fneg double %x
136  %result = call double @llvm.floor.f64(double %neg.x)
137  ret double %result
138}
139
; floor of an absolute value: llvm.fabs.f64 of the argument feeds
; llvm.floor.f64. The checks expect the fabs to show up as a |v[0:1]| operand
; on v_fract_f64 and v_add_f64 (GFX6) or on v_floor_f64 (GFX78), with no
; separate instruction for the absolute value. In the GFX6 checks the
; v_cmp_o_f64 / v_cndmask_b32 lines use v[0:1] without the modifier.
; Only the GFX6 and GFX78 prefixes are checked here, matching the prefixes the
; RUN lines of this file pass to FileCheck.
140define double @v_floor_f64_fabs(double %x) {
141; GFX6-LABEL: v_floor_f64_fabs:
142; GFX6:       ; %bb.0:
143; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
144; GFX6-NEXT:    v_fract_f64_e64 v[2:3], |v[0:1]|
145; GFX6-NEXT:    s_mov_b32 s4, -1
146; GFX6-NEXT:    s_mov_b32 s5, 0x3fefffff
147; GFX6-NEXT:    v_min_f64 v[2:3], v[2:3], s[4:5]
148; GFX6-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
149; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
150; GFX6-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
151; GFX6-NEXT:    v_add_f64 v[0:1], |v[0:1]|, -v[2:3]
152; GFX6-NEXT:    s_setpc_b64 s[30:31]
153;
154; GFX78-LABEL: v_floor_f64_fabs:
155; GFX78:       ; %bb.0:
156; GFX78-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
157; GFX78-NEXT:    v_floor_f64_e64 v[0:1], |v[0:1]|
158; GFX78-NEXT:    s_setpc_b64 s[30:31]
164  %abs.x = call double @llvm.fabs.f64(double %x)
165  %result = call double @llvm.floor.f64(double %abs.x)
166  ret double %result
167}
168
; floor of a negated absolute value: llvm.fabs.f64, then fneg, then
; llvm.floor.f64. The checks expect both operations combined into a single
; -|v[0:1]| operand on v_fract_f64 and v_add_f64 (GFX6) or on v_floor_f64
; (GFX78). In the GFX6 checks the v_cmp_o_f64 / v_cndmask_b32 lines use
; v[0:1] without modifiers.
169define double @v_floor_f64_fneg_fabs(double %x) {
170; GFX6-LABEL: v_floor_f64_fneg_fabs:
171; GFX6:       ; %bb.0:
172; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
173; GFX6-NEXT:    v_fract_f64_e64 v[2:3], -|v[0:1]|
174; GFX6-NEXT:    s_mov_b32 s4, -1
175; GFX6-NEXT:    s_mov_b32 s5, 0x3fefffff
176; GFX6-NEXT:    v_min_f64 v[2:3], v[2:3], s[4:5]
177; GFX6-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
178; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
179; GFX6-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
180; GFX6-NEXT:    v_add_f64 v[0:1], -|v[0:1]|, -v[2:3]
181; GFX6-NEXT:    s_setpc_b64 s[30:31]
182;
183; GFX78-LABEL: v_floor_f64_fneg_fabs:
184; GFX78:       ; %bb.0:
185; GFX78-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
186; GFX78-NEXT:    v_floor_f64_e64 v[0:1], -|v[0:1]|
187; GFX78-NEXT:    s_setpc_b64 s[30:31]
188  %abs.x = call double @llvm.fabs.f64(double %x)
189  %neg.abs.x = fneg double %abs.x
190  %result = call double @llvm.floor.f64(double %neg.abs.x)
191  ret double %result
192}
193
; llvm.floor.f64 on an inreg double argument in an amdgpu_ps function; the
; result is bitcast to <2 x float> for the return. The checks expect the input
; in s[2:3] and the result in v[0:1]. On GFX6 the expected sequence also copies
; s2/s3 into v2/v3 with v_mov_b32 ahead of the v_cndmask_b32 pair; on GFX78 a
; single v_floor_f64 reads s[2:3] directly.
194define amdgpu_ps <2 x float> @s_floor_f64(double inreg %x) {
195; GFX6-LABEL: s_floor_f64:
196; GFX6:       ; %bb.0:
197; GFX6-NEXT:    v_fract_f64_e32 v[0:1], s[2:3]
198; GFX6-NEXT:    s_mov_b32 s0, -1
199; GFX6-NEXT:    s_mov_b32 s1, 0x3fefffff
200; GFX6-NEXT:    v_min_f64 v[0:1], v[0:1], s[0:1]
201; GFX6-NEXT:    v_cmp_o_f64_e64 vcc, s[2:3], s[2:3]
202; GFX6-NEXT:    v_mov_b32_e32 v2, s2
203; GFX6-NEXT:    v_mov_b32_e32 v3, s3
204; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
205; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
206; GFX6-NEXT:    v_add_f64 v[0:1], s[2:3], -v[0:1]
207; GFX6-NEXT:    ; return to shader part epilog
208;
209; GFX78-LABEL: s_floor_f64:
210; GFX78:       ; %bb.0:
211; GFX78-NEXT:    v_floor_f64_e32 v[0:1], s[2:3]
212; GFX78-NEXT:    ; return to shader part epilog
213  %result = call double @llvm.floor.f64(double %x)
214  %cast = bitcast double %result to <2 x float>
215  ret <2 x float> %cast
216}
217
; floor of a negated inreg double argument in an amdgpu_ps function; the
; result is bitcast to <2 x float> for the return. The checks expect the
; negation as a -s[2:3] operand on v_fract_f64 / v_add_f64 (GFX6) or on
; v_floor_f64 (GFX78). In the GFX6 checks the v_cmp_o_f64 and the v_mov_b32
; copies use s[2:3] / s2 / s3 without the negation.
218define amdgpu_ps <2 x float> @s_floor_f64_fneg(double inreg %x) {
219; GFX6-LABEL: s_floor_f64_fneg:
220; GFX6:       ; %bb.0:
221; GFX6-NEXT:    v_fract_f64_e64 v[0:1], -s[2:3]
222; GFX6-NEXT:    s_mov_b32 s0, -1
223; GFX6-NEXT:    s_mov_b32 s1, 0x3fefffff
224; GFX6-NEXT:    v_min_f64 v[0:1], v[0:1], s[0:1]
225; GFX6-NEXT:    v_cmp_o_f64_e64 vcc, s[2:3], s[2:3]
226; GFX6-NEXT:    v_mov_b32_e32 v2, s2
227; GFX6-NEXT:    v_mov_b32_e32 v3, s3
228; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
229; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
230; GFX6-NEXT:    v_add_f64 v[0:1], -s[2:3], -v[0:1]
231; GFX6-NEXT:    ; return to shader part epilog
232;
233; GFX78-LABEL: s_floor_f64_fneg:
234; GFX78:       ; %bb.0:
235; GFX78-NEXT:    v_floor_f64_e64 v[0:1], -s[2:3]
236; GFX78-NEXT:    ; return to shader part epilog
237  %neg.x = fneg double %x
238  %result = call double @llvm.floor.f64(double %neg.x)
239  %cast = bitcast double %result to <2 x float>
240  ret <2 x float> %cast
241}
242
; floor of the absolute value of an inreg double argument in an amdgpu_ps
; function; the result is bitcast to <2 x float> for the return. The checks
; expect the fabs as a |s[2:3]| operand on v_fract_f64 / v_add_f64 (GFX6) or on
; v_floor_f64 (GFX78). In the GFX6 checks the v_cmp_o_f64 and the v_mov_b32
; copies use s[2:3] / s2 / s3 without the modifier.
243define amdgpu_ps <2 x float> @s_floor_f64_fabs(double inreg %x) {
244; GFX6-LABEL: s_floor_f64_fabs:
245; GFX6:       ; %bb.0:
246; GFX6-NEXT:    v_fract_f64_e64 v[0:1], |s[2:3]|
247; GFX6-NEXT:    s_mov_b32 s0, -1
248; GFX6-NEXT:    s_mov_b32 s1, 0x3fefffff
249; GFX6-NEXT:    v_min_f64 v[0:1], v[0:1], s[0:1]
250; GFX6-NEXT:    v_cmp_o_f64_e64 vcc, s[2:3], s[2:3]
251; GFX6-NEXT:    v_mov_b32_e32 v2, s2
252; GFX6-NEXT:    v_mov_b32_e32 v3, s3
253; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
254; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
255; GFX6-NEXT:    v_add_f64 v[0:1], |s[2:3]|, -v[0:1]
256; GFX6-NEXT:    ; return to shader part epilog
257;
258; GFX78-LABEL: s_floor_f64_fabs:
259; GFX78:       ; %bb.0:
260; GFX78-NEXT:    v_floor_f64_e64 v[0:1], |s[2:3]|
261; GFX78-NEXT:    ; return to shader part epilog
262  %abs.x = call double @llvm.fabs.f64(double %x)
263  %result = call double @llvm.floor.f64(double %abs.x)
264  %cast = bitcast double %result to <2 x float>
265  ret <2 x float> %cast
266}
267
; floor of the negated absolute value of an inreg double argument in an
; amdgpu_ps function; the result is bitcast to <2 x float> for the return.
; The checks expect fabs and fneg combined into a single -|s[2:3]| operand on
; v_fract_f64 / v_add_f64 (GFX6) or on v_floor_f64 (GFX78). In the GFX6 checks
; the v_cmp_o_f64 and the v_mov_b32 copies use s[2:3] / s2 / s3 without
; modifiers.
268define amdgpu_ps <2 x float> @s_floor_f64_fneg_fabs(double inreg %x) {
269; GFX6-LABEL: s_floor_f64_fneg_fabs:
270; GFX6:       ; %bb.0:
271; GFX6-NEXT:    v_fract_f64_e64 v[0:1], -|s[2:3]|
272; GFX6-NEXT:    s_mov_b32 s0, -1
273; GFX6-NEXT:    s_mov_b32 s1, 0x3fefffff
274; GFX6-NEXT:    v_min_f64 v[0:1], v[0:1], s[0:1]
275; GFX6-NEXT:    v_cmp_o_f64_e64 vcc, s[2:3], s[2:3]
276; GFX6-NEXT:    v_mov_b32_e32 v2, s2
277; GFX6-NEXT:    v_mov_b32_e32 v3, s3
278; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
279; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
280; GFX6-NEXT:    v_add_f64 v[0:1], -|s[2:3]|, -v[0:1]
281; GFX6-NEXT:    ; return to shader part epilog
282;
283; GFX78-LABEL: s_floor_f64_fneg_fabs:
284; GFX78:       ; %bb.0:
285; GFX78-NEXT:    v_floor_f64_e64 v[0:1], -|s[2:3]|
286; GFX78-NEXT:    ; return to shader part epilog
287  %abs.x = call double @llvm.fabs.f64(double %x)
288  %neg.abs.x = fneg double %abs.x
289  %result = call double @llvm.floor.f64(double %neg.abs.x)
290  %cast = bitcast double %result to <2 x float>
291  ret <2 x float> %cast
292}
293
; Intrinsics called by the test functions in this file; both declarations carry
; attribute group #0.
294declare double @llvm.floor.f64(double) #0
295declare double @llvm.fabs.f64(double) #0
296
; #0 is attached to the intrinsic declarations above.
; #1 sets "amdgpu-ieee"="false" and is attached to v_floor_f64_nonieee,
; v_floor_f64_nonieee_nnan and v_floor_f64_non_ieee_fneg.
297attributes #0 = { nounwind readnone speculatable willreturn }
298attributes #1 = { "amdgpu-ieee"="false" }
299